In [36]:
from data_utils_ import *
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import logging
from collections import OrderedDict
import argparse
import numpy as np
from Models import Pipeline, MTLoss, Prediction #CrossAttention, Context
torch.cuda.empty_cache()
import gc
from types import SimpleNamespace

In [37]:
device = "cuda:7"

In [4]:
args = SimpleNamespace(dim = 768, upscale_dim = 256, codes='128,64,32,8', batch_size=2, learning_rate=0.001, weight_decay=0.07)

In [38]:
def getLoaders2(main_task_path = './Data/SignData/train_data', scaffold_task_path = './Data/2018/train_data', batch_size=8, slice=[-1, -1, -1], test_path='./Data/SignData/test_data'):
	print('Reading the Main Task Dataset...')
	main_task_dataset = RevSigData(main_task_path, mode='MAIN', slice_=slice[0], transform=Transform(), sigtx=ScaleSigScores())
	#main_task_dataset = dataset.readData(main_task_path, Transform(), mode='MAIN', n=slice[0])
	print('Reading the Scaffolds Task Dataset...')
	scaffold_task_dataset = RevSigData(scaffold_task_path, mode='SCAFFOLDS', slice_=slice[1], transform=Transform())
	#scaffold_task_dataset = dataset.readData(scaffold_task_path, Transform(), mode='SCAFFOLDS', n=slice[1])
	

	if test_path:
		print('Reading the test Dataset')
		test_dataset = RevSigData(test_path, mode='TEST', slice_=slice[2], transform=Transform(), sigtx=ScaleSigScores())
		#test_dataset = dataset.readData(test_path, Transform(), mode='TEST', n=slice[2])
	else:
		test_dataset = None


	#length of the both task datasets
	main_task_len = len(main_task_dataset)
	scaffold_task_len = len(scaffold_task_dataset)
	test_len = len(test_dataset)

	#inflate the smaller dataset to match the size of the larger one
	if main_task_len < scaffold_task_len:
		difference = scaffold_task_len - main_task_len
		sample = [random.choice(main_task_dataset) for _ in range(difference)]
		main_task_dataset = main_task_dataset + sample
	
	# print(len(main_task_dataset), len(scaffold_task_dataset))
	#print(main_task_len, scaffold_task_len)
	return (main_task_dataset, scaffold_task_dataset, test_dataset)

In [39]:
main_task_dataset, scaffold_task_dataset, test_dataset = getLoaders2(batch_size=args.batch_size, slice=[-1,-1,-1])

NameError: name 'args' is not defined

In [14]:
main_task_dataloader = DataLoader(main_task_dataset, batch_size = args.batch_size, shuffle = True, num_workers=4)
scaffold_task_dataloader = DataLoader(scaffold_task_dataset, batch_size = args.batch_size, shuffle=True, num_workers=4)
if test_dataset != None:
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
else:
    test_data_loader = None

In [40]:
def evaluate(model, main_task_predictor, scaffold_task_predictor, Criterion, test_loader):
	with torch.no_grad():
		eval_loss = []
		for i, d in enumerate(test_loader,0):
			scaffold_task_data = d
			papers_sc, reviews_sc, recs_sc, confs_sc, sign_m = scaffold_task_data[0].transpose(1,2).float().to(device),\
							 scaffold_task_data[1].transpose(1,2).float().to(device), \
							 scaffold_task_data[2].float().to(device),\
							 scaffold_task_data[3].float().to(device),\
							 scaffold_task_data[4].float().to(device)

			ex, subj, opine = sign_m[:,0], sign_m[:,1], sign_m[:,2]
			out, rec_codes, conf_codes = model(papers_sc, reviews_sc)
			rec_preds, conf_preds = scaffold_task_predictor(rec_codes.view(out.shape[0], -1), conf_codes.view(out.shape[0], -1))

			#out_m, rec_codes_m, conf_codes_m = model(papers_sc, reviews_sc)
			ex_preds, subj_preds, intensity_preds = main_task_predictor(out, rec_codes, conf_codes)


			loss = Criterion([rec_preds.squeeze(1), conf_preds.squeeze(1), ex_preds.squeeze(1), subj_preds.squeeze(1), intensity_preds.squeeze(1)], [recs_sc, confs_sc, ex, subj, opine])
			
			eval_loss.append(loss.item())
		return np.average(eval_loss)



def train(args, dataloaders):
    main_task_loader, scaffold_task_loader, test_loader = dataloaders
    model = Pipeline.Pipeline(args).to(device)
    main_task_predictor = Prediction.MainPrediction(args.upscale_dim, args.upscale_dim, 16).to(device)
    scaffold_task_predictor = Prediction.ScaffoldPrediction(args.upscale_dim, 8).to(device)

    print(model)
    for name, param in model.named_parameters():
        print(name, param.shape)
    print("No. of Trainable parameters {}".format(sum(p.numel() for p in model.parameters() if p.requires_grad)))

    Criterion = MTLoss.MTLoss().to(device2)
    optimizer = torch.optim.Adam(list(model.parameters()) + list(Criterion.parameters()), lr=args.learning_rate, weight_decay=args.weight_decay) #+ list(main_task_predictor.parameters()) + list(scaffold_task_predictor.parameters())
    optimizerMain = torch.optim.Adam(main_task_predictor.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
    optimizerScaffold = torch.optim.Adam(scaffold_task_predictor.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
    epochs = 100

    for epoch in range(epochs):
        model.train()
        epoch_loss = []
        for i, d in enumerate(zip(scaffold_task_loader, main_task_loader),0):
            #print(i)
            main_task_data = d[1]
            scaffold_task_data = d[0]
            papers_sc, reviews_sc, recs_sc, confs_sc = scaffold_task_data[0].transpose(1,2).float().to(device),\
                                 scaffold_task_data[1].transpose(1,2).float().to(device), \
                                 scaffold_task_data[2].float().to(device),\
                                 scaffold_task_data[3].float().to(device)

            #print(ex.shape, subj.shape, opine.shape, recs_sc.shape, confs_sc.shape)

            optimizer.zero_grad()
            optimizerMain.zero_grad()
            optimizerScaffold.zero_grad()
            out, rec_codes, conf_codes = model(papers_sc, reviews_sc)
            rec_preds, conf_preds = scaffold_task_predictor(rec_codes.view(out.shape[0], -1), conf_codes.view(out.shape[0], -1))
            
            del papers_sc
            del reviews_sc
            
            papers_sc, reviews_sc, sign_m = main_task_data[0].transpose(1,2).float().to(device),\
                                 main_task_data[1].transpose(1,2).float().to(device), \
                                 main_task_data[2].float().to(device)

            ex, subj, opine = sign_m[:,0], sign_m[:,1], sign_m[:,2]

            #do the for the main task
            out_m, rec_codes_m, conf_codes_m = model(papers_sc, reviews_sc)
            ex_preds, subj_preds, intensity_preds = main_task_predictor(out_m, rec_codes_m, conf_codes_m)
            #print(ex_preds.shape, subj_preds.shape, intensity_preds.shape)


            loss = Criterion([rec_preds.squeeze(1), conf_preds.squeeze(1), ex_preds.squeeze(1), subj_preds.squeeze(1), intensity_preds.squeeze(1)], [recs_sc, confs_sc, ex, subj, opine])
            epoch_loss.append(loss.item())
            loss.backward()
            optimizerMain.step()
            optimizerScaffold.step()
            optimizer.step()
        #print("Epoch {} Loss: {:.3f}".format(epoch, np.average(epoch_loss)))
            del papers_sc
            del reviews_sc
            gc.collect()
        # 	break
        # break

        with torch.no_grad():
            eval_loss = evaluate(model, main_task_predictor, scaffold_task_predictor, Criterion, test_loader)

            print('Epoch: {} Train Loss: {:.6f}, Test Loss: {:.6f}'.format(epoch, np.average(epoch_loss),\
                            eval_loss))
            # print("Exhaustive {}".format(list(zip(ex_preds.data, ex.data))))
            # print("Subjectivity {}".format(list(zip(subj_preds.data, subj.data))))
            # print("Intensity {}".format(list(zip(intensity_preds.data, opine.data))))
            # print("Recommendation {}".format(list(zip(rec_preds.data, recs_sc.data))))
            # print("Confidence {}".format(list(zip(conf_preds.data, confs_sc.data))))

            #logging.info('Predictions, Actual : {}'.format(str(list(zip(recs_preds_t, recs_sc_t)))))
        #break


def main(args, dataloaders=(main_task_dataloader, scaffold_task_dataloader, test_data_loader)):
    train(args, dataloaders)




NameError: name 'main_task_dataloader' is not defined

In [16]:
main(args, dataloaders=(main_task_dataloader, scaffold_task_dataloader, test_data_loader))

Pipeline(
  (cross_attention): CrossAttention(
    (linear1): Linear(in_features=768, out_features=256, bias=True)
    (relu): ReLU()
  )
  (contextor): Sequential(
    (coder0): Context(
      (linear): Linear(in_features=768, out_features=256, bias=True)
      (codes): Linear(in_features=256, out_features=128, bias=False)
      (act): ReLU()
    )
    (coder1): Context(
      (linear): Linear(in_features=256, out_features=256, bias=True)
      (codes): Linear(in_features=256, out_features=64, bias=False)
      (act): ReLU()
    )
    (coder2): Context(
      (linear): Linear(in_features=256, out_features=256, bias=True)
      (codes): Linear(in_features=256, out_features=32, bias=False)
      (act): ReLU()
    )
    (coder3): Context(
      (linear): Linear(in_features=256, out_features=256, bias=True)
      (codes): Linear(in_features=256, out_features=8, bias=False)
      (act): ReLU()
    )
  )
  (rec_codes): Context(
    (linear): Linear(in_features=256, out_features=256, bias=Tr

Traceback (most recent call last):
  File "/home2/tirthankar/miniconda3_1/envs/rajeev3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home2/tirthankar/miniconda3_1/envs/rajeev3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home2/tirthankar/miniconda3_1/envs/rajeev3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home2/tirthankar/miniconda3_1/envs/rajeev3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 

###  Test the trained model

In [41]:
def evaluate(model, main_task_predictor, scaffold_task_predictor, test_loader, Criterion=None):
    with torch.no_grad():
        eval_loss = []
        ex_preds_, ex_actual = [], []
        subj_preds_, subj_actual = [], []
        int_preds_, int_actual = [], []
        rec_preds_, rec_actual = [], []
        conf_preds_, conf_actual = [], []
        for i, d in enumerate(test_loader,0):
            scaffold_task_data = d
            papers_sc, reviews_sc, recs_sc, confs_sc, sign_m = scaffold_task_data[0].transpose(1,2).float().to(device),\
                             scaffold_task_data[1].transpose(1,2).float().to(device), \
                             scaffold_task_data[2].float().to(device),\
                             scaffold_task_data[3].float().to(device),\
                             scaffold_task_data[4].float().to(device)

            ex, subj, opine = sign_m[:,0], sign_m[:,1], sign_m[:,2]
            out, rec_codes, conf_codes = model(papers_sc, reviews_sc)
            rec_preds, conf_preds = scaffold_task_predictor(rec_codes.view(out.shape[0], -1), conf_codes.view(out.shape[0], -1))

            #out_m, rec_codes_m, conf_codes_m = model(papers_sc, reviews_sc)
            ex_preds, subj_preds, intensity_preds = main_task_predictor(out, rec_codes, conf_codes)
            ex_preds_.append(ex_preds.item())
            ex_actual.append(ex.item())
            subj_preds_.append(subj_preds.item())
            subj_actual.append(subj.item())
            int_preds_.append(intensity_preds.item())
            int_actual.append(opine.item())



            if Criterion != None:
                loss = Criterion([rec_preds.squeeze(1), conf_preds.squeeze(1), ex_preds.squeeze(1), subj_preds.squeeze(1), intensity_preds.squeeze(1)], [recs_sc, confs_sc, ex, subj, opine])
                eval_loss.append(loss.item())
            else:
                eval_loss.append(0)

        return np.average(eval_loss), (ex_preds_, ex_actual), (subj_preds_, subj_actual), (int_preds_, int_actual)

In [42]:
checkpoint = torch.load('./MODELS/exp3.pt', map_location=device)
args = SimpleNamespace(dim = checkpoint['dim'], upscale_dim = checkpoint['upscale_dim'], codes=checkpoint['codes']) 
model = Pipeline.Pipeline(args).to(device)
model.load_state_dict(checkpoint['model_state_dict'])
main_task_predictor = Prediction.MainPrediction(args.upscale_dim, args.upscale_dim, 32).to(device)
main_task_predictor.load_state_dict(checkpoint['main_state_dict'])
scaffold_task_predictor = Prediction.ScaffoldPrediction(args.upscale_dim, 8).to(device)
scaffold_task_predictor.load_state_dict(checkpoint['scaffold_state_dict'])
Criterion = MTLoss.MTLoss().to(device)
Criterion.load_state_dict(checkpoint['criterion_state_dict'])

In [43]:
_, _, test_dataset = getLoaders2(batch_size=1, slice=[-1,-1,-1])
test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4)

Reading the Main Task Dataset...
Reading the Scaffolds Task Dataset...
Reading the test Dataset


In [44]:
loss, exhaustive, subjective, opinion = evaluate(model, main_task_predictor, scaffold_task_predictor, test_data_loader, Criterion)

In [15]:
# list(zip(*opinion))

In [45]:
opine_pred, opine_actual = opinion
subj_pred, subj_actual = subjective
exhaustive_pred, exhaustive_actual = exhaustive


In [29]:
from numpy import dot
from numpy.linalg import norm
def cosine(a, b):
    return dot(a,b)/(norm(a)*norm(b))
def rmse(predictions, targets):
    return np.sqrt(np.mean((np.asarray(predictions)-np.asarray(targets))**2))

In [30]:
def get_metric(score):
    RMSE = rmse(score[0], score[1])
    sim = cosine(score[0], score[1])
    print("RMSE: {}, cosine similarity: {}".format(RMSE, sim))

In [33]:
for score in [(exhaustive_pred, exhaustive_actual), (subj_pred, subj_actual), (opine_pred, opine_actual)]:
    get_metric(score)

RMSE: 0.5017433881250435, cosine similarity: 0.9895367837062644
RMSE: 0.8054140323941106, cosine similarity: 0.982373976678173
RMSE: 1.45750686097663, cosine similarity: 0.9397649115706209


### Mean Baseline

In [34]:
exhaustive_pred = [np.average(exhaustive_actual)]*len(exhaustive_actual)
subj_pred = [np.average(subj_actual)]*len(exhaustive_actual)
opine_pred = [np.average(opine_actual)]*len(exhaustive_actual)
for score in [(exhaustive_pred, exhaustive_actual), (subj_pred, subj_actual), (opine_pred, opine_actual)]:
    get_metric(score)

RMSE: 1.5518540202548305, cosine similarity: 0.8932938675323975
RMSE: 2.1199669478437224, cosine similarity: 0.8705800832137459
RMSE: 1.6752461775372902, cosine similarity: 0.9184800659455056


### Examples

In [82]:
import os
import json
path = './Data/SignData/test/reviews'
reviews = os.listdir(path)
filenames = os.listdir('./Data/SignData/test_data/')

review_text = []
indices = []
for i, filename in enumerate(filenames):
    split = filename.split('_')
    fname, count = ''.join(c for c in split[:-1]), split[-1]
    if fname in reviews:
        rev = json.load(open(os.path.join(path, fname)))['reviews'][int(count)]
        review_text.append(rev)
    else:
        indices.append(i)

In [83]:
from collections import defaultdict
dict_ = defaultdict(lambda: {})
for i in range(len(opine_pred)):
    scores = {}
    if i not in indices:
        scores['layer1'] = (exhaustive_pred[i], exhaustive_actual[i])
        scores['layer2'] = (subj_pred[i], subj_actual[i])
        scores['layer3'] = (opine_pred[i], opine_actual[i])
        dict_[i] = scores
        

In [84]:
dict_

defaultdict(<function __main__.<lambda>()>,
            {0: {'layer1': (5.4782867431640625, 5.480629920959473),
              'layer2': (6.908432960510254, 6.05905818939209),
              'layer3': (3.190779209136963, 5.699191570281982)},
             1: {'layer1': (1.4492809772491455, 1.3321858644485474),
              'layer2': (1.6120046377182007, 1.3093312978744507),
              'layer3': (4.069751739501953, 3.9679465293884277)},
             2: {'layer1': (4.4633049964904785, 4.013131618499756),
              'layer2': (5.593626022338867, 6.707392692565918),
              'layer3': (2.8656005859375, 2.478076457977295)},
             3: {'layer1': (8.404945373535156, 8.781258583068848),
              'layer2': (10.596014976501465, 12.72822380065918),
              'layer3': (2.6522088050842285, 1.299450397491455)},
             6: {'layer1': (2.159173011779785, 1.9929183721542358),
              'layer2': (2.7302439212799072, 2.5044960975646973),
              'layer3': (4.34126

In [85]:
91, 89, 66, 62, 59, 54, (37)

(91, 89, 66, 62, 59, 54, 37)

In [86]:
l1, l2 = np.array([0.05428571,0.93013972,-0.99966815]), np.array([33.26288336,252.25067107,0.99966679])
def normalize(s):
    s = np.array(s)
    sent = s[2]
    s = (s - l1)/(l2 - l1)
    s = (s*9) + 1
    s[2] = sent
    return s

In [91]:
for i in [79]:
    new_scores = normalize(review_text[i]['SCORES'])
    print(new_scores)
    print(review_text[i])

[ 2.8522656   4.01916201 -0.78107315]
{'RECOMMENDATION': '4', 'REVIEW TITLE': ' ', 'comments': '"The authors tackle the problem of estimating risk in a survival analysis setting with competing risks. They propose directly optimizing the time-dependent discrimination index using a siamese survival network. Experiments on several real-world dataset reveal modest gains in comparison with the state of the art.\\n\\n- The authors should clearly highlight what is their main technical contribution. For example, Eqs. 1-6 appear to be background material since the time-dependent discrimination index is taken from the literature, as the authors point out earlier. However, this is unclear from the writing. \\n\\n- One of the main motivations of the authors is to propose a model that is specially design to avoid the nonidentifiability issue in an scenario with competing risks. It is unclear why the authors solution is able to solve such an issue, specially given the modest reported gains in compar

In [95]:
for i, review in enumerate(review_text):
    new_scores = normalize(review_text[i]['SCORES'])
    for k,v in dict_.items():
        if np.round(dict_[k]['layer1'][1], decimals=3) == np.round(new_scores[0], decimals=3):
            review_text[i]['PREDICTIONS'] = dict_[k]
            review_text[i]['NORMALISED'] = new_scores

In [97]:
for i, rev in enumerate(review_text):
    print(i)
    print(rev['PREDICTIONS'])
    print(rev['NORMALISED'])

0
{'layer1': (5.4782867431640625, 5.480629920959473), 'layer2': (6.908432960510254, 6.05905818939209), 'layer3': (3.190779209136963, 5.699191570281982)}
[5.48062988 6.05905833 0.04426479]
1
{'layer1': (1.4492809772491455, 1.3321858644485474), 'layer2': (1.6120046377182007, 1.3093312978744507), 'layer3': (4.069751739501953, 3.9679465293884277)}
[ 1.33218592  1.30933134 -0.34045634]
2
{'layer1': (4.4633049964904785, 4.013131618499756), 'layer2': (5.593626022338867, 6.707392692565918), 'layer3': (2.8656005859375, 2.478076457977295)}
[ 4.01313185  6.70739261 -0.67153855]
3
{'layer1': (8.404945373535156, 8.781258583068848), 'layer2': (10.596014976501465, 12.72822380065918), 'layer3': (2.6522088050842285, 1.299450397491455)}
[ 8.78125838 12.72822394 -0.93345546]
4
{'layer1': (2.159173011779785, 1.9929183721542358), 'layer2': (2.7302439212799072, 2.5044960975646973), 'layer3': (4.341269016265869, 3.453021287918091)}
[ 1.99291843  2.50449618 -0.45488415]
5
{'layer1': (3.7611050605773926, 3.685

KeyError: 'PREDICTIONS'

In [98]:
tst_indices = [77, 63, 51, 48, 43, 29, 28, 8, 3]
with open('analysis', 'w') as f:
    for ind in tst_indices:
        f.w('##############')
        print(review_text[ind])
        print('##############')

##############
{'RECOMMENDATION': '6', 'REVIEW TITLE': ' ', 'comments': '"This paper investigates the complexity of neural networks with piecewise linear activations by studying the number of linear regions of the representable functions. It builds on previous works Montufar et al. (2014) and Raghu et al. (2017) and presents improved bounds on the maximum number of linear regions. It also evaluates the number of regions of small networks during training. \\n\\nThe improved upper bound given in Theorem 1 appeared in SampTA 2017 - Mathematics of deep learning ``Notes on the number of linear regions of deep neural networks\'\' by Montufar. \\n\\nThe improved lower bound given in Theorem 6 is very modest but neat. Theorem 5 follows easily from this. \\n\\nThe improved upper bound for maxout networks follows a similar intuition but appears to be novel. \\n\\nThe paper also discusses the exact computation of the number of linear regions in small trained networks. It presents experiments duri