In [1]:
from data_utils_ import *
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import logging
from collections import OrderedDict
import argparse
import numpy as np
from Models import Pipeline, MTLoss, Prediction #CrossAttention, Context
torch.cuda.empty_cache()
import gc
from types import SimpleNamespace

In [4]:
# CUDA device strings (GPU indices 7 and 2 on this machine).
device1, device2 = "cuda:7", "cuda:2"

In [5]:
# Run configuration, exposed with attribute access (args.dim, args.batch_size, ...).
args = SimpleNamespace(
    **{
        "dim": 768,
        "upscale_dim": 256,
        "codes": '64,32,8',        # comma-separated sizes, kept as a string
        "batch_size": 4,
        "learning_rate": 0.01,     # passed to Adam as lr in train()
        "weight_decay": 0.005,     # passed to Adam as weight_decay in train()
    }
)

In [6]:
def getLoaders2(main_task_path = './Data/SignData/train_data', scaffold_task_path = './Data/2018/train_data', batch_size=8, slice=[-1, -1, -1], test_path='./Data/SignData/test_data'):
    """Read the main-task, scaffold-task and (optional) test datasets.

    Despite the name, this returns datasets, not DataLoaders. Every dataset
    is a ``RevSigData`` built with a fresh ``Transform()``; the main and test
    datasets additionally receive ``sigtx=ScaleSigScores()``.

    If the main-task dataset is shorter than the scaffold-task dataset, it is
    padded with main-task samples drawn at random (with replacement) until
    both have the same length.

    Args:
        main_task_path: location of the main-task training data.
        scaffold_task_path: location of the scaffold-task training data.
        batch_size: accepted for call compatibility; not used here.
        slice: three values forwarded as ``slice_`` to the main, scaffold and
            test ``RevSigData`` respectively. The meaning of -1 is defined by
            ``RevSigData`` (presumably "no limit" -- TODO confirm). The list
            is only read, never mutated.
        test_path: location of the test data; a falsy value skips the test
            dataset entirely.

    Returns:
        ``(main_task_dataset, scaffold_task_dataset, test_dataset)`` where
        ``test_dataset`` is ``None`` when ``test_path`` is falsy.
    """
    # Imported locally so the function does not depend on `random` leaking in
    # through a star import.
    import random

    print('Reading the Main Task Dataset...')
    main_task_dataset = RevSigData(main_task_path, mode='MAIN', slice_=slice[0], transform=Transform(), sigtx=ScaleSigScores())

    print('Reading the Scaffolds Task Dataset...')
    scaffold_task_dataset = RevSigData(scaffold_task_path, mode='SCAFFOLDS', slice_=slice[1], transform=Transform())

    test_dataset = None
    if test_path:
        print('Reading the test Dataset')
        test_dataset = RevSigData(test_path, mode='TEST', slice_=slice[2], transform=Transform(), sigtx=ScaleSigScores())

    # Note: no len(test_dataset) here -- it may legitimately be None.
    main_task_len = len(main_task_dataset)
    scaffold_task_len = len(scaffold_task_dataset)

    # Inflate the main-task dataset to match the scaffold-task dataset.
    if main_task_len < scaffold_task_len:
        difference = scaffold_task_len - main_task_len
        # Samples are drawn eagerly: each padded item is fetched from the
        # dataset once, right here.
        sample = [random.choice(main_task_dataset) for _ in range(difference)]
        # The resulting type is whatever RevSigData's `+` produces.
        main_task_dataset = main_task_dataset + sample

    return (main_task_dataset, scaffold_task_dataset, test_dataset)

In [8]:
# Read the three datasets; the slice values are forwarded to RevSigData as slice_.
(
    main_task_dataset,
    scaffold_task_dataset,
    test_dataset,
) = getLoaders2(
    batch_size=args.batch_size,
    slice=[100, 100, 100],
)

Reading the Main Task Dataset...
Reading the Scaffolds Task Dataset...
Reading the test Dataset


In [9]:
# Wrap each dataset in a shuffling DataLoader that uses 4 worker processes.
main_task_dataloader = DataLoader(main_task_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
scaffold_task_dataloader = DataLoader(scaffold_task_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)

# Identity check on purpose: `!= None` would dispatch to any custom
# __ne__/__eq__ defined by the dataset class.
if test_dataset is not None:
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
else:
    test_data_loader = None

In [9]:
def evaluate(model, main_task_predictor, scaffold_task_predictor, Criterion, test_loader):
    """Return the average multi-task loss over ``test_loader``.

    Each batch is indexed as: ``[0]`` papers and ``[1]`` reviews (both
    transposed on dims 1 and 2 before the forward pass), ``[2]`` and ``[3]``
    the targets for the two scaffold predictions, and ``[4]`` a tensor whose
    columns 0, 1 and 2 are the targets for the three main-task predictions.

    The model is run once per batch and its outputs feed both predictors.
    Modules are switched to eval mode for the duration of the call and put
    back into whatever mode they were in, even if evaluation raises. Batches
    are moved to the device of the model's parameters, so no notebook-global
    device name is needed.

    Args:
        model: callable ``model(papers, reviews) -> (out, rec_codes, conf_codes)``.
        main_task_predictor: callable ``(out, rec_codes, conf_codes)`` returning
            three prediction tensors.
        scaffold_task_predictor: callable taking the flattened rec/conf codes
            and returning two prediction tensors.
        Criterion: callable ``(list_of_predictions, list_of_targets) -> loss``.
        test_loader: iterable of batches, or ``None``.

    Returns:
        The mean of the per-batch losses, or NaN when ``test_loader`` is
        ``None`` or yields no batches.
    """
    if test_loader is None:
        return float('nan')

    # Follow the model's own placement; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # Remember the mode of every submodule so it can be restored exactly.
    candidates = (model, main_task_predictor, scaffold_task_predictor)
    modules = [m for m in candidates if isinstance(m, torch.nn.Module)]
    saved_modes = [(sub, sub.training) for m in modules for sub in m.modules()]
    for m in modules:
        m.eval()

    eval_loss = []
    try:
        with torch.no_grad():
            for batch in test_loader:
                papers = batch[0].transpose(1, 2).float().to(device)
                reviews = batch[1].transpose(1, 2).float().to(device)
                recs = batch[2].float().to(device)
                confs = batch[3].float().to(device)
                sign = batch[4].float().to(device)
                ex, subj, opine = sign[:, 0], sign[:, 1], sign[:, 2]

                # One forward pass serves both heads; a second identical pass
                # would only double the compute and memory.
                out, rec_codes, conf_codes = model(papers, reviews)
                batch_size = out.shape[0]
                rec_preds, conf_preds = scaffold_task_predictor(rec_codes.view(batch_size, -1), conf_codes.view(batch_size, -1))
                ex_preds, subj_preds, intensity_preds = main_task_predictor(out, rec_codes, conf_codes)

                loss = Criterion(
                    [rec_preds.squeeze(1), conf_preds.squeeze(1), ex_preds.squeeze(1), subj_preds.squeeze(1), intensity_preds.squeeze(1)],
                    [recs, confs, ex, subj, opine],
                )
                eval_loss.append(loss.item())
    finally:
        for sub, was_training in saved_modes:
            sub.training = was_training

    if not eval_loss:
        return float('nan')
    return np.average(eval_loss)



def train(args, dataloaders):
    """Jointly train the pipeline and both prediction heads.

    Each step takes one scaffold-task batch and one main-task batch, runs the
    shared model on both, and optimises a single loss from ``MTLoss`` over
    the two scaffold predictions and the three main-task predictions. After
    every epoch the train loss (and the test loss, when a test loader is
    given) is printed.

    Args:
        args: namespace providing ``upscale_dim``, ``learning_rate`` and
            ``weight_decay``, plus whatever ``Pipeline.Pipeline`` reads.
            Optional attributes: ``epochs`` (default 100) and ``device``.
        dataloaders: ``(main_task_loader, scaffold_task_loader, test_loader)``;
            ``test_loader`` may be ``None`` to skip evaluation.

    Returns:
        None.
    """
    main_task_loader, scaffold_task_loader, test_loader = dataloaders

    # Device resolution order: args.device, then a notebook-global `device`
    # (honoured if one exists), then the global `device1`, then whatever
    # hardware is available.
    device = getattr(args, 'device', None)
    if device is None:
        device = globals().get('device')
    if device is None:
        device = globals().get('device1')
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = Pipeline.Pipeline(args).to(device)
    main_task_predictor = Prediction.MainPrediction(args.upscale_dim, args.upscale_dim, 16).to(device)
    scaffold_task_predictor = Prediction.ScaffoldPrediction(args.upscale_dim, 8).to(device)

    print(model)
    for name, param in model.named_parameters():
        print(name, param.shape)
    print("No. of Trainable parameters {}".format(sum(p.numel() for p in model.parameters() if p.requires_grad)))

    Criterion = MTLoss.MTLoss().to(device)
    # A single optimizer covers the model, the loss module and both heads.
    trainable = (
        list(model.parameters())
        + list(Criterion.parameters())
        + list(main_task_predictor.parameters())
        + list(scaffold_task_predictor.parameters())
    )
    optimizer = torch.optim.Adam(trainable, lr=args.learning_rate, weight_decay=args.weight_decay)
    epochs = getattr(args, 'epochs', 100)

    for epoch in range(epochs):
        model.train()
        main_task_predictor.train()
        scaffold_task_predictor.train()
        epoch_loss = []

        # zip() stops at the shorter loader, so surplus batches of the longer
        # one are not seen in this epoch.
        for scaffold_task_data, main_task_data in zip(scaffold_task_loader, main_task_loader):
            papers_sc = scaffold_task_data[0].transpose(1, 2).float().to(device)
            reviews_sc = scaffold_task_data[1].transpose(1, 2).float().to(device)
            recs_sc = scaffold_task_data[2].float().to(device)
            confs_sc = scaffold_task_data[3].float().to(device)

            optimizer.zero_grad()

            # Scaffold task forward pass.
            out, rec_codes, conf_codes = model(papers_sc, reviews_sc)
            rec_preds, conf_preds = scaffold_task_predictor(rec_codes.view(out.shape[0], -1), conf_codes.view(out.shape[0], -1))
            del papers_sc, reviews_sc

            papers_m = main_task_data[0].transpose(1, 2).float().to(device)
            reviews_m = main_task_data[1].transpose(1, 2).float().to(device)
            sign_m = main_task_data[2].float().to(device)
            ex, subj, opine = sign_m[:, 0], sign_m[:, 1], sign_m[:, 2]

            # Main task forward pass through the same shared model.
            out_m, rec_codes_m, conf_codes_m = model(papers_m, reviews_m)
            ex_preds, subj_preds, intensity_preds = main_task_predictor(out_m, rec_codes_m, conf_codes_m)

            loss = Criterion(
                [rec_preds.squeeze(1), conf_preds.squeeze(1), ex_preds.squeeze(1), subj_preds.squeeze(1), intensity_preds.squeeze(1)],
                [recs_sc, confs_sc, ex, subj, opine],
            )
            epoch_loss.append(loss.item())
            loss.backward()
            optimizer.step()

            del papers_m, reviews_m
            gc.collect()

        # evaluate() disables gradient tracking itself; skip it when there is
        # no test data instead of crashing on a None loader.
        if test_loader is not None:
            eval_loss = evaluate(model, main_task_predictor, scaffold_task_predictor, Criterion, test_loader)
        else:
            eval_loss = float('nan')

        print('Epoch: {} Train Loss: {:.6f}, Test Loss: {:.6f}'.format(epoch, np.average(epoch_loss), eval_loss))


def main(args, dataloaders=None):
    """Run training with ``args`` on the given loaders.

    Args:
        args: configuration namespace forwarded to ``train``.
        dataloaders: ``(main_task_loader, scaffold_task_loader, test_loader)``.
            When omitted, the notebook-level ``main_task_dataloader``,
            ``scaffold_task_dataloader`` and ``test_data_loader`` are used.
    """
    if dataloaders is None:
        # Looked up at call time, so loaders rebuilt after this cell was run
        # are picked up, and defining main() does not require them to exist.
        dataloaders = (main_task_dataloader, scaffold_task_dataloader, test_data_loader)
    train(args, dataloaders)




In [11]:
# Start training on the three loaders created earlier in the notebook.
main(
    args,
    dataloaders=(
        main_task_dataloader,
        scaffold_task_dataloader,
        test_data_loader,
    ),
)

Pipeline(
  (cross_attention): CrossAttention(
    (linear1): Linear(in_features=768, out_features=256, bias=True)
    (relu): ReLU()
  )
  (contextor): Sequential(
    (coder0): Context(
      (linear): Linear(in_features=768, out_features=256, bias=True)
      (codes): Linear(in_features=256, out_features=64, bias=False)
      (act): ReLU()
    )
    (coder1): Context(
      (linear): Linear(in_features=256, out_features=256, bias=True)
      (codes): Linear(in_features=256, out_features=32, bias=False)
      (act): ReLU()
    )
    (coder2): Context(
      (linear): Linear(in_features=256, out_features=256, bias=True)
      (codes): Linear(in_features=256, out_features=8, bias=False)
      (act): ReLU()
    )
  )
  (rec_codes): Context(
    (linear): Linear(in_features=256, out_features=256, bias=True)
    (codes): Linear(in_features=256, out_features=8, bias=False)
    (act): ReLU()
  )
  (conf_codes): Context(
    (linear): Linear(in_features=256, out_features=256, bias=True)
    

RuntimeError: CUDA out of memory. Tried to allocate 7.14 GiB (GPU 7; 10.91 GiB total capacity; 301.59 MiB already allocated; 3.10 GiB free; 7.00 GiB cached)

In [10]:
# Show the configuration namespace as this cell's output.
# NOTE(review): the saved output under this cell has no `weight_decay` field
# although the `args` definition sets one -- the output looks stale; re-run to refresh.
args

namespace(dim=768,
          upscale_dim=256,
          codes='64,32,8',
          batch_size=4,
          learning_rate=0.01)