In [1]:
from Model.NQModel import NQModel
from Model.LossFn import LossFn
import torch
import time
import sklearn
import datetime
import Model.datasetutils as datasetutils
import Model.tensorboardutils as boardutils
import torch.utils.tensorboard as tensorboard
from tqdm import tqdm_notebook as tqdm
import transformers

In [2]:
from transformers import BertModel

In [3]:
# Load the pretrained 'bert-base-uncased' encoder.
# NOTE(review): the name `model` is rebound to an NQModel instance in a later
# cell, so this BertModel object is not the network that train() optimises.
model = BertModel.from_pretrained('bert-base-uncased')

In [4]:
# Freeze every parameter of the currently bound `model` so that autograd does
# not compute gradients for it. The attribute is `requires_grad`; the former
# spelling `require_grad` only attached an unused attribute and froze nothing.
for params in model.parameters() :
    params.requires_grad = False

## Constants

In [5]:
# Log directory for this run: the suffix is the number of whole minutes elapsed
# since the fixed reference timestamp 1583988084, so runs started in different
# minutes land in different directories.
TensorBoardLocation = f'runs/NQ_TIME:{int((time.time() - 1583988084) / 60)}'
TensorBoardLocation

'runs/NQ_TIME:23435'

In [6]:
# Run-wide settings shared by the cells below.
epochs = 1  # no loop

# Prefer the first CUDA device when one is available, otherwise stay on CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# TensorBoard writer for every scalar and figure logged by this notebook.
writer = tensorboard.SummaryWriter(log_dir=TensorBoardLocation)

## Dataset

In [7]:
# Build the training and validation batch iterables (16 workers requested).
# NOTE(review): the recorded run of train() below stops with
# "OSError: [Errno 24] Too many open files" once validation starts iterating
# `validgen`. Presumably the worker processes exhaust the process
# file-descriptor limit -- TODO confirm; lowering num_workers or raising the
# limit are the usual remedies.
traingen, validgen = datasetutils.get_dataset(num_workers = 16)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [8]:
# Number of batches per pass over each loader; displayed as (train, validation).
num_steps, val_steps = len(traingen), len(validgen)
(num_steps, val_steps)

(179378, 9441)

## MODEL

In [9]:
# List the TensorBoard run directories already present under runs/.
!ls runs

[34mNQ_TIME:23346[m[m [34mNQ_TIME:23368[m[m [34mNQ_TIME:23381[m[m [34mNQ_TIME:23412[m[m [34mNQ_TIME:23435[m[m
[34mNQ_TIME:23366[m[m [34mNQ_TIME:23378[m[m [34mNQ_TIME:23389[m[m [34mNQ_TIME:23428[m[m


In [10]:
# Instantiate the NQ network and move it to the selected device. This rebinds
# `model`, replacing whatever the name referred to in earlier cells.
model = NQModel()
model = model.to(device)

In [11]:
optim = transformers.AdamW(model.parameters())

# Warm up for 100 steps, then follow a single half-cosine decay (num_cycles=0.5)
# across the whole run. The horizon must equal the number of optimizer steps
# actually taken: with the previous hard-coded 800 the learning rate reached
# zero after 800 of `num_steps` batches and then cycled back up instead of
# decaying once.
scheduler = transformers.get_cosine_schedule_with_warmup(
    optim,
    num_warmup_steps=100,
    num_training_steps=num_steps * epochs,
    num_cycles=0.5,
    last_epoch=-1,
)

## Confusion Matrix

In [12]:
# Axis labels for the 3x3 answer-type confusion matrix.
AnswerTypes = [
    'Wrong Ans',
    'Short Ans',
    'Yes No',
]

# Axis labels for the 2x2 matrices (also reused for the start/end matrices).
YesNoLabels = [
    'No',
    'Yes',
]

In [13]:
def _pair_counts(rows, cols, like):
    """Count (row, col) index pairs into a matrix with the shape/dtype of `like`."""
    n_rows, n_cols = like.shape
    flat = rows.flatten().long() * n_cols + cols.flatten().long()
    # minlength keeps the result at full size even when some (or all but one)
    # of the cells never occur in this batch.
    counts = torch.bincount(flat, minlength=n_rows * n_cols)
    return counts.reshape(n_rows, n_cols).to(like.dtype)


def _threshold_logits(logits):
    """Flatten `logits` and map them to 0/1 integer predictions (sigmoid >= 0.5)."""
    return (torch.sigmoid(logits.flatten()) >= 0.5).long()


def update_confusion_matrix(ATMatrix, YNMatrix, StartM, EndM, output, target) : 
    """Accumulate one batch of predictions into the four confusion matrices.

    All four matrices are updated in place.

    Args:
        ATMatrix: 3x3 answer-type matrix, indexed [prediction][truth].
        YNMatrix: 2x2 yes/no matrix, indexed [prediction][truth].
        StartM:   2x2 start matrix, indexed [truth][prediction].
        EndM:     2x2 end matrix, indexed [truth][prediction].
        output:   sequence of four CPU tensors (answer-type scores, start
                  logits, end logits, yes/no logits).
        target:   sequence of four tensors in the same order; answer type is
                  one-hot along its last axis, the others hold 0/1 labels.

    NOTE(review): the row/column orientation differs between the first two and
    the last two matrices. That is inherited from the previous implementation
    and kept so existing plots stay comparable -- confirm which orientation
    boardutils.confusion_matrix_image expects.

    The start/end matrices were previously built with
    sklearn.metrics.confusion_matrix without `labels`; for a batch containing a
    single class that returns a 1x1 matrix which `+=` silently broadcast into
    all four cells. Counting with a fixed-size bincount avoids that and no
    longer depends on sklearn.metrics having been imported.
    """
    # Collapse any extra singleton axis so argmax runs over the class axis.
    # Callers squeeze the answer-type target before computing the loss, so it
    # may arrive here as (batch, 1, classes) -- TODO confirm against the dataset.
    predsT = output[0].reshape(-1, output[0].shape[-1]).argmax(dim = 1)
    truthT = target[0].reshape(-1, target[0].shape[-1]).argmax(dim = 1)
    ATMatrix += _pair_counts(predsT, truthT, ATMatrix)

    predsYN = _threshold_logits(output[3])
    YNMatrix += _pair_counts(predsYN, target[3], YNMatrix)

    # Start/end keep the [truth][prediction] layout produced before.
    StartM += _pair_counts(target[1], _threshold_logits(output[1]), StartM)
    EndM   += _pair_counts(target[2], _threshold_logits(output[2]), EndM)

In [14]:
def log_confusion_matrix(matrix, labels, name, step): 
    """Draw `matrix` as a labelled figure and write it to TensorBoard.

    The tensor is converted to a NumPy array, rendered by
    boardutils.confusion_matrix_image with `labels`, and stored through the
    notebook-level `writer` under tag `name` at global step `step`.
    """
    as_array = matrix.numpy()
    rendered = boardutils.confusion_matrix_image(as_array, labels)
    writer.add_figure(tag=name, figure=rendered, global_step=step)
    
def log_matrices(AnsTypeM, YNM, StM, EndM, call_type, steps):
    """Log all four confusion matrices for one step.

    `call_type` is appended to each figure title (e.g. " train" or " eval") so
    that training and validation figures get separate TensorBoard tags.
    """
    panels = (
        (AnsTypeM, AnswerTypes, "Answer type confusion matrix"),
        (YNM, YesNoLabels, "Yes No confusion matrix"),
        (StM, YesNoLabels, "Start confusion matrix"),
        (EndM, YesNoLabels, "End confusion matrix"),
    )
    for matrix, labels, title in panels:
        log_confusion_matrix(matrix, labels, title + call_type, steps)

## Run

In [15]:
def validate(val_num, max_batches=None) : 
    """Evaluate `model` on `validgen` and log losses and confusion matrices.

    Args:
        val_num: index of this validation round; used (as an int) for the
            TensorBoard global step of everything logged here.
        max_batches: optional cap on the number of validation batches to
            process. None (default) walks the whole loader, as before.

    Changes from the previous version:
      * The model's train/eval mode is restored on exit. Previously the model
        was left in eval mode, so every training step after the first
        validation ran with eval-mode behaviour.
      * The encoding is squeezed before the forward pass, matching train().
      * Logged losses are the mean per batch rather than a running sum, so
        they are comparable to the per-batch training losses and do not depend
        on how many batches were evaluated.
      * The global step is cast to int (train() used to pass steps/20, a float).
    """
    from itertools import islice  # local: only needed when max_batches is set

    ValAnswerTypeMatrix = torch.zeros([3, 3], requires_grad = False)
    ValYesNoMatrix      = torch.zeros([2, 2], requires_grad = False)
    ValStartMatrix      = torch.zeros([2, 2], requires_grad = False)
    ValEndMatrix        = torch.zeros([2, 2], requires_grad = False)

    at_l, start_l, end_l, yn_l = 0.0, 0.0, 0.0, 0.0
    n_batches = 0

    if max_batches is None:
        batches, total = validgen, len(validgen)
    else:
        batches, total = islice(validgen, max_batches), min(max_batches, len(validgen))

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for bert_enc, ans_type, start, end, yes_no in tqdm(batches, total=total) : 
                output = model(bert_enc.to(device).squeeze())

                # Metrics and losses are computed on CPU copies of the four heads.
                detached_output = tuple(output[i].detach().cpu() for i in range(4))
                update_confusion_matrix(ValAnswerTypeMatrix, ValYesNoMatrix, ValStartMatrix, ValEndMatrix, detached_output, (ans_type, start, end, yes_no))

                ## Calculate Loss
                at_l += LossFn.loss_AT(detached_output[0], ans_type.squeeze().argmax(1)).item()
                start_l += LossFn.loss_start(detached_output[1], start.squeeze().float()).item()
                end_l += LossFn.loss_end(detached_output[2], end.squeeze().float()).item()
                yn_l += LossFn.loss_yes_no(detached_output[3], yes_no.squeeze()).item()
                n_batches += 1
    finally:
        # Hand the model back in the mode the caller had it in.
        if was_training:
            model.train()

    step = int(val_num)
    denom = max(n_batches, 1)  # avoid dividing by zero on an empty loader

    ## Save loss values
    writer.add_scalars('Loss values Validation',
        {"AT_loss_val" : at_l / denom, "Start_loss_val" : start_l / denom, "End_loss_val" : end_l / denom, "Yes_no_loss_val" : yn_l / denom},
        step, time.time())

    log_matrices(ValAnswerTypeMatrix, ValYesNoMatrix, ValStartMatrix, ValEndMatrix, " eval", step)

In [16]:
def train(log_every=5, validate_every=20) : 
    """Run one pass over `traingen`, optimising `model` and logging to TensorBoard.

    Args:
        log_every: log the running training confusion matrices every this many
            steps (default 5, the previous hard-coded value).
        validate_every: call validate() every this many steps (default 20, the
            previous hard-coded value).

    Per step: forward pass, confusion-matrix update, sum of the four head
    losses, backward, optimizer and scheduler step, scalar logging.

    Changes from the previous version:
      * model.train() is re-asserted after every validate() call; validate()
        puts the model in eval mode, so training previously continued in eval
        mode after the first validation round.
      * validate() receives an integer round index (steps // validate_every)
        instead of the float steps/20.
    """
    AnswerTypeMatrix = torch.zeros([3,3], requires_grad = False)
    YesNoMatrix      = torch.zeros([2,2], requires_grad = False)
    StartMatrix      = torch.zeros([2,2], requires_grad = False)
    EndMatrix        = torch.zeros([2,2], requires_grad = False)

    start_time = time.time()
    model.train()

    for steps, (bert_enc, ans_type, start, end, yes_no) in enumerate(tqdm(traingen)) : 
        optim.zero_grad()
        output = model(bert_enc.to(device).squeeze())

        # CPU copies without graph history, used only for metrics.
        detached_output = tuple(output[i].detach().cpu() for i in range(4))

        ## Calculate Confusion Matrix
        update_confusion_matrix(AnswerTypeMatrix, YesNoMatrix, StartMatrix, EndMatrix, detached_output, (ans_type, start, end, yes_no))
        if steps % log_every == 0:
            log_matrices(AnswerTypeMatrix, YesNoMatrix, StartMatrix, EndMatrix, " train", steps)

        ## Calculate Loss
        AT_loss = LossFn.loss_AT(output[0], ans_type.squeeze().argmax(1).to(device))
        Start_loss = LossFn.loss_start(output[1], start.squeeze().float().to(device))
        End_loss = LossFn.loss_end(output[2], end.squeeze().float().to(device))
        Yes_no_loss = LossFn.loss_yes_no(output[3], yes_no.squeeze().to(device))

        ## Update model params and optim/sched
        total_loss = AT_loss + Start_loss + End_loss + Yes_no_loss
        total_loss.backward()
        optim.step()
        scheduler.step()

        ## Save loss values
        writer.add_scalars('Loss values',
            {"AT_loss" : AT_loss.item(),"Start_loss":Start_loss.item(), "End_loss":End_loss.item(), "Yes_no_loss":Yes_no_loss.item()},
            steps, time.time())

        # Linear extrapolation of the time for the full pass from the steps so far.
        cur_time = time.time() - start_time
        expected_time = (cur_time*num_steps)/(steps + 1)
        print ("elapsed time : " + str(cur_time)+ " : expected time : " +  str(expected_time))

        if steps % validate_every == 0 :
            validate(steps // validate_every)
            # validate() switches the model to eval mode; switch back before
            # the next optimisation step.
            model.train()

In [17]:
# Start training; train() also triggers validate() periodically from inside its loop.
train()

HBox(children=(IntProgress(value=0, max=179378), HTML(value='')))

torch.Size([64, 1, 512, 768])
elapsed time : 478.9382960796356 : expected time : 85910988.45659304


HBox(children=(IntProgress(value=0, max=9441), HTML(value='')))

OSError: [Errno 24] Too many open files

In [None]:
# Scratch expression: evaluates to the tuple (16, 393216).
# NOTE(review): purpose is not evident from the notebook -- presumably sizing
# arithmetic left over from debugging; confirm and delete if unused.
16, 512*768