In [1]:
%load_ext autoreload
%autoreload 2

import torch
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import pandas as pd
import argparse
from tqdm import tqdm
import sys
from transformers import get_linear_schedule_with_warmup
from datetime import date
from torch.utils.tensorboard import SummaryWriter
sys.path.insert(0, '../')
from models.elmo import returnElmoModel
sys.path.insert(0, '../utils')
from utils.allennlp_utils import returnDataLoader

In [2]:
train_loader = returnDataLoader("../data/train.csv", 16)
valid_loader = returnDataLoader("../data/valid.csv", 16)

7612it [00:04, 1640.91it/s]
1632it [00:00, 1903.23it/s]


In [3]:
model = returnElmoModel()

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

elmoModel(
  (word_embeddings): BasicTextFieldEmbedder(
    (token_embedder_tokens): ElmoTokenEmbedder(
      (_elmo): Elmo(
        (_elmo_lstm): _ElmoBiLm(
          (_token_embedder): _ElmoCharacterEncoder(
            (char_conv_0): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
            (char_conv_1): Conv1d(16, 32, kernel_size=(2,), stride=(1,))
            (char_conv_2): Conv1d(16, 64, kernel_size=(3,), stride=(1,))
            (char_conv_3): Conv1d(16, 128, kernel_size=(4,), stride=(1,))
            (char_conv_4): Conv1d(16, 256, kernel_size=(5,), stride=(1,))
            (char_conv_5): Conv1d(16, 512, kernel_size=(6,), stride=(1,))
            (char_conv_6): Conv1d(16, 1024, kernel_size=(7,), stride=(1,))
            (_highways): Highway(
              (_layers): ModuleList(
                (0): Linear(in_features=2048, out_features=4096, bias=True)
                (1): Linear(in_features=2048, out_features=4096, bias=True)
              )
            )
            (_project

In [195]:
#from transformers import get_cosine_with_hard_restarts_schedule_with_warmup, get_cosine_schedule_with_warmup
from torch.optim.lr_scheduler import LambdaLR

In [224]:
def special_lr_decay(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
    def lr_lambda(current_step):
        if current_step < num_warmup_steps:
            return 1e-3
        elif current_step < 2 * num_warmup_steps:
            return 1e-2
        return 1

    return LambdaLR(optimizer, lr_lambda, last_epoch)

In [229]:
loss_fcn = nn.BCEWithLogitsLoss()
#optimizer = torch.optim.Adam(model.parameters(), lr = 3e-3)
optimizer = torch.optim.Adam(model.parameters(), lr = 3e-5)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
#scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 30, # Default value in run_glue.py
#                                            num_training_steps = len(train_loader.dataset))    
#scheduler = special_lr_decay(optimizer, len(train_loader), num_training_steps = len(train_loader.dataset))

In [7]:
#model = returnElmoModel()
#model.to(device)
#model.zero_grad()
#model.train()

optimizer = torch.optim.Adam(model.parameters(), lr = 3e-5)
loss_fcn = nn.BCEWithLogitsLoss()
train_loss = []
epoch = 0
train_correct = 0
acc_loss = 0
acc_avg = 0        

model.train() #Set train mode
with tqdm(total = len(valid_loader)) as epoch_pbar:                    
    for i, batch in enumerate(valid_loader):
        tokens = batch['tokens']
        tokens['tokens'] = tokens['tokens'].to(device)
        labels = batch['label'].to(device) 

        #Forward pass            
        outputs = model(tokens)
        loss = loss_fcn(outputs, labels)
        acc_loss += loss.item()                
        train_loss.append(loss.item())
        logits = torch.sigmoid(outputs)
        train_correct += torch.sum((logits >= .5) == labels).item()

        #Update progress bar
        avg_loss = acc_loss/(i + 1)                
        acc_avg = train_correct/((i+1) * 16)
        desc = f"Epoch {epoch} - loss {avg_loss:.4f} - acc {acc_avg:.4f} - lr {optimizer.param_groups[0]['lr']}"
        epoch_pbar.set_description(desc)
        epoch_pbar.update(1)

        #Compute gradient and update params
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) #clip gradient
        optimizer.step()                
        #scheduler.step()
        optimizer.zero_grad()
        model.zero_grad()                     

optimizer = torch.optim.Adam(model.parameters(), lr = 3e-3)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = len(train_loader.dataset))

        
for epoch in range(10):
    ### TRAINING ###    
    train_loss = []
    train_correct = 0
    acc_loss = 0
    acc_avg = 0        
    
    model.train() #Set train mode
    with tqdm(total = len(train_loader)) as epoch_pbar:                    
        for i, batch in enumerate(train_loader):
            tokens = batch['tokens']
            tokens['tokens'] = tokens['tokens'].to(device)
            labels = batch['label'].to(device) 

            #Forward pass            
            outputs = model(tokens)
            loss = loss_fcn(outputs, labels)
            acc_loss += loss.item()                
            train_loss.append(loss.item())
            logits = torch.sigmoid(outputs)
            train_correct += torch.sum((logits >= .5) == labels).item()

            #Update progress bar
            avg_loss = acc_loss/(i + 1)                
            acc_avg = train_correct/((i+1) * 16)
            desc = f"Epoch {epoch} - loss {avg_loss:.4f} - acc {acc_avg:.4f} - lr {optimizer.param_groups[0]['lr']}"
            epoch_pbar.set_description(desc)
            epoch_pbar.update(1)

            #Compute gradient and update params
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) #clip gradient
            optimizer.step()                
            scheduler.step()
            optimizer.zero_grad()
            model.zero_grad()                                                    

Epoch 0 - loss 0.6856 - acc 0.5650 - lr 3e-05: 100%|██████████| 102/102 [00:11<00:00,  8.72it/s]


NameError: name 'get_cosine_schedule_with_warmup' is not defined

In [234]:
def train_epoch(model, train_loader, epoch_pbar, optimizer, scheduler):
    train_loss = []
    train_correct = 0
    acc_loss =0
    acc_avg = 0
    for i, batch in enumerate(train_loader):
        tokens = batch['tokens']
        tokens['tokens'] = tokens['tokens'].to(device)
        labels = batch['label'].to(device) 
    
        #Forward pass            
        outputs = model(tokens)
        loss = loss_fcn(outputs, labels)
        acc_loss += loss.item()                
        train_loss.append(loss.item())
        logits = torch.sigmoid(outputs)
        train_correct += torch.sum((logits >= .5) == labels).item()
        
        #Update progress bar
        avg_loss = acc_loss/(i + 1)                
        acc_avg = train_correct/((i+1) * 16)
        desc = f"Epoch {epoch} - loss {avg_loss:.4f} - acc {acc_avg:.4f} - lr {optimizer.param_groups[0]['lr']}"
        epoch_pbar.set_description(desc)
        epoch_pbar.update(1)
        
        #Compute gradient and update params
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) #clip gradient
        optimizer.step()                
        scheduler.step()
        optimizer.zero_grad()
        model.zero_grad()
    
    return train_loss, train_correct

In [235]:
model = returnElmoModel()
model.to(device)
model.zero_grad()
optimizer = torch.optim.Adam(model.parameters(), lr = 3e-3)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = len(train_loader.dataset))

for epoch in range(10):
    model.train()
    with tqdm(total = len(train_loader)) as epoch_pbar:                    
        train_loss, train_correct = train_epoch(model, train_loader, epoch_pbar, optimizer, scheduler)

Epoch 0 - loss 0.4596 - acc 0.7956 - lr 0.0029712684315895224: 100%|██████████| 476/476 [00:53<00:00,  8.96it/s]
Epoch 1 - loss 0.3927 - acc 0.8289 - lr 0.002885937723883112: 100%|██████████| 476/476 [00:53<00:00,  8.91it/s] 
Epoch 2 - loss 0.3633 - acc 0.8485 - lr 0.0027472903965443645: 100%|██████████| 476/476 [00:53<00:00,  8.98it/s]
Epoch 3 - loss 0.3292 - acc 0.8606 - lr 0.002560660171779821: 100%|██████████| 476/476 [00:52<00:00,  9.01it/s] 
Epoch 4 - loss 0.3007 - acc 0.8738 - lr 0.0023332266598520487: 100%|██████████| 476/476 [00:52<00:00,  9.02it/s]
Epoch 5 - loss 0.2750 - acc 0.8854 - lr 0.0020737391615688254: 100%|██████████| 476/476 [00:52<00:00,  9.01it/s]
Epoch 6 - loss 0.2297 - acc 0.9076 - lr 0.0017921800852353012: 100%|██████████| 476/476 [00:52<00:00,  9.00it/s]
Epoch 7 - loss 0.2010 - acc 0.9169 - lr 0.00149938092632073: 100%|██████████| 476/476 [00:52<00:00,  9.04it/s]  
Epoch 8 - loss 0.1848 - acc 0.9303 - lr 0.0012066055829898698: 100%|██████████| 476/476 [00:52<0

In [11]:
def valid_one_epoch(epoch, valid_size, model, device, valid_loader, epoch_pbar, 
                    optimizer, scheduler, writer, loss_fcn):
    for i, batch in enumerate(valid_iter):
        tokens = batch['tokens']
        tokens['batch'] = tokens['tokens'].to(device)
        labels = batch['label'].to(device) 

        with torch.no_grad():
            outputs = model(tokens)

        loss = loss_fcn(outputs, labels)
        valid_loss.append(loss.item())
        logits = torch.sigmoid(outputs)
        predict = logits >= .5
        valid_correct += torch.sum(predict == labels).item()

        #Add to tensorboard
        writer.add_scalar('Iteration Validation Loss', loss.item(), 
                          epoch*valid_size + i + 1)

        for t, p in zip(labels, predict):
            conf_matrix[t, p] += 1

In [28]:
def valid_one_epoch(epoch, valid_size, model, device, valid_iter, epoch_pbar, 
                    optimizer, scheduler, writer, loss_fcn, conf_matrix):
    valid_loss = []
    valid_correct = 0
        
    for i, batch in enumerate(valid_iter):
        tokens = batch['tokens']
        tokens['tokens'] = tokens['tokens'].to(device)
        labels = batch['label'].to(device)         

        with torch.no_grad():
            outputs = model(tokens)

        loss = loss_fcn(outputs, labels)
        valid_loss.append(loss.item())
        logits = torch.sigmoid(outputs)
        predict = logits >= .5
        valid_correct += torch.sum(predict == labels).item()

        #Add to tensorboard
        writer.add_scalar('Iteration Validation Loss', loss.item(), 
                          epoch*valid_size + i + 1)

        import pdb;pdb.set_trace()
        labels = labels.long()
        predict = predict.long()
        for t, p in zip(labels, predict):
            conf_matrix[t, p] += 1            
        
    return valid_loss, valid_correct, conf_matrix

In [21]:
from transformers import get_cosine_schedule_with_warmup
conf_matrix = torch.zeros(2, 2)
valid_iter = valid_loader
valid_size = len(valid_loader)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = len(train_loader.dataset))
writer = SummaryWriter(log_dir="test")

In [None]:
with tqdm(total = valid_size) as epoch_pbar:
    model.eval()                           
    
    valid_loss, valid_correct, conf_matrix = valid_one_epoch(epoch, valid_size, model, 
                                                             device, valid_iter, epoch_pbar, 
                                                             optimizer, scheduler, writer, 
                                                             loss_fcn, conf_matrix)                                    

  0%|          | 0/102 [00:00<?, ?it/s]

> <ipython-input-28-1f30c43ae8de>(25)valid_one_epoch()
-> labels = labels.long()
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(26)valid_one_epoch()
-> predict = predict.long()
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(27)valid_one_epoch()
-> for t, p in zip(labels, predict):
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(28)valid_one_epoch()
-> conf_matrix[t, p] += 1
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(27)valid_one_epoch()
-> for t, p in zip(labels, predict):
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(28)valid_one_epoch()
-> conf_matrix[t, p] += 1
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(27)valid_one_epoch()
-> for t, p in zip(labels, predict):
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(28)valid_one_epoch()
-> conf_matrix[t, p] += 1
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(27)valid_one_epoch()
-> for t, p in zip(labels, predict):
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(28)valid_one_epoch()
-> conf_matrix[t, p] += 1
(Pdb) n
> <ipython-input-28-1f30c43ae8de>(27)valid_one_epoch()
-> for t,