In [1]:
import pandas as pd
import numpy as np
import os
import sys
import tabulate
import time
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm_notebook as tqdm

# from torchtext import data
# from torchtext import datasets
# from torchtext.vocab import Vectors, GloVe

import curves
import data
import load_data
import models
import utils

In [2]:
# Print the installed PyTorch version so the environment this notebook ran in is visible in the output.
print(torch.__version__)

1.0.1


In [3]:
# Scratch check: format a parameter name from a layer index, an optional
# suffix and a trailing index. The cell's last expression is displayed.
# NOTE(review): presumably mirrors the per-point LSTM weight naming used in
# curves.py -- confirm against that module.
layer, suffix, i = 0, '', 5
str.format('weight_ih_l{}{}_{}', layer, suffix, i)

'weight_ih_l0_5'

In [4]:
# ---------------------------------------------------------------------------
# Notebook configuration: every tunable value lives in this cell.
# ---------------------------------------------------------------------------

# --- Filesystem locations ---------------------------------------------------
c_dir = 'saved_models/'   # training directory (checkpoints are written here)
data_path = './data/'     # path to datasets location
# Left over from the image pipeline, not used in this notebook:
#   dataset = 'CIFAR10'   (dataset name)
#   transform = 'VGG'     (transform name)

# --- Checkpoints (metavar='CKPT') -------------------------------------------
init_start = './saved_models/LSTMClassifier-6.pt'    # checkpoint to init the curve's start point
init_end = './saved_models/LSTMClassifier2-4.pt'     # checkpoint to init the curve's end point
resume = None                                        # checkpoint to resume training from

# --- Data loading -----------------------------------------------------------
batch_size = 32     # input batch size
num_workers = 4     # number of workers
use_test = False    # switches between validation and test set (default: validation)

# --- Model and curve --------------------------------------------------------
model_name = 'LSTMClassifier'   # model name (attribute looked up on `models`)
curve_type = 'PolyChain'        # Bezier/PolyChain
num_bends = 3                   # number of curve bends
fix_start = True                # fix start point
fix_end = True                  # fix end point
init_linear = True              # linear initialization of intermediate points
reg = False                     # when truthy, '_regularizer' is appended to checkpoint names

# --- Optimisation -----------------------------------------------------------
LR = 1e-3           # initial learning rate
Momentum = 0.9      # SGD momentum
wd = 1e-5           # weight decay
Epochs = 200        # number of epochs to train
save_freq = 1       # save frequency (in epochs)

# --- Reproducibility --------------------------------------------------------
seed_val = 37       # random seed

In [5]:
# Unpack the seven values returned by load_data.load_dataset for the
# configured batch size; later cells read these names directly.
(
    TEXT,
    vocab_size,
    num_classes,
    word_embeddings,
    train_loader,
    valid_loader,
    test_loader,
) = load_data.load_dataset(batch_size=batch_size)

Length of Text Vocabulary: 135872
Vector size of Text Vocabulary:  torch.Size([135872, 300])
Label Length: 4


In [6]:
# Train either the plain base model (curve_type is None) or a curve that
# connects two previously trained checkpoints, saving a checkpoint every
# `save_freq` epochs. The data objects (vocab_size, word_embeddings, loaders,
# num_classes) are expected to exist already from the data-loading cell.
os.makedirs(c_dir, exist_ok=True)

# Keyword arguments forwarded to the architecture constructors.
kwargs = {
    'batch_size': batch_size,
    'hidden_size': 256,
    'embedding_length': 300,
    'vocab_size': vocab_size,
    'weights': word_embeddings
}

architecture = getattr(models, model_name)

torch.backends.cudnn.benchmark = True
# Seed both the CPU and the CUDA generators before any parameter is created.
torch.manual_seed(seed_val)
torch.cuda.manual_seed(seed_val)

if curve_type is None:
    model = architecture.base(num_classes=num_classes, **kwargs)
else:
    curve = getattr(curves, curve_type)
    model = curves.CurveNet(
        num_classes,
        curve,
        architecture.curve,
        num_bends,
        fix_start,
        fix_end,
        architecture_kwargs=kwargs,
    )
    base_model = None
    if resume is None:
        # Copy the two endpoint checkpoints into the first and the last point
        # of the curve (indices 0 and num_bends - 1).
        for path, k in [(init_start, 0), (init_end, num_bends - 1)]:
            if path is not None:
                if base_model is None:
                    # One scratch base model is reused for both endpoints.
                    base_model = architecture.base(num_classes=num_classes, **kwargs)
                checkpoint = torch.load(path)
                print('Loading %s as point #%d' % (path, k))
                base_model.load_state_dict(checkpoint['model_state'])
                model.import_base_parameters(base_model, k)
        if init_linear:
            print('Linear initialization.')
            model.init_linear()
model.cuda()
loss_fn = torch.nn.CrossEntropyLoss()

# The regularizer is built whenever a curve is trained; the `reg` flag only
# affects the checkpoint file name below.
regularizer = None if curve_type is None else curves.l2_regularizer(wd)
# Alternative SGD set-up, kept for reference:
# optimizer = torch.optim.SGD(
#     filter(lambda param: param.requires_grad, model.parameters()),
#     lr=LR,
#     momentum=Momentum,
#     weight_decay=wd if curve_type is None else 0.0
# )
# Use the configured LR instead of a second hard-coded copy of the value, so
# that editing the config cell actually changes the optimizer.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)

start_epoch = 1
if resume is not None:
    print('Resume training from %s' % resume)
    checkpoint = torch.load(resume)
    start_epoch = checkpoint['epoch'] + 1
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])

name = 'LSTMClassifier_curve'
# Computed once so that every save below is guaranteed to use the same name.
checkpoint_name = name + '_regularizer' if reg else name

# Snapshot of the state before the first training epoch of this run.
utils.save_checkpoint(
    c_dir,
    start_epoch - 1,
    checkpoint_name,
    model_state=model.state_dict(),
    optimizer_state=optimizer.state_dict()
)

for epoch in range(start_epoch, Epochs + 1):
    time_ep = time.time()

    # Learning-rate schedule, currently disabled:
    #     lr = utils.learning_rate_schedule(LR, epoch, Epochs)
    #     utils.adjust_learning_rate(optimizer, lr)

    train_res = utils.train_model(train_loader, model, optimizer, loss_fn, epoch, batch_size, regularizer)
    val_res = utils.eval_model(valid_loader, model, loss_fn, batch_size, regularizer)

    if epoch % save_freq == 0:
        utils.save_checkpoint(
            c_dir,
            epoch,
            checkpoint_name,
            model_state=model.state_dict(),
            optimizer_state=optimizer.state_dict()
        )

    # Wall-clock duration of the epoch in seconds (kept for inspection; it is
    # not part of the printed summary).
    time_ep = time.time() - time_ep
    print('Epoch: {:02}, Train Loss: {:.3f}, Train Acc: {:.2f}%, Val. Loss: {:.3f}, Val. Acc: {:.2f}%'\
         .format(epoch, train_res['loss'], train_res['acc'], val_res['nll'], val_res['acc']))


# Make sure the final epoch is saved even when it is not a multiple of save_freq.
if Epochs % save_freq != 0:
    utils.save_checkpoint(
        c_dir,
        Epochs,
        checkpoint_name,
        model_state=model.state_dict(),
        optimizer_state=optimizer.state_dict()
    )

Loading ./saved_models/LSTMClassifier-6.pt as point #0
> /home/hakobtamazyan/dnn-mode-connectivity/curves.py(522)import_base_parameters()
    520         from IPython.core.debugger import set_trace
    521         set_trace()
--> 522         base_parameters = base_model.parameters()
    523         for parameter, base_parameter in zip(parameters, base_parameters):
    524             parameter.data.copy_(base_parameter.data)

BdbQuit: 

In [19]:
# Scratch check of operator precedence: `+` binds tighter than the conditional
# expression, so when `reg` is falsy the result is just `model_name` (the '2'
# is dropped as well). The trailing comma makes the displayed value a 1-tuple.
((model_name + '2' + '_regularizer') if reg else model_name),

('LSTMClassifier',)

In [9]:
# Reload the saved 'LSTMClassifier2' checkpoints for epochs 2..6 into the
# current `model`/`optimizer` and report loss and accuracy on the validation
# loader for each of them.
name = 'LSTMClassifier2'
for epoch in np.arange(2, 7):
    ckpt_file = os.path.join(c_dir, '%s-%d.pt' % (name, epoch))
    checkpoint = torch.load(ckpt_file)

    # Restore the full training state stored in the checkpoint.
    start_epoch = checkpoint['epoch'] + 1
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])

    res = eval_model(valid_loader, model, loss_fn, regularizer)
    summary = 'Epoch: {:02}, Loss: {:.3f}, Acc: {:.2f}%'.format(epoch, res['nll'], res['acc'])
    print(summary)

HBox(children=(IntProgress(value=0, max=563), HTML(value='')))

Epoch: 02, Loss: 0.270, Acc: 89.94%


HBox(children=(IntProgress(value=0, max=563), HTML(value='')))

Epoch: 03, Loss: 0.252, Acc: 90.93%


HBox(children=(IntProgress(value=0, max=563), HTML(value='')))

Epoch: 04, Loss: 0.246, Acc: 91.09%


HBox(children=(IntProgress(value=0, max=563), HTML(value='')))

Epoch: 05, Loss: 0.251, Acc: 91.28%


HBox(children=(IntProgress(value=0, max=563), HTML(value='')))

Epoch: 06, Loss: 0.281, Acc: 90.71%


In [17]:
# TEXT, vocab_size, num_classes, word_embeddings, train_loader, valid_loader, test_loader = load_data.load_dataset()
# for i, x in enumerate(train_loader):
#     if i in range(3,7):
#         break
#     print(x.text)

In [18]:
# TEXT, vocab_size, num_classes, word_embeddings, train_loader, valid_loader, test_loader = load_data.load_dataset_imdb()
# for i, x in enumerate(train_loader):
#     if i in range(3,7):
#         break
#     print(x.text)

In [27]:
def train_model(train_loader, model, optimizer, loss_fn, epoch, regularizer=None):
    """Run one training epoch and return the mean loss and accuracy.

    Args:
        train_loader: sized iterable of batches. Each batch must expose
            ``text`` (the input tensor is read from ``batch.text[0]``),
            ``label`` and support ``len()``.
        model: module called as ``model(text)``; its output is passed to
            ``loss_fn`` and arg-maxed along dimension 1 to get predictions.
        optimizer: optimizer stepped once per processed batch.
        loss_fn: callable ``loss_fn(prediction, target)`` returning the loss.
        epoch: epoch number, only used in the progress-bar description.
        regularizer: optional callable ``regularizer(model)`` whose value is
            added to the loss before back-propagation.

    Returns:
        dict with keys ``'loss'`` and ``'acc'`` (accuracy in percent), both
        averaged over the batches that were actually processed.

    Notes:
        Reads the notebook-level ``batch_size``: batches whose first dimension
        differs from it are skipped (the iterator can yield one shorter batch).
    """
    use_cuda = torch.cuda.is_available()
    # Move the model only when a GPU exists, mirroring how the tensors are
    # handled below; an unconditional .cuda() fails on CPU-only machines.
    if use_cuda:
        model.cuda()
    model.train()

    total_epoch_loss = 0.0
    total_epoch_acc = 0.0
    steps = 0  # number of batches actually used for an optimizer step

    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, batch in pbar:
        text = batch.text[0]
        # Compare by value: `is not` on ints only works by accident for small
        # cached integers and would skip every batch for batch_size > 256.
        if text.size()[0] != batch_size:
            continue

        target = batch.label.long()
        if use_cuda:
            text = text.cuda()
            target = target.cuda()

        optimizer.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        if regularizer is not None:
            loss += regularizer(model)

        predicted = torch.max(prediction, 1)[1].view(target.size())
        num_corrects = (predicted == target).float().sum()
        acc = 100.0 * num_corrects / len(batch)

        loss.backward()
        utils.clip_gradient(model, 1e-2)
        optimizer.step()
        steps += 1

        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
        # Running means over processed batches only, so skipped batches do
        # not drag the displayed numbers down.
        pbar.set_description_str('[TRAIN] Epoch: {}, Train Loss: {:.4f}, Train acc: {:.2f}%'\
            .format(epoch,
                   total_epoch_loss / steps,
                   total_epoch_acc / steps))

    # Guard against an empty loader or one where every batch was skipped.
    processed = max(steps, 1)
    return {
        'loss': total_epoch_loss / processed,
        'acc': total_epoch_acc / processed
    }

def eval_model(val_loader, model, loss_fn, batchregularizer=None):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Args:
        val_loader: sized iterable of batches. Each batch must expose
            ``text`` (the input tensor is read from ``batch.text[0]``),
            ``label`` and support ``len()``.
        model: module called as ``model(text)``; its output is passed to
            ``loss_fn`` and arg-maxed along dimension 1 to get predictions.
        loss_fn: callable ``loss_fn(prediction, target)`` returning the loss.
        batchregularizer: optional callable ``regularizer(model)`` whose value
            is added to the loss (but not to ``'nll'``). The parameter name is
            kept as-is so existing calls keep working.

    Returns:
        dict with keys ``'nll'`` (plain ``loss_fn`` value), ``'loss'``
        (``nll`` plus the regularizer term) and ``'acc'`` (accuracy in
        percent), averaged over the batches that were actually evaluated.

    Notes:
        Reads the notebook-level ``batch_size``: batches whose first dimension
        differs from it are skipped.
    """
    # Bind the argument to the name used below. Without this the argument was
    # ignored and whatever global `regularizer` existed in the notebook was
    # picked up instead.
    regularizer = batchregularizer
    use_cuda = torch.cuda.is_available()

    total_epoch_loss = 0.0
    total_epoch_nll = 0.0
    total_epoch_acc = 0.0
    evaluated = 0  # number of batches that were not skipped

    model.eval()
    with torch.no_grad():
        for idx, batch in tqdm(enumerate(val_loader), total=len(val_loader)):
            text = batch.text[0]
            # Compare by value: `is not` on ints only works by accident for
            # small cached integers.
            if text.size()[0] != batch_size:
                continue

            target = batch.label.long()
            if use_cuda:
                text = text.cuda()
                target = target.cuda()
            prediction = model(text)

            nll = loss_fn(prediction, target)
            # Clone so that adding the regularizer does not modify `nll`.
            loss = nll.clone()
            if regularizer is not None:
                loss += regularizer(model)

            predicted = torch.max(prediction, 1)[1].view(target.size())
            # Cast to float before scaling so the percentage is never
            # truncated by integer arithmetic.
            num_corrects = (predicted == target).float().sum()
            acc = 100.0 * num_corrects / len(batch)

            total_epoch_loss += loss.item()
            total_epoch_nll += nll.item()
            total_epoch_acc += acc.item()
            evaluated += 1

    # Average over evaluated batches only; guard against dividing by zero
    # when the loader is empty or every batch was skipped.
    denom = max(evaluated, 1)
    return {
        'nll': total_epoch_nll / denom,
        'loss': total_epoch_loss / denom,
        'acc': total_epoch_acc / denom
    }


RuntimeError: dimension out of range (expected to be in range of [-1, 0], but got 1)