In [1]:
import os
import pickle as pk

import numpy as np
import torch

In [2]:
def _load_pickle(path):
    """Read a single pickled object from `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the object has been read, instead of being left to the garbage collector.

    @param: path - location of the pickle file
    @return: the unpickled object
    """
    with open(path, "rb") as handle:
        # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
        return pk.load(handle)


# Objects loaded from ./hw2_data; their contents are defined by whatever wrote the pickles.
snli_train_id = _load_pickle("./hw2_data/snli_train_id.pk")
snli_val_id = _load_pickle("./hw2_data/snli_val_id.pk")
loaded_embeddings_ft = _load_pickle("./hw2_data/loaded_embeddings_ft.pk")

In [5]:
from SNLI_DataLoader import SNLIDataset, snli_collate_func

# Both loaders use the same batch size, collate function and shuffling.
_loader_options = dict(batch_size=32, collate_fn=snli_collate_func, shuffle=True)

# Training split
train_dataset = SNLIDataset(snli_train_id)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, **_loader_options)

# Validation split
val_dataset = SNLIDataset(snli_val_id)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, **_loader_options)


In [14]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    model.eval()
    for data in loader:
        prem, _, hyp, _, labels = [data[i].to(device) for i in range(len(data))]
        outputs = F.softmax(model(prem,hyp), dim=1)
        predicted = outputs.max(1, keepdim=True)[1]

        total += labels.size(0)
        correct += predicted.eq(labels.view_as(predicted)).sum().item()
    return (100 * correct / total)




In [10]:
# Settings for the single CNN run below
args = {
    'device': 'cpu',
    'hidden_size': 200,
    'num_classes': 3,
    'dropout': 0,
    'kernel_size': 3,
}

In [15]:
# NOTE(review): CNN is not imported in any visible cell — import it before
# running this cell on a fresh kernel.
model = CNN(args, loaded_embeddings_ft)
device = args['device']
learning_rate = 3e-4
num_epochs = 10  # number of epochs to train

# Criterion and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Number of mini-batches per epoch, used in the progress message
total_step = len(train_loader)

# Train the model
for epoch in range(num_epochs):
    for i, data in enumerate(train_loader):
        # Each batch unpacks into five tensors; the 2nd and 4th are not used here.
        # Iterate over the tensors directly so the batch index `i` is not reused.
        prem, _, hyp, _, labels = [tensor.to(device) for tensor in data]

        # test_model() switches the model to eval mode, so training mode is
        # re-enabled on every step.
        model.train()
        optimizer.zero_grad()

        # Forward pass
        outputs = model(prem, hyp)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        # Validate every 100 iterations (skipping the very first batch)
        if i > 0 and i % 100 == 0:
            val_acc = test_model(val_loader, model)
            print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}'.format(
                       epoch+1, num_epochs, i+1, total_step, val_acc))


Epoch: [1/10], Step: [101/3125], Validation Acc: 43.4
Epoch: [1/10], Step: [201/3125], Validation Acc: 49.9
Epoch: [1/10], Step: [301/3125], Validation Acc: 56.6
Epoch: [1/10], Step: [401/3125], Validation Acc: 55.7
Epoch: [1/10], Step: [501/3125], Validation Acc: 57.0
Epoch: [1/10], Step: [601/3125], Validation Acc: 57.7
Epoch: [1/10], Step: [701/3125], Validation Acc: 57.8
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.7
Epoch: [1/10], Step: [901/3125], Validation Acc: 59.2
Epoch: [1/10], Step: [1001/3125], Validation Acc: 59.9
Epoch: [1/10], Step: [1101/3125], Validation Acc: 59.5
Epoch: [1/10], Step: [1201/3125], Validation Acc: 61.5
Epoch: [1/10], Step: [1301/3125], Validation Acc: 62.4
Epoch: [1/10], Step: [1401/3125], Validation Acc: 61.4
Epoch: [1/10], Step: [1501/3125], Validation Acc: 61.2
Epoch: [1/10], Step: [1601/3125], Validation Acc: 61.4
Epoch: [1/10], Step: [1701/3125], Validation Acc: 60.4
Epoch: [1/10], Step: [1801/3125], Validation Acc: 61.8
Epoch: [1/10], Step

KeyboardInterrupt: 

In [17]:
from cnn_trainer import cnn_trainer

In [18]:
# Candidate values for each hyper-parameter of the CNN sweep
hidden_size_list = [100, 200, 300]  # hidden sizes
dropout_list = [0, 0.5]             # dropout settings
kernel_list = [3, 5]                # kernel sizes

In [20]:
# Settings shared by every run of the sweep
args = {}
args['num_layers'] = 1
args['num_classes'] = 3
args['device'] = 'cpu'
args['batch_size'] = 64
args['optim'] = 'adam'
args['learning_rate'] = 3e-4
args['num_epochs'] = 30

# Create the output directory up front so a finished run cannot fail on saving
os.makedirs('./result', exist_ok=True)

# Train one model per (hidden_size, dropout, kernel_size) combination.
# The previous `break` after the first kernel size meant the remaining
# entries of kernel_list were never trained.
for h in hidden_size_list:
    for d in dropout_list:
        for k in kernel_list:
            args['hidden_size'] = h
            args['dropout'] = d
            args['kernel_size'] = k
            trainer = cnn_trainer(snli_train_id, snli_val_id, loaded_embeddings_ft, args)
            # go() returns two sequences, stored below as training loss and validation accuracy
            train_list, val_list = trainer.go()

            # Same file names as before; `with` closes each file once it is written
            result_prefix = './result/cnn_h=%s_d=%s_k=%s' % (h, d, k)
            with open(result_prefix + '_val_acc', 'wb') as out_file:
                pk.dump(val_list, out_file)
            with open(result_prefix + '_train_loss', 'wb') as out_file:
                pk.dump(train_list, out_file)

            # Include the kernel size so runs sharing (h, d) can be told apart
            print('finish hidden_size = %s, dropout = %s, kernel_size = %s, val_acc = %s'
                  % (h, d, k, val_list[-1]))

  0%|          | 0/30 [00:00<?, ?it/s]

KeyboardInterrupt: 