In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import time
import pickle as pkl
import csv
import numpy as np
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torchvision import utils
import torchvision
from torchvision import transforms

from data_loader import *
import vocabulary_struct
import AnnoNet
#import AnnoNetRNN as AnnoNet # uncomment this line for vanilla RNN

In [2]:
# Load the pickled vocabulary object used by the data loaders and the model.
# NOTE: unpickling can execute arbitrary code, so 'Vocab_File' must come from
# a trusted source.
with open('Vocab_File', 'rb') as vocab_file:
    vocab = pkl.load(vocab_file)

In [3]:
def _read_first_row_ids(csv_path):
    """Return the first row of the CSV file at ``csv_path`` as a list of ints.

    Only the first row is read; any further rows in the file are ignored.

    Args:
        csv_path: Path of the CSV file holding the image ids.

    Returns:
        The entries of the first row converted with ``int``.

    Raises:
        ValueError: If the file contains no rows, or an entry is not an
            integer literal.
    """
    # newline='' is the mode the csv module asks for when handing it a file.
    with open(csv_path, 'r', newline='') as id_file:
        first_row = next(csv.reader(id_file), None)
    if first_row is None:
        raise ValueError("no image ids found in {}".format(csv_path))
    return [int(i) for i in first_row]


trainIds = _read_first_row_ids('TrainImageIds.csv')
testIds = _read_first_row_ids('TestImageIds.csv')

In [4]:
# Hold out the first 20% of the training ids for validation and remove them
# from the training list.
# NOTE: trainIds is shrunk in place, so re-running this cell without first
# re-running the id-loading cell carves off a further 20%.
_val_count = int(0.2 * len(trainIds))
valIds = trainIds[:_val_count]
trainIds[:_val_count] = []

In [5]:
batch_size = 128

# Image preprocessing: resize, centre-crop to 250x250, convert to a tensor and
# normalise each channel with the given mean / std.
_preprocess_steps = [
    transforms.Resize(250),
    transforms.CenterCrop(250),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225)),
]
transform = transforms.Compose(_preprocess_steps)

# Settings that are identical for the three loaders.
_shared_loader_args = dict(
    vocab=vocab,
    transform=transform,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

# The training and validation loaders read the same image folder and
# annotation file; they differ only in the ids they are given.
train_loader = get_loader(
    root='./data/images/train/',
    json='./data/annotations/captions_train2014.json',
    ids=trainIds,
    **_shared_loader_args
)
val_loader = get_loader(
    root='./data/images/train/',
    json='./data/annotations/captions_train2014.json',
    ids=valIds,
    **_shared_loader_args
)
test_loader = get_loader(
    root='./data/images/test/',
    json='./data/annotations/captions_val2014.json',
    ids=testIds,
    **_shared_loader_args
)

loading annotations into memory...
Done (t=0.81s)
creating index...
index created!
loading annotations into memory...
Done (t=0.94s)
creating index...
index created!
loading annotations into memory...
Done (t=0.39s)
creating index...
index created!


In [6]:
def init_weights(m):
    """Initialise fully connected layers; meant to be passed to ``Module.apply``.

    ``nn.Linear`` modules get Xavier-uniform weights and an all-zero bias.
    Modules of any other type are left untouched.

    Args:
        m: The module currently being visited.
    """
    if not isinstance(m, nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)
    # A layer built with bias=False exposes ``bias`` as None; skip it instead
    # of failing on the attribute access.
    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)
        
# Upper bound on the number of epochs; train() can stop earlier.
epochs = 100

# Constructed with default arguments.
criterion = torch.nn.CrossEntropyLoss()

# The name `AnnoNet` refers to the imported module the first time this cell
# runs and to the model instance afterwards (later cells use that name for the
# model). Recover the class from whichever of the two it currently is, so that
# re-running this cell builds a fresh model instead of failing on
# `AnnoNet.AnnoNet`.
_annonet_cls = type(AnnoNet) if isinstance(AnnoNet, nn.Module) else AnnoNet.AnnoNet
AnnoNet = _annonet_cls(vocab_size=len(vocab),
                       batch_size=batch_size,
                       embedding_dim=256,
                       hidden_dim=512,
                       hidden_units=1)
AnnoNet.apply(init_weights)
optimizer = optim.Adam(AnnoNet.parameters(), lr=1e-3)

In [7]:
# Pick the compute device once. `device` is defined on CPU-only machines as
# well, so code that refers to it never hits a NameError.
use_gpu = torch.cuda.is_available()
cpu_device = torch.device("cpu")
device = torch.device("cuda:0") if use_gpu else cpu_device
AnnoNet = AnnoNet.to(device)
    
def train(batch_size, check_num = 5):
    """Train the global ``AnnoNet`` model with early stopping on validation loss.

    Runs up to ``epochs`` passes over ``train_loader``. After every epoch the
    model is evaluated with ``val``. The weights are written to
    ``best_model.pt`` after the first epoch and whenever the validation loss
    is lower than every earlier one. Training stops once ``check_num``
    consecutive epochs bring no improvement. Both loss histories are saved to
    the files ``train_loss`` and ``val_losses`` after each epoch.

    Args:
        batch_size: Forwarded to ``val``. It is not used to scale the epoch
            loss (see the comment where the epoch average is computed).
        check_num: Number of consecutive non-improving epochs tolerated
            before training stops.

    Returns:
        A ``(losses, val_losses)`` tuple with one entry per completed epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    counter = 0
    best_val_loss = float("inf")
    losses = []
    val_losses = []
    for epoch in range(epochs):
        ts = time.time()
        # Set train mode at the start of every epoch: val() switches the model
        # to eval mode, and an earlier session may have left it that way.
        AnnoNet.train()
        rolling_loss = 0.0
        num_batches = 0
        for step, (X, tar, Y) in enumerate(train_loader):
            optimizer.zero_grad()
            if use_gpu:
                inputs = X.to(device)
                labels = tar.to(device, dtype=torch.int64)
            else:
                inputs = X
                # Same integer dtype as on the GPU path.
                labels = tar.to(dtype=torch.int64)

            outputs = AnnoNet(inputs, labels, Y)
            # Pack the padded labels with the lengths Y so the targets line up
            # with the model output passed to the criterion.
            targets = pack_padded_sequence(labels, Y, batch_first=True).data
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            if step % 10 == 0:
                print("epoch{}, iter{}, loss: {}".format(epoch, step, batch_loss))

            rolling_loss += batch_loss
            num_batches += 1
            # Drop the references so this step's tensors do not stay alive
            # during the next forward pass. The per-step
            # torch.cuda.empty_cache() calls were removed.
            del inputs, labels, outputs, targets, loss

        print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        if num_batches == 0:
            raise ValueError("train_loader produced no batches")
        # `criterion` (a default CrossEntropyLoss) already averages within a
        # batch, so the epoch loss is the mean over batches. Dividing by
        # batch_size as well would shrink it by that factor.
        losses.append(rolling_loss / num_batches)
        loss_val = val(epoch, batch_size)
        val_losses.append(loss_val)
        AnnoNet.train()

        # Early stopping on the validation loss: checkpoint on the first epoch
        # and on every strict improvement, otherwise count the stale epoch.
        if epoch == 0 or loss_val < best_val_loss:
            best_val_loss = loss_val
            torch.save(AnnoNet.state_dict(), 'best_model.pt')
            counter = 0
        else:
            counter += 1
        torch.save(val_losses,"val_losses")

        torch.save(losses,"train_loss")

        if counter == check_num:
            print("early stop achieved")
            break

    return losses, val_losses
    
    
def val(epoch, batch_size):
    """Evaluate the global ``AnnoNet`` model on ``val_loader``.

    Puts the model in eval mode (the caller is responsible for switching it
    back to train mode) and runs every validation batch with gradient
    tracking disabled.

    Args:
        epoch: Epoch index, used only in the progress messages.
        batch_size: Kept for interface compatibility; it is not used to scale
            the average (see the comment where the average is computed).

    Returns:
        The mean of the per-batch losses as a float.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    AnnoNet.eval()
    ts = time.time()
    rolling_loss = 0.0
    num_batches = 0
    # No backward pass happens here, so skip building the autograd graph.
    with torch.no_grad():
        for step, (X, tar, Y) in enumerate(val_loader):
            if use_gpu:
                inputs = X.to(device)
                labels = tar.to(device, dtype=torch.int64)
            else:
                inputs = X
                # Same integer dtype as on the GPU path.
                labels = tar.to(dtype=torch.int64)

            outputs = AnnoNet(inputs, labels, Y)
            # Pack the padded labels with the lengths Y so the targets line up
            # with the model output passed to the criterion.
            targets = pack_padded_sequence(labels, Y, batch_first=True).data
            batch_loss = criterion(outputs, targets).item()
            rolling_loss += batch_loss

            if step % 10 == 0:
                print("epoch{}, iter{}, loss: {}".format(epoch, step, batch_loss))
            num_batches += 1
            del inputs, labels, outputs, targets

    print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
    if num_batches == 0:
        raise ValueError("val_loader produced no batches")
    # `criterion` (a default CrossEntropyLoss) already averages within a batch,
    # so the mean over batches is the average loss. Dividing by batch_size as
    # well would shrink it by that factor.
    rolling_loss /= num_batches
    print("Average loss: ",rolling_loss)

    return rolling_loss

In [8]:
# Start training with the notebook-level batch size; the marker below is
# printed once train() has returned.
train(batch_size=batch_size)
print("yay")

epoch0, iter0, loss: 10.132027626037598
epoch0, iter10, loss: 6.512854099273682


KeyboardInterrupt: 