In [1]:
from torchvision import utils
from data_loader import *
from evaluate_captions import *
import torchvision
from torchvision import transforms
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch
import time
import pickle as pkl
import vocabulary_struct
import AnnoNet
#import AnnoNetRNN as AnnoNet
import csv
import numpy as np
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from matplotlib import pyplot as plt
from PIL import Image

In [2]:
model_path = "best_model_base3.pt"

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
with open('Vocab_File', 'rb') as f:
    vocab = pkl.load(f)

with open('TestImageIds.csv', 'r') as f:
    reader = csv.reader(f)
    testIds = list(reader)

testIds = [int(i) for i in testIds[0]]

In [5]:
batch_size = 128
#Implement normalization later
transform = transforms.Compose([
    transforms.Resize(250),
    transforms.CenterCrop(250),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), 
                             (0.229, 0.224, 0.225))
])

test_loader = get_loader(root = './data/images/test/',
                          json = './data/annotations/captions_val2014.json',
                          ids = testIds,
                          vocab = vocab,
                          transform = transform,
                          batch_size = batch_size,
                          shuffle = True,
                          num_workers = 4)

loading annotations into memory...
Done (t=0.37s)
creating index...
index created!


In [6]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight.data)
        #torch.nn.init.xavier_uniform_(m.bias.data)
        torch.nn.init.zeros_(m.bias.data)
        
epochs     = 100
#criterion = # Choose an appropriate loss function from https://pytorch.org/docs/stable/_modules/torch/nn/modules/loss.html
criterion = torch.nn.CrossEntropyLoss()
AnnoNet = AnnoNet.AnnoNet(vocab_size = len(vocab), batch_size = batch_size, embedding_dim=256,hidden_dim = 512, hidden_units=1)
AnnoNet.apply(init_weights)
#optimizer = optim.Adam(AnnoNet.parameters(), lr=1e-2)
AnnoNet.load_state_dict(torch.load(model_path))
AnnoNet.eval()

AnnoNet.to(device)

AnnoNet(
  (resnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 2

In [7]:
def test(batch_size):
    AnnoNet.eval()
    ts = time.time()
    rolling_loss = 0
    rolling_acc = 0
    counter = 0
    for iter, (X, tar, Y) in enumerate(test_loader):
        inputs = X.to(device)# Move your inputs onto the gpu
        labels = tar.to(device,dtype=torch.int64)# Move your labels onto the gpu

        #print("lengths: ", Y)
        outputs = AnnoNet(inputs, labels, Y)
        del inputs
        torch.cuda.empty_cache()
        labels = pack_padded_sequence(labels, Y, batch_first=True)
        loss = criterion(outputs, labels.data)
        rolling_loss += loss.item()
        del outputs,labels
        torch.cuda.empty_cache()

        if iter% 10 == 0:
            print("iter{}, loss: {}".format(iter, loss.item()))
        del loss
        torch.cuda.empty_cache()
        counter += 1
    
    print("Finish time elapsed {}".format(time.time() - ts))
    Normalizing_Factor = counter * batch_size
    rolling_loss /= Normalizing_Factor
    print("Average loss: ",rolling_loss)
    return rolling_loss

In [8]:
test(batch_size)

iter0, loss: 2.543032169342041
iter10, loss: 2.3199615478515625
iter20, loss: 2.468188524246216
iter30, loss: 2.55639386177063
iter40, loss: 2.6346330642700195
iter50, loss: 2.3716373443603516
iter60, loss: 2.3498339653015137
iter70, loss: 2.506765842437744
iter80, loss: 2.6274375915527344
iter90, loss: 2.5714190006256104
iter100, loss: 2.362164258956909
iter110, loss: 2.398698329925537
Finish time elapsed 59.1247444152832
Average loss:  0.019572997244737916


0.019572997244737916