In [58]:
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import variable
import torchvision as tv
import nntools as nt
import torch

In [52]:
class NNClassifier(nt.NeuralNetwork):
    """Base class for the classifiers in this notebook.

    Extends ``nt.NeuralNetwork`` with a cross-entropy training criterion so
    that subclasses only have to implement ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Kept as an attribute so the loss object is created once per model.
        self.cross_entropy = nn.CrossEntropyLoss()

    def criterion(self, y, d):
        """Return the cross-entropy loss between predictions and targets.

        Args:
            y: model outputs, forwarded unchanged to ``nn.CrossEntropyLoss``.
            d: desired targets, forwarded unchanged to ``nn.CrossEntropyLoss``.

        Returns:
            Whatever ``nn.CrossEntropyLoss`` returns for ``(y, d)``.
        """
        loss = self.cross_entropy(y, d)
        return loss

In [72]:
class VGGNet(NNClassifier):
    """Image encoder: pretrained VGG-16 (batch-norm) features + per-region projection.

    The convolutional feature map is split into spatial regions and every
    region's ``num_fts``-channel vector is projected to ``num_classes``
    dimensions, then passed through tanh and dropout.

    Args:
        num_classes: output size of the per-region linear projection.
        fine_tuning: if True the pretrained VGG weights stay trainable,
            otherwise they are frozen (``requires_grad = False``).
    """

    def __init__(self, num_classes, fine_tuning=False):
        super(VGGNet, self).__init__()
        vgg = tv.models.vgg16_bn(pretrained=True)

        # Freeze or unfreeze every pretrained parameter in one go.
        for param in vgg.parameters():
            param.requires_grad = fine_tuning

        self.features = vgg.features

        self.num_fts = 512
        self.num_classes = num_classes

        # Linear layer goes from num_fts (512) channels to num_classes.
        self.classifier = nn.Linear(self.num_fts, self.num_classes)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Encode a batch of images into per-region feature vectors.

        Args:
            x: image batch accepted by ``vgg.features``.

        Returns:
            Tensor of shape ``(batch, regions, num_classes)`` where
            ``regions = H * W`` of the final feature map (196 for a 14x14
            map, e.g. 448x448 inputs).
        """
        h = self.features(x)  # (batch, num_fts, H, W), channel-first
        # Flatten the spatial grid and move channels last so that each row
        # really is one region's channel vector. A plain
        # ``view(-1, num_fts)`` on the channel-first map would only
        # reinterpret memory and mix values from different channels/regions.
        h = h.view(h.size(0), self.num_fts, -1).transpose(1, 2)
        h = self.classifier(h)  # (batch, regions, num_classes)
        y = self.dropout(self.tanh(h))

        return y

In [3]:
class LSTM(NNClassifier):
    """Question encoder: linear word embedding followed by an LSTM.

    Args:
        vocab_size: size of the (one-hot / bag-style) word vectors fed in.
        embedding_dim: size of the embedded word vectors.
        num_layers: number of stacked LSTM layers.
        batch_size: stored on the instance; not used by ``forward``, which
            reads the batch size from its input.
        hidden_dim: size of the LSTM hidden state and of the returned
            question embedding.
    """

    def __init__(self, vocab_size, embedding_dim, num_layers=1, batch_size=100, hidden_dim=1024):
        super(LSTM, self).__init__()
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        # forward() needs these to build the initial state; the original
        # never stored hidden_dim and failed with AttributeError.
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim

        # A bias-free linear layer applied to vocab-sized vectors acts as
        # an embedding lookup.
        self.embed = nn.Linear(self.vocab_size, embedding_dim, bias=False)

        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                            num_layers=num_layers)

    def forward(self, question_vec):
        """Encode a question into the final hidden state of the top LSTM layer.

        Args:
            question_vec: tensor whose last dimension is ``vocab_size``,
                laid out as ``(seq_len, batch, vocab_size)``. A 2-D
                ``(seq_len, vocab_size)`` or 1-D ``(vocab_size,)`` input is
                treated as a single question (batch of one).

        Returns:
            Tensor of shape ``(batch, hidden_dim)``.
        """
        question_embed = self.embed(question_vec)

        # Normalise to the (seq_len, batch, embedding_dim) layout nn.LSTM
        # expects when batch_first is False.
        if question_embed.dim() == 1:
            question_embed = question_embed.view(1, 1, -1)
        elif question_embed.dim() == 2:
            question_embed = question_embed.unsqueeze(1)

        # Initial state must be (num_layers, batch, hidden_dim) and live on
        # the same device/dtype as the input. Zeros (the nn.LSTM default)
        # keep the encoding deterministic, unlike a fresh random state on
        # every call.
        state_shape = (self.num_layers, question_embed.size(1), self.hidden_dim)
        h0 = question_embed.new_zeros(state_shape)
        c0 = question_embed.new_zeros(state_shape)

        _, (hidden_state, _) = self.lstm(question_embed, (h0, c0))

        # Last entry = top layer (identical to index 0 when num_layers == 1).
        embedding = hidden_state[-1]

        return embedding

NameError: name 'NNClassifier' is not defined

In [1]:
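# NOTE: disabled scratch test. As written it does not match LSTM's signature
# (vocab_size, embedding_dim, num_layers=1, batch_size=100, hidden_dim=1024):
# `vocab` is a list where an integer vocab_size is expected, and the third
# positional argument is num_layers, not a dropout ratio.
# vocab = 13744*[0]
# embedded_dim = 1000
# dropout_ratio = 0.5
# question_vec = torch.ones(len(vocab))
# print(question_vec.shape)
# lstm = LSTM(vocab, embedded_dim, dropout_ratio)
# y = lstm(question_vec)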

In [183]:
class AttentionNet(NNClassifier):
    """Two-layer stacked attention over image regions, queried by a question vector.

    Notation used in the comments (d = input_features, m = number of image
    regions, k = output_features):
        v_I : image region features, m x d
        v_Q : question vector, d
        h_A : tanh(W_I v_I + (W_Q u + b)), m x k
        p_I : softmax over the m regions of (W_P h_A + b), m x 1

    Args:
        input_features: dimensionality d of question and region vectors.
        output_features: size k of the attention hidden layer.
        num_classes: number of answer classes.
    """

    def __init__(self, input_features=1024, output_features=512, num_classes=1000):
        super(AttentionNet, self).__init__()

        # First attention layer.
        self.q_transform1 = nn.Linear(input_features, output_features)
        self.image_transform1 = nn.Linear(input_features, output_features, bias=False)
        self.fc31 = nn.Linear(output_features, 1)

        # Second attention layer.
        self.q_transform2 = nn.Linear(input_features, output_features)
        self.image_transform2 = nn.Linear(input_features, output_features, bias=False)
        self.fc32 = nn.Linear(output_features, 1)

        self.answerDist = nn.Linear(input_features, num_classes)

        self.tanh = nn.Tanh()
        # Explicit dim: normalise over the answer classes.
        self.softmax = nn.Softmax(dim=-1)
        # Defined but not applied in forward (unchanged from the original).
        self.dropout = nn.Dropout(0.5)

    def _attend(self, image_vec, query, q_transform, image_transform, scorer):
        """Run one attention layer and return the attended image vector.

        Args:
            image_vec: region features, ``(..., m, d)``.
            query: current query vector, ``(..., 1, d)``.
            q_transform, image_transform, scorer: the three linear layers
                of this attention layer.

        Returns:
            Attention-weighted sum of the regions, ``(..., 1, d)``.
        """
        # The (..., 1, k) query projection broadcasts over the m regions.
        h_a = self.tanh(image_transform(image_vec) + q_transform(query))
        # Normalise over the region axis. The scores are (..., m, 1), so an
        # implicit softmax dim would normalise the size-1 axis and make
        # every weight equal to 1.
        p_i = torch.softmax(scorer(h_a), dim=-2)
        # Weighted sum of region vectors; a view(d, m) of the (m, d) tensor
        # would reinterpret memory rather than transpose it.
        return (p_i * image_vec).sum(dim=-2, keepdim=True)

    def forward(self, question_vec, image_vec):
        """Predict an answer distribution from a question and image regions.

        Args:
            question_vec: ``(d,)`` or ``(1, d)`` for a single example, or
                ``(batch, d)`` when ``image_vec`` is batched.
            image_vec: ``(m, d)`` for a single example or ``(batch, m, d)``.

        Returns:
            Answer probabilities: ``(1, num_classes)`` for a single example,
            ``(batch, num_classes)`` for batched input.
        """
        batched = image_vec.dim() == 3
        if batched:
            u_0 = question_vec.reshape(image_vec.size(0), 1, -1)
        else:
            u_0 = question_vec.reshape(1, -1)

        # Layer 1: attend with the raw question, then refine the query.
        v_0 = self._attend(image_vec, u_0, self.q_transform1,
                           self.image_transform1, self.fc31)
        u_1 = v_0 + u_0

        # Layer 2: attend with the refined query u_1 (the original
        # mistakenly reused u_0 here).
        v_1 = self._attend(image_vec, u_1, self.q_transform2,
                           self.image_transform2, self.fc32)
        u_2 = v_1 + u_1

        # NOTE(review): this returns probabilities, while the inherited
        # criterion is nn.CrossEntropyLoss, which applies log-softmax
        # itself. Confirm whether callers should be given logits instead.
        pI = self.softmax(self.answerDist(u_2))
        if batched:
            pI = pI.squeeze(1)

        return pI

In [14]:
class SAN(NNClassifier):
    def __init__(self, num_classes, fine_tuning=False):
        super(SAN, self).__init__()
        
        