In [14]:
import torch
from torch import nn

from IPython.core.interactiveshell import InteractiveShell 
InteractiveShell.ast_node_interactivity = "all"

In [5]:
class MNISTClassifier(nn.Module):

    def __init__(self):

        super().__init__()

        self.fc1 = nn.Linear(784, 392)

        self.fc2 = nn.Linear(392, 196)

        self.fc3 = nn.Linear(196, 98)

        self.fc4 = nn.Linear(98, 10)
        
        self.dropout = nn.Dropout(p=0.2) # defined our own dropout

    #### Forward pass

    def forward(self, x): # x is the input data

        x = x.view(x.shape[0], -1)   # why

        x = self.dropout(F.relu(self.fc1(x)))   #pass input x to fc1, apply relu activation, and dropout

        x = self.dropout(F.relu(self.fc2(x)))

        x = self.dropout(F.relu(self.fc3(x)))

        x = F.log_softmax(self.fc4(x), dim=1)

In [19]:
model = MNISTClassifier()

loss_function = nn.NLLLoss()

opt = optim.Adam(model.parameters(), lr=0.001)

NameError: name 'optim' is not defined

In [20]:
import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F
from torch import nn, optim

In [21]:
training_data = [
        ("Veinte paginas".lower().split(), "Spanish"),
        ("I will visit the library".lower().split(), "English"),
        ("I am reading a book".lower().split(), "English"),
        ("This is my favourite chapter".lower().split(), "English"),
        ("Estoy en la biblioteca".lower().split(), "Spanish"),
        ("Tengo un libro".lower().split(), "Spanish")
        ]

test_data = [
        ("Estoy leyendo".lower().split(), "Spanish"),
        ("This is not my favourite book".lower().split(), "English")
        ]

In [23]:
word_dict = {}
i = 0
for words, language in training_data + test_data:
    for word in words:
        if word not in word_dict:
            word_dict[word] = i
            i += 1
print(word_dict)

{'veinte': 0, 'paginas': 1, 'i': 2, 'will': 3, 'visit': 4, 'the': 5, 'library': 6, 'am': 7, 'reading': 8, 'a': 9, 'book': 10, 'this': 11, 'is': 12, 'my': 13, 'favourite': 14, 'chapter': 15, 'estoy': 16, 'en': 17, 'la': 18, 'biblioteca': 19, 'tengo': 20, 'un': 21, 'libro': 22, 'leyendo': 23, 'not': 24}


In [27]:
corpus_size = len(word_dict)
languages = 2
label_index = {"Spanish": 0, "English": 1}
print('corupus size ', corpus_size)


corupus size  25


25

In [48]:
class BagofWordsClassifier(nn.Module):  

    def __init__(self, languages, corpus_size):
        super(BagofWordsClassifier, self).__init__()
        self.linear = nn.Linear(corpus_size, languages) # The only input we need to feed or worry is corpus size
        #. Our input should be of batch_size X corpus_size. For example, [1,25] or [2,25] is good

    def forward(self, bow_vec):
        return F.log_softmax(self.linear(bow_vec), dim=1) #dim (int): A dimension along which log_softmax will be computed.

In [73]:
def make_bow_vector(sentence, word_index):
    word_vec = torch.zeros(corpus_size) # size is [25] of 0s to be returned
    for word in sentence:
        word_vec[word_dict[word]] += 1
    return word_vec.view(1, -1) # return shape of (1, 25). here 1 is the batch size

In [40]:
def make_target(label, label_index):
    return torch.LongTensor([label_index[label]]) # return shape of ([1])

number of parameters we are expecting:
- two output nodes (english and spanish)
- each node takes full input (25)
- each now Wx +b means 25 W parameters and 1 b 


In [92]:
[param.shape for param in model.parameters()]
# we expect 2 nodes. Each node has 25 w's and 1 b

[torch.Size([2, 25]), torch.Size([2])]

In [45]:
model = BagofWordsClassifier(languages, corpus_size)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)



for epoch in range(100):
    for sentence, label in training_data:

        model.zero_grad()

        bow_vec = make_bow_vector(sentence, word_dict) # this is giving us the shape of batchX input(corpus_size)
        target = make_target(label, label_index)

        log_probs = model(bow_vec)
        

        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()
        
    if epoch % 10 == 0:
        print('Epoch: ',str(epoch+1),', Loss: ' + str(loss.item()))

In [None]:
def make_predictions(data): # this prediction gives probability of english and spanish both

    with torch.no_grad():    # disable updating of the parameters
        sentence = data[0]
        label = data[1]
        bow_vec = make_bow_vector(sentence, word_dict)
        log_probs = model(bow_vec)
        print(sentence)
        print(label + ':')
        print(np.exp(log_probs))

In [87]:
make_predictions(test_data[0])
make_predictions(test_data[1])

['estoy', 'leyendo']
Spanish:
tensor([[0.9169, 0.0831]])
['this', 'is', 'not', 'my', 'favourite', 'book']
English:
tensor([[0.0064, 0.9936]])


In [103]:
def return_params(word): 
    index = word_dict[word]
    for p in model.parameters():
        dims = len(p.size())
        if dims == 2:
            print(word + ':', ' index=', index, ' :')
            print('Spanish Parameter = ' + str(p[0][index].item()))
            print('English Parameter = ' + str(p[1][index].item()))
            print('\n')
            
return_params('estoy')
return_params('book')

estoy:  index= 16  :
Spanish Parameter = 0.8518858551979065
English Parameter = -0.5249736309051514


book:  index= 10  :
Spanish Parameter = -0.5040857195854187
English Parameter = 0.4929571747779846


