In [1]:
# Training set: each entry is (list of tokens, language label).
data = [
    (text.split(), language)
    for text, language in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

# Held-out sentences, stored in the same (tokens, label) layout.
test_data = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

# word_to_ix maps each word in the vocab to a unique integer, which will be
# its index into the bag-of-words vector. Indices are handed out in order of
# first appearance across the training and the test sentences.
word_to_ix = {}
for tokens, _language in data + test_data:
    for token in tokens:
        # setdefault only inserts when the token is new, so the current
        # dictionary size is the next free index.
        word_to_ix.setdefault(token, len(word_to_ix))

print(word_to_ix)

# Dimensions of the classifier: one input feature per vocabulary word and
# one output per language label.
VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

In [2]:
# Author: Robert Guthrie
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's random number generator so that repeated runs of the
# notebook draw the same random numbers.
torch.manual_seed(1)

class BoWClassifier(nn.Module):  # inherits from nn.Module
    """Bag-of-words classifier: an affine map followed by log-softmax.

    Args:
        num_labels: number of output classes.
        vocab_size: length of the bag-of-words input vector.
    """

    def __init__(self, num_labels, vocab_size):
        # Call nn.Module's initialiser first; every nn.Module subclass
        # should do this before assigning sub-modules.
        super(BoWClassifier, self).__init__()

        # The only parameters needed are A and b, the parameters of the
        # affine mapping. nn.Linear provides that affine map.
        self.linear = nn.Linear(vocab_size, num_labels)

        # NOTE: the log-softmax non-linearity has no parameters, so there
        # is nothing to declare for it here.

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for ``bow_vec``.

        Args:
            bow_vec: bag-of-words vector(s) whose last dimension has
                ``vocab_size`` entries, e.g. the ``(1, vocab_size)`` tensor
                returned by ``make_bow_vector``.

        Returns:
            Tensor with the same leading dimensions as ``bow_vec`` and a
            last dimension of ``num_labels``, normalised with log-softmax
            over that last dimension.
        """
        # Pass the input through the linear layer, then through log_softmax.
        # The label dimension is named explicitly: calling log_softmax
        # without ``dim`` relies on a deprecated implicit choice and emits a
        # warning. dim=-1 is the label axis for batched and unbatched input.
        return F.log_softmax(self.linear(bow_vec), dim=-1)
    

def make_bow_vector(sentence, word_to_ix):
    """Turn a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: iterable of words; every word must be a key of
            ``word_to_ix`` (an unknown word raises ``KeyError``).
        word_to_ix: mapping from word to its index in the vector.

    Returns:
        Tensor of shape ``(1, len(word_to_ix))`` where entry ``i`` holds the
        number of times the word with index ``i`` occurs in ``sentence``.
    """
    counts = torch.zeros(len(word_to_ix))
    for position in (word_to_ix[token] for token in sentence):
        counts[position] += 1

    # Reshape to a single-row matrix: one row of vocabulary counts.
    return counts.view(1, -1)

def make_target(label, label_to_ix):
    """Encode ``label`` as a one-element LongTensor holding its class index.

    Args:
        label: key to look up in ``label_to_ix``.
        label_to_ix: mapping from label to integer class index.

    Returns:
        ``torch.LongTensor`` of shape ``(1,)`` containing the class index.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])

# Build the classifier: one output per label, one input feature per
# vocabulary word.
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The model knows its parameters. Assigning a component to an attribute in
# a module's __init__ (as done with self.linear = nn.Linear(...)) is enough
# for the module (here BoWClassifier) to keep track of that component's
# parameters. model.parameters() yields them: the matrix A first, then the
# bias b.

# To run the model, build the BoW vector of the first training sentence and
# pass it in wrapped in an autograd.Variable.
sample = data[0]
sample_tokens = sample[0]
bow_vector = make_bow_vector(sample_tokens, word_to_ix)
wrapped_input = autograd.Variable(bow_vector)
log_probs = model(wrapped_input)

# Show the predicted log-probabilities, then the input counts they came from.
print(log_probs)
print(bow_vector)

Variable containing:
-0.8630 -0.5480
[torch.FloatTensor of size 1x2]



Columns 0 to 12 
    1     1     1     1     1     1     0     0     0     0     0     0     0

Columns 13 to 25 
    0     0     0     0     0     0     0     0     0     0     0     0     0
[torch.FloatTensor of size 1x26]



In [3]:
# Integer class index for each language label: SPANISH -> 0, ENGLISH -> 1.
label_to_ix = {name: index for index, name in enumerate(("SPANISH", "ENGLISH"))}

In [4]:
# Baseline: score the test sentences with the untrained model so there is
# a "before" to compare with the "after" once training has run.
for instance, label in test_data:
    features = make_bow_vector(instance, word_to_ix)
    bow_vec = autograd.Variable(features)
    log_probs = model(bow_vec)
    print(log_probs)

# The first parameter of the model is the weight matrix; print its column
# corresponding to "creo".
weight_matrix = next(model.parameters())
creo_column = word_to_ix["creo"]
print(weight_matrix[:, creo_column])

Variable containing:
-1.2611 -0.3332
[torch.FloatTensor of size 1x2]

Variable containing:
-1.1140 -0.3978
[torch.FloatTensor of size 1x2]

Variable containing:
-0.1599
-0.1411
[torch.FloatTensor of size 2]



In [5]:
# Negative log-likelihood loss, applied to the log-probabilities the model
# returns; plain SGD performs the parameter updates.
LEARNING_RATE = 0.1
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

# Usually you want several passes over the training data. 100 is far more
# than a real data set would need, but real data sets also have more than a
# couple of instances. Somewhere between 5 and 30 epochs is usually
# reasonable.
NUM_EPOCHS = 100
for epoch in range(NUM_EPOCHS):
    for instance, label in data:
        # Step 1. PyTorch accumulates gradients, so clear them out before
        # each instance.
        model.zero_grad()

        # Step 2. Build the BoW input and wrap the target in a Variable as an
        # integer class index. If the target is SPANISH the wrapped integer
        # is 0, and the loss function then treats element 0 of the
        # log-probabilities as the log-probability of SPANISH.
        features = make_bow_vector(instance, word_to_ix)
        bow_vec = autograd.Variable(features)
        class_index = make_target(label, label_to_ix)
        target = autograd.Variable(class_index)

        # Step 3. Forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, back-propagate to get the gradients, and
        # update the parameters with optimizer.step().
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

# Score the test sentences again, this time with the trained model.
for instance, label in test_data:
    features = make_bow_vector(instance, word_to_ix)
    bow_vec = autograd.Variable(features)
    log_probs = model(bow_vec)
    print(log_probs)

# Print the weight-matrix column for "creo" once more: the entry for the
# Spanish index goes up and the one for English goes down.
weight_matrix = next(model.parameters())
creo_column = word_to_ix["creo"]
print(weight_matrix[:, creo_column])

Variable containing:
-0.2209 -1.6186
[torch.FloatTensor of size 1x2]

Variable containing:
-3.0979 -0.0462
[torch.FloatTensor of size 1x2]

Variable containing:
 0.3315
-0.6325
[torch.FloatTensor of size 2]

