In [1]:

# Author: Robert Guthrie

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Fix the global PyTorch RNG seed so that the randomly initialised weights
# of the model built below (and therefore the printed tensors) are the same
# on every Restart & Run All.
torch.manual_seed(1)


<torch._C.Generator at 0x7f95fdc27130>


# Example : logistic regression bag-of-words classifier 

"""
Read it from here 
https://pytorch.org/tutorials/beginner/nlp/deep_learning_tutorial.html

vocab: hello -- index 0, world -- index 1

'hello hello hello hello' -- bow vector [4, 0]
'hello world world hello' -- bow vector [2, 2]

each bow vector is [count(hello), count(world)]

this vector is x
output of the network is log softmax(Ax + b)
"""





In [2]:

# Tiny labelled corpus. Every entry is a (list of tokens, language label)
# pair; sentences are tokenised by splitting on whitespace.
data = [
    (text.split(), language)
    for text, language in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

# Held-out sentences used for the before/after-training comparison.
test_data = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

print(data)
print(test_data)



[(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH'), (['Give', 'it', 'to', 'me'], 'ENGLISH'), (['No', 'creo', 'que', 'sea', 'una', 'buena', 'idea'], 'SPANISH'), (['No', 'it', 'is', 'not', 'a', 'good', 'idea', 'to', 'get', 'lost', 'at', 'sea'], 'ENGLISH')]
[(['Yo', 'creo', 'que', 'si'], 'SPANISH'), (['it', 'is', 'lost', 'on', 'me'], 'ENGLISH')]


In [3]:

# word_to_ix maps every word of the vocabulary to a unique integer: the
# position of that word's count inside a bag-of-words vector.
#
# The vocabulary is collected from the training AND the test sentences.
# Each item of data / test_data is a (list of words, language) pair; only
# the words matter here, so the language is bound to "_" and ignored.
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        # A word seen for the first time gets the next free index, which is
        # the current size of the dict (with 2 words stored, indices 0 and 1
        # are taken, so the third word receives index 2). setdefault leaves
        # the index of an already-known word untouched.
        word_to_ix.setdefault(word, len(word_to_ix))

print(word_to_ix)

# Input width of the classifier (one slot per vocabulary word) and its
# output width (one slot per language).
VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

"""
here basically they are mapping words to languages 
words are expressed in terms of vocabulary
and there are two languages so NUM_LABELS = 2
"""


{'si': 24, 'Yo': 23, 'lost': 21, 'sea': 12, 'a': 18, 'buena': 14, 'comer': 2, 'is': 16, 'una': 13, 'not': 17, 'me': 0, 'good': 19, 'on': 25, 'at': 22, 'creo': 10, 'No': 9, 'cafeteria': 5, 'gusta': 1, 'idea': 15, 'que': 11, 'la': 4, 'get': 20, 'en': 3, 'it': 7, 'Give': 6, 'to': 8}


'\nhere basically they are mapping words to languages \nwords are expressed in terms of vocabulary\nand there are two languages so NUM_LABELS = 2\n'

In [4]:

class BoWClassifier(nn.Module):  # inheriting from nn.Module!
    """Logistic-regression bag-of-words classifier.

    Computes log_softmax(A x + b), where x is a bag-of-words count vector
    and A, b are the weight and bias of a single nn.Linear layer.

    Args:
        num_labels: number of output classes (size of the last output axis).
        vocab_size: length of the bag-of-words input vector.
    """

    def __init__(self, num_labels, vocab_size):
        # Calls the init function of nn.Module. Don't get confused by the
        # syntax, just always do it in an nn.Module.
        super(BoWClassifier, self).__init__()

        # The only parameters needed are A and b of the affine map, which
        # nn.Linear provides. The input dimension is vocab_size because x
        # holds one count per vocabulary word; the output dimension is
        # num_labels because there is one score per label.
        self.linear = nn.Linear(vocab_size, num_labels)

        # NOTE! The log softmax non-linearity has no parameters, so there
        # is nothing to register for it here.

    def forward(self, bow_vec):
        """Return the log-probabilities over labels for `bow_vec`.

        Args:
            bow_vec: tensor whose last axis has length vocab_size, e.g. the
                (1, vocab_size) tensors built by make_bow_vector.

        Returns:
            Tensor shaped like `bow_vec` with the last axis replaced by
            num_labels, holding log-probabilities.
        """
        # Pass the input through the linear layer, then through log_softmax.
        # Normalising over the LAST axis (the label axis produced by
        # nn.Linear) gives exactly the same result as dim=1 for 2-D input,
        # and additionally works for an unbatched 1-D vector or extra
        # leading batch axes, where dim=1 fails or picks the wrong axis.
        return F.log_softmax(self.linear(bow_vec), dim=-1)



In [5]:

def make_bow_vector(sentence, word_to_ix):
    """Build the bag-of-words count vector of a sentence.

    Args:
        sentence: iterable of words (tokens).
        word_to_ix: dict mapping each vocabulary word to its position in
            the vector.

    Returns:
        A tensor of shape (1, len(word_to_ix)). Entry word_to_ix[w] holds
        the number of times the word w occurs in `sentence`; every other
        entry is 0.

    Raises:
        KeyError: if a word of `sentence` is missing from `word_to_ix`.
    """
    vocab_size = len(word_to_ix)
    counts = torch.zeros(vocab_size)
    for token in sentence:
        position = word_to_ix[token]
        counts[position] += 1
    # Add a leading axis of size 1 so the result is a batch of one row.
    return counts.view(1, -1)


def make_target(label, label_to_ix):
    """Wrap the integer index of a label in a tensor.

    Args:
        label: the label (language name) whose index is wanted.
        label_to_ix: dict mapping each label to its integer index,
            e.g. {"SPANISH": 0, "ENGLISH": 1}.

    Returns:
        A LongTensor of shape (1,) containing label_to_ix[label].
    """
    label_index = label_to_ix[label]
    return torch.LongTensor([label_index])


In [6]:

# Build the classifier: one input slot per vocabulary word, one output per
# language. Keyword arguments make it impossible to swap the two sizes.
model = BoWClassifier(num_labels=NUM_LABELS, vocab_size=VOCAB_SIZE)

print(model)


BoWClassifier(
  (linear): Linear(in_features=26, out_features=2, bias=True)
)


In [7]:

# The model knows its parameters.
# The first output below is A, the second is b.
# Whenever you assign a component to a class variable in the __init__
# function of a module, which was done with the line
#     self.linear = nn.Linear(...)
# then, through some Python magic from the PyTorch devs, your module
# (in this case, BoWClassifier) will store knowledge of the nn.Linear's
# parameters, so model.parameters() yields them.
for param in model.parameters():
    print(param.shape)


torch.Size([2, 26])
torch.Size([2])


In [8]:

# Run the (still untrained) model on one training example by passing in
# its BoW vector. Nothing is trained here, so gradient tracking is switched
# off with torch.no_grad().
with torch.no_grad():
    sample = data[0]
    print(sample)

    # Each sample is a (list of words, language) pair.
    words, language = sample
    print(words, language)

    bow_vector = make_bow_vector(words, word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)


(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH')
['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'] SPANISH
tensor([[-0.8195, -0.5810]])


In [9]:
"""
Which of the above values corresponds to the log probability 
of ENGLISH, and which to SPANISH? We never defined it, but 
we need to if we want to train the thing.
"""
# Fixes which output column belongs to which language: make_target() turns
# a label into this index, and the loss function treats that index as the
# position of the correct entry in the model's log-probability output.
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}


In [10]:

# Run on test data before we train,
# just to see a before-and-after.
# torch.no_grad() is used because these forward passes are only for
# inspection; no gradients are needed.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)


# Print the matrix column corresponding to "creo".
# next(model.parameters()) is the first parameter of the model, i.e. the
# weight matrix A; the selected column holds one weight per label for the
# word "creo".
print(next(model.parameters())[:, word_to_ix["creo"]])


# As the model's forward() already applies log softmax, we use NLL loss.
# Had we not used the log softmax we would have used
# the cross-entropy loss.
loss_function = nn.NLLLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.1)


# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets
# have more than two instances.
# Usually, somewhere between 5 and 30 epochs is reasonable.
for epoch in range(100):

    # One parameter update per training sentence.
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance.

        model.zero_grad()

        # Step 2. Make our BOW vector and also
        # we must wrap the target in a Tensor
        # as an integer. For example, if the
        # target is SPANISH, then we wrap the
        # integer 0. The loss function then knows that the 0th
        # element of the log probabilities is the log probability
        # corresponding to SPANISH.

        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Run our forward pass.

        log_probs = model(bow_vec)

        # Step 4. Compute the loss, gradients,
        # and update the parameters by
        # calling optimizer.step().

        loss = loss_function(log_probs, target)

        loss.backward()

        optimizer.step()




# Same evaluation as before training, now with the trained parameters.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Index corresponding to Spanish goes up, English goes down!
# ("creo" occurs only in a SPANISH sentence of the training data.)
print(next(model.parameters())[:, word_to_ix["creo"]])



tensor([[-0.6250, -0.7662]])
tensor([[-0.5870, -0.8119]])
tensor([0.0544, 0.1722], grad_fn=<SelectBackward>)
tensor([[-0.1210, -2.1721]])
tensor([[-2.7767, -0.0643]])
tensor([ 0.5004, -0.2738], grad_fn=<SelectBackward>)
