In [2]:
import torch
import numpy as np
from utils import * # data loader

In [3]:
# Load the IMDb data. With final=False the second split holds validation data;
# with final=True the train and test sets are returned instead.
(x_train, y_train), (x_val, y_val), (i2w, w2i), numcls = load_imdb(final=False)

# Sanity check: number of training sequences and number of training labels.
print(f'x_train: {len(x_train)}')
print(f'y_train: {len(y_train)}')

x_train: 20000
y_train: 20000


The return values are as follows:

● x_train A python list of lists of integers. Each integer represents a word. Sorted
from short to long.

● y_train The corresponding class labels: 0 for positive, 1 for negative.

● x_val Test/validation data. Laid out the same as x_train.

● y_val Test/validation labels

● i2w A list of strings mapping the integers in the sequences to their original words.
i2w[141] returns the string containing word 141.

● w2i A dictionary mapping the words to their indices. w2i['film'] returns the index
for the word "film".

In [None]:
# First line: the first training example, a list of integers (one per word);
# x_train is sorted from short to long.
# Second line: its class label, 0 or 1. The data description above states
# 0 = positive, 1 = negative — TODO confirm against load_imdb.
print(x_train[0], y_train[0], sep='\n')

In [None]:
# Show the vocabulary size and a short preview instead of dumping the whole
# vocabulary into the notebook output.
print(len(i2w), i2w[:20])                # index to word (list)
print(len(w2i), list(w2i.items())[:20])  # word to index (dict)

In [None]:
# Print the indices stored under 'pad', 'start' and 'end'.
# 'pad' is the entry used to fill shorter sentences up to a common length.
# NOTE(review): confirm that these keys are the loader's special tokens and
# not ordinary vocabulary words.
for special_token in ('pad', 'start', 'end'):
    print(w2i[special_token])

In [None]:
# Decode the first ten training sentences back into words.
# Each printed line is the words separated by spaces, followed by the sentence length.
for encoded_sentence in x_train[:10]:
    decoded = ''.join(i2w[word_index] + ' ' for word_index in encoded_sentence)
    print(decoded + str(len(encoded_sentence)))

In [4]:
def padding(x, y, w2i, batch_size = 16, pad_token = 'pad'):
    """Split the data into consecutive batches and pad every batch to a common length.

    Each batch is padded (on the right) to the length of its own longest
    sentence, so different batches may have different widths.

    The input lists are NOT modified: padded copies of the sentences are built,
    so the function can be called repeatedly on the same data.

    Args:
        x: list of sentences, each a list of integer word indices.
        y: list of class labels, aligned with `x`.
        w2i: dict mapping words to indices; `w2i[pad_token]` is the fill value.
        batch_size: number of sentences per batch (the last batch may be smaller).
        pad_token: key in `w2i` whose index is used for padding.
            NOTE(review): the default 'pad' is kept for backward compatibility;
            confirm it is the loader's padding token and not an ordinary word.

    Returns:
        (batches_x, batches_y): two lists of torch.long tensors. Each element of
        batches_x has shape (sentences_in_batch, longest_sentence_in_batch) and
        the matching element of batches_y has shape (sentences_in_batch,).

    Raises:
        KeyError: if `pad_token` is not a key of `w2i`.
    """
    pad_index = w2i[pad_token]

    batches_x = []
    batches_y = []

    # step over x in steps of batch_size
    for start in range(0, len(x), batch_size):
        end = start + batch_size

        batch_x = x[start:end]
        batch_y = y[start:end]

        # The target length is the same for the whole batch: compute it once.
        longest_sentence = max(len(sentence) for sentence in batch_x)

        # Build padded copies; `list(sentence) + ...` never touches the caller's lists.
        padded_batch = [
            list(sentence) + [pad_index] * (longest_sentence - len(sentence))
            for sentence in batch_x
        ]

        batches_x.append(torch.tensor(padded_batch, dtype = torch.long))
        batches_y.append(torch.tensor(batch_y, dtype = torch.long))

    return batches_x, batches_y
                

In [5]:
# Split the training data into padded mini-batches of `batch_size` sentences each.
batch_size = 16
batches_x, batches_y = padding(
    x=x_train,
    y=y_train,
    w2i=w2i,
    batch_size=batch_size,
)

Question 2

In [6]:
class MLP(torch.nn.Module):
    """Bag-of-words style classifier: embedding -> linear -> ReLU -> max over dim 1 -> linear.

    The vocabulary size is taken from `len(w2i)`; the output layer has 2 units
    (one score per class).
    """

    def __init__(self, w2i, embedding_dim = 300, hidden_size = 300):
        super().__init__()
        self.embedding = torch.nn.Embedding(len(w2i), embedding_dim)
        self.hidden = torch.nn.Linear(embedding_dim, hidden_size)
        self.output = torch.nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return the two class scores for each row of `x`.

        `x` is expected to hold integer word indices, one sentence per row.
        """
        # Embed every token and apply the hidden layer token by token.
        token_features = torch.relu(self.hidden(self.embedding(x)))
        # Global max pooling over dim 1 (the sequence axis of a batch-first input).
        pooled, _ = token_features.max(dim=1)
        return self.output(pooled)
    

Question 3

In [7]:
def train(batches_x, batches_y, model, epochs = 5, optimizer = 'Adam', lr=0.001):
    """Train `model` on pre-batched data, printing mean loss and accuracy per epoch.

    The model and all batches are moved to CUDA when it is available, otherwise
    to the CPU. The loss is the cross entropy between the model output and the
    labels of the batch.

    Args:
        batches_x: list of input tensors, one per batch.
        batches_y: list of label tensors; batches_y[i] belongs to batches_x[i].
        model: torch.nn.Module returning one row of class scores per example.
        epochs: number of passes over all batches.
        optimizer: name of the optimizer, either 'Adam' or 'SGD'.
        lr: learning rate passed to the optimizer.

    Returns:
        None. Progress is reported with print().

    Raises:
        ValueError: if `optimizer` is not 'Adam' or 'SGD'.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)

    batches_x = [batch.to(device) for batch in batches_x]
    batches_y = [batch.to(device) for batch in batches_y]

    if optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    elif optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    else:
        # Fail early with a clear message instead of an AttributeError on a string.
        raise ValueError("optimizer must be 'Adam' or 'SGD', got %r" % (optimizer,))

    for epoch in range(epochs):

        losses = []
        accuracies = []
        for i, batch in enumerate(batches_x):
            if i % 100 == 0: print(i)
            targets = batches_y[i]

            # Gradients accumulate in .grad across backward() calls, so they
            # must be cleared before every step, not just once before training.
            optimizer.zero_grad()

            predicted_y = model(batch)
            loss = torch.nn.functional.cross_entropy(predicted_y, targets)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

            # get index of the max value (0 or 1)
            predicted_class = predicted_y.argmax(dim=1)

            # accuracy: number of correct predictions / number of predictions
            n_correct = (predicted_class == targets).sum().item()
            accuracies.append(n_correct / len(predicted_class))

        print('Epoch: ', epoch, 'Loss: ', np.mean(losses), 'Accuracy: ', np.mean(accuracies))

In [None]:
# Train the MLP on all training batches for 10 epochs with Adam.
model = MLP(w2i)
train(
    batches_x,
    batches_y,
    model,
    epochs=10,
    optimizer='Adam',
    lr=0.001,
)

Question 4

In [8]:
class Elman(torch.nn.Module):
    """Hand-written Elman (simple recurrent) layer.

    At every time step the current input is concatenated with the previous
    hidden state, passed through `lin1` and a ReLU to give the new hidden state,
    and `lin2` maps that hidden state to the output for the step.

    Args:
        insize: size of each input vector.
        outsize: size of each output vector.
        hsize: size of the hidden state.
    """

    def __init__(self, insize=300, outsize=300, hsize=300):
        super().__init__()
        self.lin1 = torch.nn.Linear(insize + hsize, hsize)  # Input-to-hidden layer
        self.lin2 = torch.nn.Linear(hsize, outsize)  # Hidden-to-output layer

    def forward(self, x, hidden=None):
        """Run the layer over a batch-first sequence.

        Args:
            x: tensor of shape (batch, sequence, insize).
            hidden: optional initial hidden state of shape (batch, hsize);
                zeros are used when it is None.

        Returns:
            (outputs, hidden): outputs has shape (batch, sequence, outsize) and
            hidden is the hidden state after the last step, shape (batch, hsize).
        """
        batch_size, sequence_size, _ = x.size()
        if hidden is None:
            # The state must have the hidden size (not the input size) and live
            # on the same device as the input so that torch.cat works on GPU too.
            hidden = torch.zeros(batch_size, self.lin1.out_features,
                                 dtype=x.dtype, device=x.device)

        outs = []
        for i in range(sequence_size):
            inp = torch.cat([x[:, i, :], hidden], dim=1)
            hidden = torch.nn.functional.relu(self.lin1(inp))
            outs.append(self.lin2(hidden))

        # Stack the per-step outputs along a new sequence axis (dim 1).
        return torch.stack(outs, dim=1), hidden
    

In [9]:
class MLP2(torch.nn.Module):
    """Sequence classifier: embedding -> Elman layer -> ReLU -> max over dim 1 -> linear.

    NOTE(review): `hidden_size` is passed as the second positional argument of
    Elman, i.e. its `outsize`; Elman's own `hsize` keeps its default value.
    Confirm that this is intended.
    """

    def __init__(self, w2i, embedding_dim = 300, hidden_size = 300):
        super().__init__()
        self.embedding = torch.nn.Embedding(len(w2i), embedding_dim)
        self.hidden = Elman(embedding_dim, hidden_size)
        self.output = torch.nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return the two class scores for each row of word indices in `x`."""
        # The Elman layer returns (per-step outputs, last hidden state);
        # only the per-step outputs are used.
        step_outputs, _ = self.hidden(self.embedding(x))
        # ReLU followed by a global max over dim 1 (the sequence axis).
        pooled, _ = torch.relu(step_outputs).max(dim=1)
        return self.output(pooled)

In [None]:
# Train the hand-written Elman model on the first two batches only, for 10 epochs.
model = MLP2(w2i)
train(
    batches_x[:2],
    batches_y[:2],
    model,
    epochs=10,
    optimizer='Adam',
    lr=0.001,
)

Question 5

In [20]:
# Implementation of the Elman network with PyTorch modules
class Elman_torch(torch.nn.Module):
    """Sequence classifier: embedding -> torch.nn.RNN -> max over dim 1 -> linear.

    The RNN is created with batch_first=True, so inputs are (batch, sequence).
    """

    def __init__(self, w2i, embedding_dim = 300, hidden_size = 300):
        super().__init__()
        self.embedding = torch.nn.Embedding(len(w2i), embedding_dim)
        self.hidden1 = torch.nn.RNN(embedding_dim, hidden_size, batch_first=True)
        # A second RNN layer (hidden2) is intentionally left disabled.
        self.output = torch.nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return the two class scores for each row of word indices in `x`."""
        # The RNN returns (per-step outputs, final hidden state);
        # only the per-step outputs are pooled.
        step_outputs, _ = self.hidden1(self.embedding(x))
        pooled, _ = step_outputs.max(dim=1)
        return self.output(pooled)
    
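# NOTE: torch.nn.RNN computes a single recurrent layer and already applies its activation (tanh by default) internally, so there is no need to apply an activation function in the forward method.
# The model was meant to have 2 hidden layers, since our own implementation of the Elman network has 2 layers, but the second RNN layer (hidden2) is not enabled.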

In [21]:
# Implementation of the LSTM network with PyTorch modules
class LSTM(torch.nn.Module):
    """Sequence classifier: embedding -> torch.nn.LSTM -> ReLU -> max over dim 1 -> linear.

    The LSTM is created with batch_first=True, so inputs are (batch, sequence).
    """

    def __init__(self, w2i, embedding_dim = 300, hidden_size = 300):
        super().__init__()
        self.embedding = torch.nn.Embedding(len(w2i), embedding_dim)
        self.hidden = torch.nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.output = torch.nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return the two class scores for each row of word indices in `x`."""
        # The LSTM returns (per-step outputs, (h_n, c_n));
        # only the per-step outputs are used.
        step_outputs, _ = self.hidden(self.embedding(x))
        # ReLU followed by a global max over dim 1 (the sequence axis).
        pooled, _ = torch.relu(step_outputs).max(dim=1)
        return self.output(pooled)

In [None]:
# Elman model built on torch.nn.RNN: quick run on the first two batches, 2 epochs.
model = Elman_torch(w2i)
train(
    batches_x[:2],
    batches_y[:2],
    model,
    epochs=2,
    optimizer='Adam',
    lr=0.001,
)

# LSTM model: same quick run on the first two batches, 2 epochs.
model = LSTM(w2i)
train(
    batches_x[:2],
    batches_y[:2],
    model,
    epochs=2,
    optimizer='Adam',
    lr=0.001,
)