In [1]:
from collections import defaultdict
import numpy as np

import torch
from torch import nn
from torch.optim import Adam

In [2]:
device = "mps" if torch.backends.mps.is_available() else "cpu"

# Deep CBoW Model

In [3]:
class DeepCBoW(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_classes, hidden_layers_sizes, device):
        super(DeepCBoW, self).__init__()
        
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.n_classes = n_classes
        self.hidden_layers_sizes = hidden_layers_sizes
        self.n_hidden_layers = len(hidden_layers_sizes)
        self.device = device
        
        # Layers ---------------------------------------------------------------------
        # Could use EmbeddingBag but not available for mps for the time-being
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim, device=self.device)
        nn.init.xavier_uniform_(self.embedding.weight)
        
        # I could have used LazyLinear to not specify the input size
        # but then I wouldn't be able to initialize the weights using
        # xavier_uniform_
        self.linears = nn.ModuleList([
            nn.Linear(embedding_dim if i==0 else self.hidden_layers_sizes[i-1], self.hidden_layers_sizes[i], device=self.device) 
            for i in range(self.n_hidden_layers)
        ])
        
        for i in range(self.n_hidden_layers):
            nn.init.xavier_uniform_(self.linears[i].weight)
        
        self.output_layer = nn.Linear(self.hidden_layers_sizes[-1], self.n_classes, device=self.device)
    
    
    def forward(self, words):
        emb = self.embedding(words) # size: num_words * embedding_dim
        out = emb.sum(dim=0, keepdims=True) # size: 1 * embedding_dim
        for layer in self.linears:
            out = layer(out)
            out = torch.tanh(out)
        # size : 1 * last hidden layer size
        out = self.output_layer(out) # size: 1 * n_classes
        return out

# Applying the model

In [4]:
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
UNK = w2i["<unk>"]

In [5]:
def read_dataset(path: str):
    with open(path, "r") as f:
        try:
            for line in f:
                line = f.readline().lower().strip().split(" ||| ")
                text_class, text = line[0], line[1]
                yield ([w2i[word] for word in text.split(" ")], t2i[text_class])
        except:
            pass

In [6]:
train = list(read_dataset("../data/classes/train.txt"))
vocab_size = len(w2i)
n_classes = len(t2i)

In [7]:
vocab_size

11402

In [8]:
n_classes

5

In [9]:
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("../data/classes/dev.txt"))

In [10]:
dcbow_model = DeepCBoW(vocab_size, 64, n_classes, [32, 16], device=device)

In [11]:
loss_criterion = nn.CrossEntropyLoss()
optimizer = Adam(dcbow_model.parameters())

In [12]:
# Just 10 epochs as the goal is not to train a real model
# but just to see if the implementation is working
for i in range(10):
    train_loss = 0
    test_accuracy = 0
    for words, sentence_class in train:
        words = torch.tensor(words, device=device)
        sentence_class = torch.tensor([sentence_class], device=device)
        predictions = dcbow_model(words)
        loss = loss_criterion(predictions, sentence_class)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Iteration {i} - Train loss: {train_loss/len(train)}")
    with torch.no_grad():
        for words, sentence_class in dev:
            words = torch.tensor(words, device=device)
            predictions = dcbow_model(words)
            predicted_class = np.argmax(predictions.detach().cpu().numpy())
            if predicted_class == sentence_class:
                test_accuracy += 1
    print(f"Iteration {i} - Test accuracy: {test_accuracy/len(dev)}")

Iteration 0 - Train loss: 1.2302827141230248
Iteration 0 - Test accuracy: 0.24
Iteration 1 - Train loss: 0.81398574184724
Iteration 1 - Test accuracy: 0.28545454545454546
Iteration 2 - Train loss: 0.37563385079136946
Iteration 2 - Test accuracy: 0.3090909090909091
Iteration 3 - Train loss: 0.18602406783273603
Iteration 3 - Test accuracy: 0.3018181818181818
Iteration 4 - Train loss: 0.09241373560736689
Iteration 4 - Test accuracy: 0.29454545454545455
Iteration 5 - Train loss: 0.054121699393465277
Iteration 5 - Test accuracy: 0.29454545454545455
Iteration 6 - Train loss: 0.037876522440588874
Iteration 6 - Test accuracy: 0.3145454545454546
Iteration 7 - Train loss: 0.019210582926925204
Iteration 7 - Test accuracy: 0.25272727272727274
Iteration 8 - Train loss: 0.01291785676604353
Iteration 8 - Test accuracy: 0.29818181818181816
Iteration 9 - Train loss: 0.007584351753250936
Iteration 9 - Test accuracy: 0.2672727272727273
