In [1]:
#main.py

import torch
from torch import nn
import torch.nn.functional as F
from sklearn.feature_extraction.text import CountVectorizer
from sentiment_data import read_sentiment_examples
from torch.utils.data import Dataset, DataLoader
import time
import argparse
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from BOWmodels import SentimentDatasetBOW, NN2BOW, NN3BOW
from DANmodels import DAN
from sentiment_data import read_word_embeddings

In [2]:
train_data = SentimentDatasetBOW("data/train.txt")
print(f"First 5 examples: {train_data.examples[:5]}")

First 5 examples: [['the', 'rock', 'is', 'destined', 'to', 'be', 'the', '21st', 'century', "'s", 'new', '``', 'conan', "''", 'and', 'that', 'he', "'s", 'going', 'to', 'make', 'a', 'splash', 'even', 'greater', 'than', 'arnold', 'schwarzenegger', ',', 'jean-claud', 'van', 'damme', 'or', 'steven', 'segal', '.']; label=1, ['the', 'gorgeously', 'elaborate', 'continuation', 'of', '``', 'the', 'lord', 'of', 'the', 'rings', "''", 'trilogy', 'is', 'so', 'huge', 'that', 'a', 'column', 'of', 'words', 'can', 'not', 'adequately', 'describe', 'co-writer\\/director', 'peter', 'jackson', "'s", 'expanded', 'vision', 'of', 'j.r.r.', 'tolkien', "'s", 'middle-earth', '.']; label=1, ['singer\\/composer', 'bryan', 'adams', 'contributes', 'a', 'slew', 'of', 'songs', '--', 'a', 'few', 'potential', 'hits', ',', 'a', 'few', 'more', 'simply', 'intrusive', 'to', 'the', 'story', '--', 'but', 'the', 'whole', 'package', 'certainly', 'captures', 'the', 'intended', ',', 'er', ',', 'spirit', 'of', 'the', 'piece', '.'];

In [3]:
print(f"Sentences: {train_data.sentences[:5]}")
print(f"Labels: {train_data.labels[:5]}")

Sentences: ["the rock is destined to be the 21st century 's new `` conan '' and that he 's going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .", "the gorgeously elaborate continuation of `` the lord of the rings '' trilogy is so huge that a column of words can not adequately describe co-writer\\/director peter jackson 's expanded vision of j.r.r. tolkien 's middle-earth .", 'singer\\/composer bryan adams contributes a slew of songs -- a few potential hits , a few more simply intrusive to the story -- but the whole package certainly captures the intended , er , spirit of the piece .', 'yet the act is still charming here .', "whether or not you 're enlightened by any of derrida 's lectures on `` the other '' and `` the self , '' derrida is an undeniably fascinating and playful fellow ."]
Labels: tensor([1, 1, 1, 1, 1])


In [4]:
# Load pre-trained GloVe embeddings (for example, using 50-dimensional embeddings)
glove_embeddings = read_word_embeddings("data/glove.6B.50d-relativized.txt")

Read in 14923 vectors of size 50


In [5]:
glove_embeddings.vectors.shape

(14923, 50)

In [11]:
# Initialize the DAN model with the loaded embeddings
dan_model = DAN(embeddings=glove_embeddings, hidden_size=300, dropout=0.5, num_layers=2, fine_tune_embeddings=False)

In [13]:
dan_model.embedding

Embedding(14923, 50)

In [None]:
glove_embeddings = read_word_embeddings("data/glove.6B.50d-relativized.txt")

# Use the provided method to get a torch.nn.Embedding layer
embedding_layer = glove_embeddings.get_initialized_embedding_layer(frozen=False)  # Set frozen=True to freeze embeddings

# Initialize the DAN model using the embedding layer
dan_model = DAN(embeddings=embedding_layer.weight, hidden_size=300, dropout=0.5, num_layers=2, fine_tune_embeddings=False)

In [None]:
dan_model.

In [15]:
# Now you can run the embedding test
sentence_example = torch.tensor([[1, 23, 45]])  # A batch with a single sentence (adjust this to match your test case)
embedded = dan_model.embedding(sentence_example)
print(f"Word embeddings: {embedded}")
avg_embedding = embedded.mean(dim=1)
print(f"Averaged sentence embedding: {avg_embedding}")

Word embeddings: tensor([[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000],
         [ 0.2772,  0.8847, -0.2625,  0.0841,  0.4081, -1.1697, -0.6852,
           0.1427, -0.5735, -0.5857, -0.5083, -0.8641, -0.5260, -0.5638,
           0.3286,  0.4339, -0.2125,  0.4936, -1.8137, -0.0357,  1.3227,
           0.8087,  0.0122, -0.0870, -0.1681, -1.5935,  0.4703,  0.2610,
          -0.4167, -0.3853,  3.4413,  0.3438, -0.0359, -0.5678,  0.1838,
          -0.4865,  0.4265,  0.4408,  1.0931,  0.0639, -0.0643, -0.2923,
           0.0

In [27]:
train_data = SentimentDatasetBOW("data/train.txt")
dev_data = SentimentDatasetBOW("data/dev.txt")

In [29]:
train_data.sentences

["the rock is destined to be the 21st century 's new `` conan '' and that he 's going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .",
 "the gorgeously elaborate continuation of `` the lord of the rings '' trilogy is so huge that a column of words can not adequately describe co-writer\\/director peter jackson 's expanded vision of j.r.r. tolkien 's middle-earth .",
 'singer\\/composer bryan adams contributes a slew of songs -- a few potential hits , a few more simply intrusive to the story -- but the whole package certainly captures the intended , er , spirit of the piece .',
 'yet the act is still charming here .',
 "whether or not you 're enlightened by any of derrida 's lectures on `` the other '' and `` the self , '' derrida is an undeniably fascinating and playful fellow .",
 'just the labour involved in creating the layered richness of the imagery in this chiaroscuro of madness and light is astonishing .',
 'part of the charm of 

In [31]:
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
test_loader = DataLoader(dev_data, batch_size=16, shuffle=False)

In [37]:
len(train_loader.dataset)

6920

In [42]:
for i in enumerate(train_loader):
    print(i)

(0, [tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), tensor([1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0])])
(1, [tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]]), tensor([1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0])])
(2, [tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), tensor([1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1])])
(3, [tensor([[0., 0., 0.,  ..., 0., 0., 0.]

In [44]:
16*432

6912