In [177]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import random

In [178]:
# Read GloVe embeddings from file
def read_glove_embeddings(file_path):
    word_to_index = {}
    embeddings = []

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            values = line.strip().split(' ')
            word = values[0]
            vector = torch.tensor([float(val) for val in values[1:]])
            word_to_index[word] = len(embeddings)
            embeddings.append(vector)

    return torch.stack(embeddings), word_to_index

embedding_dim = 100

# File path of the GloVe embeddings
glove_file = 'glove.6B.100d.txt'

# Read GloVe embeddings
pretrained_embeddings, word_to_index = read_glove_embeddings(glove_file)

In [282]:
# Define model
class WordClassifier(nn.Module):
    def __init__(self, embedding_dim, hidden_size, num_classes, pretrained_embeddings):
        super(WordClassifier, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(pretrained_embeddings, freeze=True)
        self.fc1 = nn.Linear(embedding_dim, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        embedded = self.embedding(x)
        out = self.fc1(embedded)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [283]:
# read data for training
data = []
with open('nouns.txt', 'r') as f:
    for line in f:
        word = line.strip()
        if word in word_to_index:
            data.append((word_to_index[word], 1))

with open('verbs.txt', 'r') as f:
    for line in f:
        word = line.strip()
        if word in word_to_index:
            data.append((word_to_index[word], 0))

# convert the data into tensors
random.shuffle(data)

x_train, y_train = zip(*data)
x_train = torch.tensor(x_train)
y_train = torch.tensor(y_train)

In [284]:
# batch the training data
dataset = TensorDataset(x_train, y_train)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [285]:
# Hyper parameters for the model
hidden_size = 128
num_classes = 2
vocab_size = len(pretrained_embeddings)
model = WordClassifier(embedding_dim, hidden_size, num_classes, pretrained_embeddings)

In [286]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
batch_size = 32

# Training loop
num_epochs = 100

for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in dataloader:
        # Clear gradients
        optimizer.zero_grad()  

        # Forward pass
        output = model(inputs)

        # Compute loss
        loss = criterion(output, labels)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()
        
        running_loss += loss.item()
        
    # Print loss
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(dataloader)}")

Epoch [1/100], Loss: 0.23052525226958095
Epoch [2/100], Loss: 0.1507930949718381
Epoch [3/100], Loss: 0.1397523626840363
Epoch [4/100], Loss: 0.13166636880487204
Epoch [5/100], Loss: 0.1253807268066642
Epoch [6/100], Loss: 0.12109721158631145
Epoch [7/100], Loss: 0.11563364943140186
Epoch [8/100], Loss: 0.11182313437263171
Epoch [9/100], Loss: 0.1084568169588844
Epoch [10/100], Loss: 0.10495198189746588
Epoch [11/100], Loss: 0.10154954196574788
Epoch [12/100], Loss: 0.10089617767371237
Epoch [13/100], Loss: 0.09853415322043778
Epoch [14/100], Loss: 0.0965213670162484
Epoch [15/100], Loss: 0.09585682293206142
Epoch [16/100], Loss: 0.09345788976061158
Epoch [17/100], Loss: 0.09204120346342583
Epoch [18/100], Loss: 0.09161887724573413
Epoch [19/100], Loss: 0.09042833413501891
Epoch [20/100], Loss: 0.0891924318412445
Epoch [21/100], Loss: 0.08899488753571252
Epoch [22/100], Loss: 0.08818319341323028
Epoch [23/100], Loss: 0.08691267452765411
Epoch [24/100], Loss: 0.08708405909322513
Epoch [

In [313]:
test_words = ["talk"]
test_input = []
for word in test_words:
    if word in word_to_index:
        test_input.append(word_to_index[word])
        
test_input = torch.tensor(test_input)

In [314]:
test_input

tensor([1077])

In [315]:
with torch.no_grad():
    output = model(test_input)

In [316]:
output

tensor([[ 0.4335, -0.7813]])

In [317]:
_, predicted_labels = torch.max(output, dim=1)

In [318]:
predicted_labels

tensor([0])