In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import os
from preprocessing import *
%pip install fasttext-wheel
from lstm import *
from crf import create_bio_tags

# Project root: the parent of the current working directory, resolved lexically.
# NOTE(review): this presumes the notebook is started from a folder one level
# below the repository root — confirm.
ROOT_DIR = os.path.abspath(os.pardir)




In [None]:
# Use the GPU when PyTorch can see a CUDA device, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Load the training tokens through the project helper `load_tokens`.
train_data_tokens = load_tokens(os.path.join(ROOT_DIR, "data", "training_data_tokens.json"))

# Read the training annotations. JSON is UTF-8 by specification, so the encoding
# is given explicitly instead of relying on the platform's locale default
# (which is not UTF-8 on every system and would corrupt non-ASCII text).
with open(os.path.join(ROOT_DIR, "data", "training_data.json"), "r", encoding="utf-8") as f:
    train_data = json.load(f)

# Combine annotations and tokens into BIO tags via `create_bio_tags` (from crf).
train_data_bio = create_bio_tags(train_data, train_data_tokens)

In [4]:
# Load the lemma and POS files that accompany the training tokens.
# The encoding is fixed to UTF-8 (the JSON standard) so the result does not
# depend on the platform's locale default.
with open(os.path.join(ROOT_DIR, "data", "training_data_lemmas.json"), "r", encoding="utf-8") as f:
    train_data_lemmas = json.load(f)
with open(os.path.join(ROOT_DIR, "data", "training_data_pos.json"), "r", encoding="utf-8") as f:
    train_data_pos = json.load(f)

In [5]:
# Load the fastText model through the project helper `load_fasttext` (brought
# into scope by one of the star imports above). Presumably this provides the
# 300-d word/lemma vectors mentioned next to `word_embedding_dim` — TODO confirm.
ft = load_fasttext()

In [6]:
# Build the training dataset from the token, lemma, POS and BIO-tag sequences
# together with the fastText model.
# NOTE(review): judging by the class name and keyword arguments, this presumably
# cuts the data into overlapping windows of 10 items padded with 0 — confirm
# against the OverlappingWindowDataset implementation in the project sources.
training_set = OverlappingWindowDataset(
	train_data_tokens,
	train_data_lemmas,
	train_data_pos,
	train_data_bio,
	ft,
	seq_len=10,
	padding_value=0)

In [7]:
# Serve the training set in shuffled mini-batches of 32 items.
train_loader = DataLoader(dataset=training_set, shuffle=True, batch_size=32)

In [9]:
# Model definition
class NegationDetectionModel(nn.Module):
    """Sequence tagger: a stacked bidirectional LSTM followed by a linear head.

    Args:
        word_embedding_dim: Size of each input feature vector (last input axis).
        hidden_dim: Hidden size of the LSTM, per direction.
        num_layers: Number of stacked LSTM layers.
        output_dim: Number of values produced for every time step.
    """

    def __init__(self, word_embedding_dim, hidden_dim, num_layers, output_dim):
        super().__init__()

        # Recurrent encoder; batch_first=True means batched inputs are laid out
        # as (batch, seq, features).
        self.bilstm = nn.LSTM(
            input_size=word_embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )

        # The forward and backward hidden states are concatenated, so the linear
        # head receives 2 * hidden_dim features per time step.
        self.fc = nn.Linear(in_features=2 * hidden_dim, out_features=output_dim)

    def forward(self, word_embeds):
        """Map batched inputs (batch, seq, features) to (batch, seq, output_dim)."""
        # Keep only the per-step outputs; the final (h_n, c_n) state is unused.
        encoded = self.bilstm(word_embeds)[0]
        return self.fc(encoded)

# Hyperparameters
word_embedding_dim = 617  # 300 word + 300 lemma + 17 POS features per token
hidden_dim = 300          # LSTM hidden size per direction
output_dim = 1            # one output value per token
num_layers = 3            # stacked BiLSTM layers

# Instantiate the model and move its parameters to `device`. The training and
# evaluation loops move every batch to `device`, so leaving the model on the CPU
# raises a device-mismatch error as soon as CUDA is available.
model = NegationDetectionModel(word_embedding_dim, hidden_dim, num_layers, output_dim).to(device)

In [10]:
# Training function
def train(model, dataloader, criterion, optimizer, num_epochs=10):
    """Train ``model`` on ``dataloader`` and report the mean loss of every epoch.

    Batches are moved to the module-level ``device`` before the forward pass and
    iteration is wrapped in ``tqdm`` for a progress bar (both names come from the
    notebook's global scope).

    Args:
        model: Module called as ``model(sequences)``; its output's trailing axis
            is squeezed before the loss is computed.
        dataloader: Iterable yielding ``(sequences, targets)`` tensor pairs.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of passes over ``dataloader`` (default 10, the value
            that used to be hard-coded).

    Returns:
        list[float]: Mean batch loss of each epoch, in order.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    # Make sure training-mode behaviour is active, e.g. after a prior model.eval().
    model.train()

    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0

        for sequences, targets in tqdm(dataloader):
            sequences = sequences.to(device)
            targets = targets.to(device)

            # Forward pass
            outputs = model(sequences)

            # Drop only the trailing output axis. A bare squeeze() would also
            # remove the batch axis whenever the last batch holds one sample,
            # making outputs and targets disagree in shape.
            loss = criterion(outputs.squeeze(-1), targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("dataloader yielded no batches")

        # Report the epoch mean rather than whichever batch happened to be last.
        mean_loss = running_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f'Epoch {epoch+1}, Loss: {mean_loss}')

    return epoch_losses

In [11]:
# Optimiser and loss used for training.
# NOTE(review): the model is configured with output_dim = 1 (one value per token),
# while CrossEntropyLoss normalises across a class axis. Confirm this is the
# intended objective for the target encoding produced by the dataset.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Kick off training with the settings above.
train(model=model, dataloader=train_loader, criterion=criterion, optimizer=optimizer)

  1%|          | 52/5705 [00:27<49:53,  1.89it/s] 


KeyboardInterrupt: 

In [None]:
# Test the model
def test_model(model, test_loader, criterion):

    model.eval()  # Set model to evaluation mode

    test_loss = 0.0

    with torch.no_grad():
        for sequences, targets in test_loader:
            sequences = sequences.to(device)
            targets = targets.to(device)
            outputs = model(sequences)
            loss = criterion(outputs, targets)

            test_loss += loss.item()

    avg_test_loss = test_loss / len(test_loader)
    print(f"Average Test Loss: {avg_test_loss}")