In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

In [None]:
from preprocessing import (
    train_tokens_tokenized, train_entities, train_intents,
    test_tokens_tokenized, test_entities, test_intents,
    dev_tokens_tokenized, dev_entities, dev_intents, current_int
)

In [None]:
# Hyperparameters: model sizes and training schedule used throughout this notebook
EMBEDDING_DIM = 64       # width of each token embedding vector
HIDDEN_DIM = 128         # size of the LSTM hidden state
BATCH_SIZE = 1           # number of sequences per DataLoader batch
EPOCHS = 3               # number of passes over the training data
LEARNING_RATE = 1e-3     # learning rate handed to the optimizer

# Dataset wrapper around the pre-tokenized data
class PizzaDataset(Dataset):
    """Map-style dataset pairing each token sequence with its entity and intent labels.

    Args:
        tokens: Indexable collection; ``tokens[i]`` holds the integer token ids of sample ``i``.
        entities: Indexable collection of integer entity label ids, indexed like ``tokens``.
        intents: Indexable collection of integer intent label ids, indexed like ``tokens``.
    """

    def __init__(self, tokens, entities, intents):
        self.tokens = tokens
        self.entities = entities
        self.intents = intents

    def __len__(self):
        """Return the number of samples, taken from the length of ``tokens``."""
        return len(self.tokens)

    def __getitem__(self, idx):
        """Return ``(tokens, entities, intents)`` for sample ``idx`` as int64 tensors."""
        # Force int64 instead of relying on dtype inference: an empty sequence
        # would otherwise become float32 and an int32 array would stay int32,
        # neither of which is accepted as a CrossEntropyLoss target.
        return tuple(
            torch.tensor(column[idx], dtype=torch.long)
            for column in (self.tokens, self.entities, self.intents)
        )

# Joint tagger: one shared LSTM encoder feeding two per-token classification heads
class CombinedRNN(nn.Module):
    """Embed tokens, encode them with an LSTM, and score every position twice.

    Args:
        vocab_size: Number of rows in the embedding table.
        label_size: Number of output classes of BOTH the entity and the intent head.
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
    """

    def __init__(self, vocab_size, label_size, embedding_dim, hidden_dim):
        super().__init__()
        # Sub-modules are created in a fixed order so seeded initialisation is reproducible.
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.rnn = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc_entity = nn.Linear(in_features=hidden_dim, out_features=label_size)
        self.fc_intent = nn.Linear(in_features=hidden_dim, out_features=label_size)

    def forward(self, tokens):
        """Return ``(entity_logits, intent_logits)``, each (batch_size, seq_len, label_size)."""
        # The LSTM also returns its final (h, c) state, which is not needed here.
        hidden_states = self.rnn(self.embedding(tokens))[0]
        return self.fc_entity(hidden_states), self.fc_intent(hidden_states)

# Padding function for the DataLoader
def collate_fn(batch, token_pad_value=0, label_pad_value=-1):
    """Pad a list of variable-length samples into three rectangular tensors.

    Args:
        batch: Sequence of ``(tokens, entities, intents)`` tuples of 1-D tensors.
        token_pad_value: Fill value for padded token positions. It must be a
            valid embedding index, because ``nn.Embedding`` raises on negative
            indices; the previous fill value of -1 therefore crashed as soon as
            a batch held sequences of different lengths.
        label_pad_value: Fill value for padded entity / intent positions. The
            default of -1 matches the ``ignore_index=-1`` used by the loss in
            this file, so padded positions do not contribute to training.

    Returns:
        ``(tokens_padded, entities_padded, intents_padded)``, each of shape
        (batch_size, longest_sequence_in_batch).
    """
    tokens, entities, intents = zip(*batch)

    # NOTE(review): 0 may also be a real token id. Padded positions carry
    # label_pad_value, so they are excluded from the loss either way.
    tokens_padded = pad_sequence(tokens, batch_first=True, padding_value=token_pad_value)
    entities_padded = pad_sequence(entities, batch_first=True, padding_value=label_pad_value)
    intents_padded = pad_sequence(intents, batch_first=True, padding_value=label_pad_value)

    return tokens_padded, entities_padded, intents_padded

# Build a shuffled, padded DataLoader for one data split
def data_loader(tokens,entities,intents):
    """Wrap one split in a ``PizzaDataset`` and return a DataLoader over it.

    Batches hold ``BATCH_SIZE`` samples, are drawn in shuffled order, and are
    padded to a common length by ``collate_fn``.
    """
    return DataLoader(
        PizzaDataset(tokens, entities, intents),
        collate_fn=collate_fn,
        shuffle=True,
        batch_size=BATCH_SIZE,
    )

# One DataLoader per split, built in train / test / dev order
train_dataloader, test_dataloader, dev_dataloader = (
    data_loader(*split)
    for split in (
        (train_tokens_tokenized, train_entities, train_intents),
        (test_tokens_tokenized, test_entities, test_intents),
        (dev_tokens_tokenized, dev_entities, dev_intents),
    )
)

# Loss used for both heads; targets equal to -1 (the label padding value) are skipped
criterion = nn.CrossEntropyLoss(ignore_index=-1)

# Run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Model dimensions
VOCAB_SIZE = current_int
LABEL_SIZE = 23  # label.map length

# Build the model and move its parameters to the selected device
model = CombinedRNN(
    vocab_size=VOCAB_SIZE,
    label_size=LABEL_SIZE,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
).to(device)

# Adam over every model parameter
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop
def train_model(model, dataloader, criterion, optimizer, epochs):
    """Train ``model`` in place on the sum of the entity loss and the intent loss.

    Args:
        model: Module whose forward pass maps a token batch to
            ``(entity_logits, intent_logits)``, each with the class dimension last.
        dataloader: Iterable of ``(tokens, entities, intents)`` tensor batches.
        criterion: Loss called as ``criterion(logits_2d, targets_1d)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of full passes over ``dataloader``.

    Prints the loss summed over all batches after every epoch and returns nothing.
    """
    # Take the device from the model itself instead of a module-level global,
    # so batches always land where the parameters live.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for tokens, entities, intents in dataloader:
            tokens = tokens.to(run_device)
            entities = entities.to(run_device)
            intents = intents.to(run_device)
            optimizer.zero_grad()

            # Forward pass
            entity_logits, intent_logits = model(tokens)

            # Flatten to (positions, classes) vs. (positions,). The class count
            # is read from the logits so it cannot drift from the model, and
            # reshape (unlike view) also accepts non-contiguous tensors.
            loss_entities = criterion(
                entity_logits.reshape(-1, entity_logits.size(-1)), entities.reshape(-1)
            )
            loss_intents = criterion(
                intent_logits.reshape(-1, intent_logits.size(-1)), intents.reshape(-1)
            )
            loss = loss_entities + loss_intents

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}")

# Fit the model on the training split for EPOCHS passes
train_model(model, train_dataloader, criterion, optimizer, epochs=EPOCHS)

In [None]:
def evaluate_accuracy(model, dataloader, printValues=True, ignore_index=-1):
    """Print per-position entity and intent accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module whose forward pass maps a token batch to
            ``(entity_logits, intent_logits)``, each with the class dimension last.
        dataloader: Iterable of ``(tokens, entities, intents)`` tensor batches.
        printValues: When true, also print predicted and actual labels of every batch.
        ignore_index: Label value marking padded positions; such positions are
            excluded from both the correct count and the total.

    Switches the model to eval mode (it is not switched back) and returns nothing.
    An accuracy is reported as 0 when no position was counted for it.
    """
    # Take the device from the model itself instead of a module-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct_entities = total_entities = 0
    correct_intents = total_intents = 0

    with torch.no_grad():
        for tokens, entities, intents in dataloader:
            tokens = tokens.to(run_device)
            entities = entities.to(run_device)
            intents = intents.to(run_device)

            # Forward pass
            entity_logits, intent_logits = model(tokens)

            # Predicted class = index of the largest logit along the class dimension
            predicted_entities = entity_logits.argmax(dim=-1)
            predicted_intents = intent_logits.argmax(dim=-1)
            if printValues:
                # Print actual and predicted intents and entities
                print("Predicted Intents:", predicted_intents)
                print("Actual Intents:   ", intents)
                print("Predicted Entities:", predicted_entities)
                print("Actual Entities:   ", entities)

            # Entity accuracy over non-padded positions only
            entity_mask = entities != ignore_index
            correct_entities += ((predicted_entities == entities) & entity_mask).sum().item()
            total_entities += entity_mask.sum().item()

            # Intent accuracy over non-padded positions only
            intent_mask = intents != ignore_index
            correct_intents += ((predicted_intents == intents) & intent_mask).sum().item()
            total_intents += intent_mask.sum().item()

    # Compute final accuracies, guarding against an empty dataloader
    entity_accuracy = correct_entities / total_entities if total_entities > 0 else 0
    intent_accuracy = correct_intents / total_intents if total_intents > 0 else 0
    print(f"Entity Accuracy: {entity_accuracy:.4f}, Intent Accuracy: {intent_accuracy:.4f}")

In [None]:
# Held-out test split; per-batch prediction dumps are switched off
evaluate_accuracy(model, test_dataloader, printValues=False)

In [None]:
# Development split; per-batch prediction dumps are switched off
evaluate_accuracy(model, dev_dataloader, printValues=False)