# Recurrent Neural Networks

Grzegorz Statkiewicz, Mateusz Matukiewicz

## Overview

The structure of the direcotry should be as follows:

```
.
├── data
│   ├── train.pkl
│   └── test_no_target.pkl
└── main.ipynb
```



## Setup

Select the device to use

In [86]:
!nvidia-smi

Sun May 25 12:55:36 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.02              Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1660 Ti     On  |   00000000:1C:00.0  On |                  N/A |
|  0%   46C    P8             17W /  130W |     993MiB /   6144MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [87]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"



In [88]:
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import Counter
import numpy as np
import os
import random
from torch.nn.utils.rnn import pad_sequence

device = torch.device("cuda") if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


### Config for reproductivity

In [89]:
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Data preparation

Load the data

In [90]:
train_path = "data/train.pkl"

In [91]:
def load_data(file_path):
    """Loads data from a pickle file."""
    try:
        with open(file_path, "rb") as f:
            data = pickle.load(f)
        return data
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None


In [92]:
train_data = load_data(train_path)

print(f"Loaded {len(train_data)} training samples.")

print(train_data[0])

Loaded 2939 training samples.
(array([ -1.,  -1.,  -1., ...,  78.,  40., 144.], shape=(4756,)), 0)


In [93]:
compositors = {0: 'bach', 1: 'beethoven', 2: 'debussy', 3: 'scarlatti', 4: 'victoria'}
num_classes = len(compositors)

In [94]:
import numpy as np

sequences = [torch.tensor(seq, dtype=torch.long) for (seq, label) in train_data]
labels = [label for (seq, label) in train_data]

# Find the max chord index (vocab size, since chords are ints)
all_chords = set()
for seq in sequences:
    all_chords.update(seq.tolist())
vocab_size = int(max(all_chords)) + 2  # +1 for max, +1 for padding idx=0

print(f"Vocab size: {vocab_size}")

Vocab size: 193


In [95]:
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

class ChordDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = [seq + 1 for seq in sequences]
        self.labels = labels
    def __len__(self):
        return len(self.sequences)
    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

def collate_fn(batch):
    seqs, labels = zip(*batch)
    lengths = torch.tensor([len(s) for s in seqs], dtype=torch.long)
    padded_seqs = pad_sequence(seqs, batch_first=True, padding_value=0)
    return padded_seqs, lengths, torch.tensor(labels, dtype=torch.long)


In [96]:
from sklearn.model_selection import train_test_split

BATCH_SIZE = 256

train_data_split, val_data_split = train_test_split(train_data, test_size=0.2, random_state=42)

train_sequences = [torch.tensor(seq, dtype=torch.long) for (seq, label) in train_data_split]
train_labels = [label for (seq, label) in train_data_split]
val_sequences = [torch.tensor(seq, dtype=torch.long) for (seq, label) in val_data_split]
val_labels = [label for (seq, label) in val_data_split]

train_dataset = ChordDataset(train_sequences, train_labels)
val_dataset = ChordDataset(val_sequences, val_labels)


train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)


In [97]:
from torch.utils.data import WeightedRandomSampler

class_sample_counts = np.array([train_labels.count(i) for i in range(num_classes)])
weights = 1. / class_sample_counts

sample_weights = np.array([weights[label] for label in train_labels])
sample_weights = torch.DoubleTensor(sample_weights)

sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

sampled_train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, collate_fn=collate_fn)

## Model

In [98]:
import torch.nn as nn

class LSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim, num_layers, dropout_p=0.5, bidirectional=False):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0) # padding_idx=0 assumes 0 is used for padding

        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,  
            dropout=dropout_p if num_layers > 1 else 0,
            bidirectional=bidirectional
        )

        self.dropout = nn.Dropout(dropout_p)

        fc_input_dim = hidden_dim * 2 if bidirectional else hidden_dim
        self.fc = nn.Linear(fc_input_dim, output_dim)

        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.bidirectional = bidirectional

    def forward(self, x, lengths):


        # --- START DEBUGGING EMBEDDING INPUT ---
        print(f"--- Inside LSTMClassifier.forward ---")
        print(f"Input x device: {x.device}")
        print(f"Input x dtype: {x.dtype}")
        print(f"Input x shape: {x.shape}")
        if x.numel() > 0: # Check if tensor is not empty
            print(f"Min value in x: {x.min().item()}")
            print(f"Max value in x: {x.max().item()}")
        else:
            print("Input x is empty!")
        print(f"Embedding layer vocab size (num_embeddings): {self.embedding.num_embeddings}")
        print(f"Embedding layer padding_idx: {self.embedding.padding_idx}")

        embedded = self.embedding(x)


        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False
        )

        # LSTM
        packed_output, (h_n, c_n) = self.lstm(packed_embedded)

    
        if self.bidirectional:
            h_n_last_layer_forward = h_n[-2, :, :]
            h_n_last_layer_backward = h_n[-1, :, :]
            hidden = torch.cat((h_n_last_layer_forward, h_n_last_layer_backward), dim=1)
        else:
            hidden = h_n[-1, :, :]


        hidden = self.dropout(hidden)
        logits = self.fc(hidden)
        return logits

In [None]:
VOCAB_SIZE = vocab_size
EMBED_DIM = 32
HIDDEN_DIM = 64
OUTPUT_DIM = 5
NUM_LAYERS = 2
DROPOUT_P = 0.4

In [100]:
model = LSTMClassifier(
        vocab_size=VOCAB_SIZE,
        embed_dim=EMBED_DIM,
        hidden_dim=HIDDEN_DIM,
        output_dim=OUTPUT_DIM,
        num_layers=2,
        dropout_p=DROPOUT_P,
        bidirectional=False
    ).to(device)

In [101]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [102]:
print(model)
print(f"Number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")


LSTMClassifier(
  (embedding): Embedding(193, 32, padding_idx=0)
  (lstm): LSTM(32, 64, num_layers=2, batch_first=True, dropout=0.4)
  (dropout): Dropout(p=0.4, inplace=False)
  (fc): Linear(in_features=64, out_features=5, bias=True)
)
Number of parameters: 64869


In [None]:
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for i, (sequences_batch, lengths_batch, labels_batch) in enumerate(sampled_train_loader):
        sequences_batch = sequences_batch.to(device)
        labels_batch = labels_batch.to(device)

        optimizer.zero_grad()

        outputs = model(sequences_batch, lengths_batch)
        loss = criterion(outputs, labels_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * sequences_batch.size(0)

        _, predicted_labels = torch.max(outputs, 1)
        correct_predictions += (predicted_labels == labels_batch).sum().item()
        total_predictions += labels_batch.size(0)

    epoch_loss = running_loss / total_predictions
    epoch_acc = correct_predictions / total_predictions

    # Validation
    model.eval()
    val_running_loss = 0.0
    val_correct_predictions = 0
    val_total_predictions = 0
    with torch.no_grad():
        for sequences_batch, lengths_batch, labels_batch in val_loader:
            sequences_batch = sequences_batch.to(device)
            labels_batch = labels_batch.to(device)

            outputs = model(sequences_batch, lengths_batch)
            loss = criterion(outputs, labels_batch)
            val_running_loss += loss.item() * sequences_batch.size(0)

            _, predicted_labels = torch.max(outputs, 1)
            val_correct_predictions += (predicted_labels == labels_batch).sum().item()
            val_total_predictions += labels_batch.size(0)

    val_epoch_loss = val_running_loss / val_total_predictions
    val_epoch_acc = val_correct_predictions / val_total_predictions

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}]")
    print(f"  Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}")
    print(f"  Val Loss: {val_epoch_loss:.4f},   Val Acc: {val_epoch_acc:.4f}")

# print("Training finished.")


Epoch [1/10]
  Train Loss: 1.6037, Train Acc: 0.2144
  Val Loss: 1.5910,   Val Acc: 0.1633


KeyboardInterrupt: 