In [1]:
from custom_torch_dataset import SwipeDataset
import os
from torch.utils.data import random_split

dataset_path = os.path.join(os.getcwd(), "dataset")

data = SwipeDataset(data_dir=dataset_path,
                    batch=False)

train_set, val_set, test_set = random_split(data, [0.8, 0.1, 0.1])

In [None]:
from torch.utils.data import Subset
reduced_set = Subset(data, range(200))
train_set, val_set, test_set = random_split(reduced_set, [0.8, 0.1, 0.1])

In [None]:
data[0][2].shape

In [2]:
import torch
from torch.nn.utils.rnn import pack_sequence

def collate_fn(batch):
    """
    :param batch: List of tuples (input, word, word_tensor)
                  - input: (T, 6)
                  - word: a string of characters
                  - word_tensor: encoded word as indicies with 0 as the blank

    """
    # Sort batch by sequence length (descending order)
    batch.sort(key=lambda x: x[0].shape[0], reverse=True)

    inputs, words, targets = zip(*batch)
    input_lengths = torch.LongTensor([x.shape[0] for x in inputs])  # store the lengths of inputs
    input = pack_sequence(inputs)   # pack the inputs

    target_lengths = torch.LongTensor([len(x) for x in words])
    targets = torch.cat(targets)    # concatenate all the targets

    return input, targets, input_lengths, target_lengths, words

In [36]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_set, batch_size=16, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_set, batch_size=16, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_set, batch_size=16, shuffle=True, collate_fn=collate_fn)

dataloaders = {"train": train_loader,
               "val": val_loader}

In [None]:
data[:][0][0].shape

In [None]:
input_lengths = [x.shape[1] for x in data[:][0][:]]
max(input_lengths)

In [None]:
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def collate_fn(batch):
    # Separate inputs and targets
    inputs = [item[0] for item in batch]
    targets = [item[1] for item in batch]
    
    # Pad inputs dynamically
    padded_inputs = pad_sequence(inputs, batch_first=True, padding_value=(0, 0, -1, 0, 0, 0))
    
    # Pad targets dynamically (if needed)
    padded_targets = pad_sequence(targets, batch_first=True, padding_value="PAD")
    
    return padded_inputs, padded_targets

test_loader = DataLoader(test_set, batch_size=32, collate_fn=collate_fn)

In [None]:
test_in, test_word, test_tensor = next(iter(test_loader))

In [None]:
import torch
from model import Seq2Seq

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
t_model = Seq2Seq(hidden_size=32,
                  num_layers=2, 
                  input_size=6, 
                  output_size=27).to(device)

In [None]:
def train_batch(batch_indicies, set, model, criterion, device):
    """
    Takes a batch of variable lengths
    """
    for i in batch_indicies:
        input, word, word_tensor = set[i]
        input = input.to(device)
        word_tensor = word_tensor.to(device)

In [None]:
import time
import random
import numpy as np
import torch.nn as nn

def train_model(model, train_set, val_set, optimiser, criterion=nn.CTCLoss(), batch_size=32, num_epochs=10):

    train_losses = []
    val_losses = []

    since = time.time()

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 10)

        for phase in ["train", "val"]:
            # set the mode of the model based on the phase and change the data used
            if phase == "train":
                model.train()
                batches = list(range(len(train_set)))
                data = train_set
            else:
                model.eval()
                batches = list(range(len(val_set)))
                data = val_set

            # batch like this because the words are of different lenghts and batching isnt used
            random.shuffle(batches)
            batches = np.array_split(batches, len(batches) // batch_size)

            running_loss = 0

            for batch in batches:   # iterate over each batch of dataset
                batch_loss = 0

                optimiser.zero_grad()
                # enable gradients only if in training mode
                with torch.set_grad_enabled(phase == "train"):
                    for i in batch:     # for every datapoint in the batch
                        input, word, word_tensor = data[i]

                        input = input.to(device)
                        word_tensor = word_tensor.to(device)
                        word_length = len(word)

                        output = t_model(input)
                        # rearrange the output for CTC loss
                        output = output.permute(1, 0, 2)    # (T, N, C)
                        # convert to tensors
                        input_lengths = torch.LongTensor([input.shape[1]]).to(device)
                        target_lengths = torch.LongTensor([word_length]).to(device)

                        loss = criterion(output, word_tensor, input_lengths, target_lengths)
                        batch_loss += loss
                    # find mean batch loss
                    avg_batch_loss = batch_loss / len(batch)

                    # back prop only if in train
                    if phase == "train":
                        avg_batch_loss.backward()
                        # clip the loss so we dont get exploding gradients
                        nn.utils.clip_grad_norm_(model.parameters(), 3)
                        optimiser.step()
                # calculates the total loss for epoch over all batches
                running_loss += avg_batch_loss.item()

            # track epoch oss
            if phase == "train":
                epoch_loss = running_loss / len(batches)
                train_losses.append(epoch_loss)
            else:
                epoch_loss = running_loss / len(batches)
                val_losses.append(epoch_loss)
            
            print(f'{phase} Loss: {epoch_loss:.4f}')

        time_elapsed = time.time() - since
        print(f"Time elapsed: {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")       

    return model    

In [None]:
import torch
from model import Seq2Seq

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
t_model = Seq2Seq(hidden_size=32,
                  num_layers=2, 
                  input_size=6, 
                  output_size=27).to(device)

optimiser = torch.optim.SGD(t_model.parameters(), lr=0.01)
criterion = nn.CTCLoss(blank=0)
t_model = train_model(t_model, train_set, val_set, optimiser,criterion, num_epochs=5, batch_size=32)

In [None]:
test_in, test_word, test_ten = test_set[0]
test_in = test_in.to(device)

In [None]:
test_out = t_model(test_in)

In [None]:
vocabulary = {'_': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8,
              'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16,
              'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24,
              'y': 25, 'z': 26}
reversed_vocab = {k: u for u, k in vocabulary.items()}

In [None]:
def handle_outputs(decoder_output):
    indicies = torch.argmax(decoder_output.squeeze(1), dim=-1).tolist()
    words = []
    for word in indicies:
        characters = [reversed_vocab[i] for i in word]
        words.append(characters)
    
    return words

In [None]:
handle_outputs(test_out)

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_packed_sequence, pack_sequence

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

class CTCEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=2, output_size=27, bidirectional=True):
        super(CTCEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        
        self.lstm = nn.LSTM(input_size=input_size, 
                            hidden_size=hidden_size, 
                            num_layers=num_layers, 
                            batch_first=False,
                            bidirectional=bidirectional)
        
        self.fc = nn.Linear(2 * hidden_size if bidirectional else hidden_size, output_size)
        
    def forward(self, x):
        """
        : param x: (batch_size, seq_len, input_size)
        : return logits: (seq_len, batch_size, output_size)
        """
        lstm_outputs, _ = self.lstm(x)  # lstm_out shape: (seq_len, batch_size, hidden_size*2 if bidirectional)

        lstm_out = pad_packed_sequence(lstm_outputs, batch_first=False)[0]

        logits = self.fc(lstm_out)  # shape: (batch_size, seq_len, output_size)
        return F.log_softmax(logits, dim=-1)  # Log-softmax for CTC loss
    
c_model = CTCEncoder(input_size=6,
                     hidden_size=32,
                     num_layers=2,
                     output_size=27,
                     bidirectional=True).to(device)

In [18]:
t_inputs, t_targets, t_input_lengths, t_target_lengths, t_words = next(iter(train_loader))
t_output = c_model(t_inputs.to(device))

In [47]:
import torch
import time
from tempfile import TemporaryDirectory
import os

def CTCtrain(model, dataloaders, optimiser, num_epochs = 5, scheduler = None):
    criterion = torch.nn.CTCLoss(blank=0, zero_infinity=True)

    since = time.time()
    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        lowest_loss = float("Inf")
        
        for epoch in range(num_epochs):
            print(f'Epoch {epoch + 1}/{num_epochs}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0

                # Iterate over data.
                for inputs, targets, input_lengths, target_lengths, words in dataloaders[phase]:
                    inputs = inputs.to(device)
                    targets = targets.to(device)

                    input_lengths = input_lengths.to(device)
                    target_lengths = target_lengths.to(device)

                    # zero the parameter gradients
                    optimiser.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        loss = criterion(outputs, targets, input_lengths, target_lengths)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            # nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
                            loss.backward()
                            optimiser.step()

                    # statistics
                    running_loss += loss.item() * len(target_lengths)  # multiply by batch size
                if phase == 'train' and scheduler is not None:
                    scheduler.step(loss)

                epoch_loss = running_loss / len(dataloaders[phase])

                print(f'{phase} Loss: {epoch_loss:.4f}')

                # deep copy the model
                if phase == 'val' and epoch_loss < lowest_loss:
                    lowest_loss = epoch_loss
                    torch.save(model.state_dict(), best_model_params_path)

            time_elapsed = time.time() - since
            print(f"Time Elapsed: {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Lowest Loss: {lowest_loss:4f}')

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path, weights_only=True))
    return model

In [53]:
c_model = CTCEncoder(input_size=6,
                     hidden_size=256,
                     num_layers=2,
                     output_size=27,
                     bidirectional=True).to(device)
optimiser = torch.optim.AdamW(c_model.parameters(), lr=0.01)
# optimiser = torch.optim.SGD(c_model.parameters(), lr=0.01, momentum=0.1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimiser, factor=0.5)
c_model = CTCtrain(c_model, dataloaders, optimiser, num_epochs=300, scheduler=scheduler)

Epoch 1/300
----------
train Loss: 53.1365
val Loss: 49.0688
Time Elapsed: 0m 14s

Epoch 2/300
----------
train Loss: 49.3247
val Loss: 48.2561
Time Elapsed: 0m 29s

Epoch 3/300
----------
train Loss: 48.6487
val Loss: 47.8550
Time Elapsed: 0m 43s

Epoch 4/300
----------
train Loss: 48.1371
val Loss: 47.4451
Time Elapsed: 0m 58s

Epoch 5/300
----------
train Loss: 48.2068
val Loss: 47.0347
Time Elapsed: 1m 12s

Epoch 6/300
----------
train Loss: 47.6476
val Loss: 46.9267
Time Elapsed: 1m 26s

Epoch 7/300
----------
train Loss: 47.4838
val Loss: 47.0585
Time Elapsed: 1m 40s

Epoch 8/300
----------
train Loss: 47.1763
val Loss: 46.7381
Time Elapsed: 1m 55s

Epoch 9/300
----------
train Loss: 46.8663
val Loss: 46.8102
Time Elapsed: 2m 9s

Epoch 10/300
----------
train Loss: 46.6139
val Loss: 46.8531
Time Elapsed: 2m 23s

Epoch 11/300
----------
train Loss: 46.4473
val Loss: 45.9656
Time Elapsed: 2m 38s

Epoch 12/300
----------
train Loss: 46.1274
val Loss: 45.5698
Time Elapsed: 2m 52s

Ep

KeyboardInterrupt: 