# Turkish Diacritisation | YZV 405E NLP Term Project

Author: Bora Boyacıoğlu

Student ID: 150200310

## Step 2: Training

In [None]:
# Install the `unidecode` package; --quiet reduces pip's console output.
# NOTE(review): no visible cell imports unidecode directly - presumably the local utils need it; confirm.
!pip install unidecode --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/235.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m225.3/235.5 kB[0m [31m7.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import sys
import time
from datetime import datetime as dt

import numpy as np
import pickle as pkl

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader


In [None]:
# Enable IPython's autoreload extension in mode 2, so that edited modules
# (e.g. the local `utils` package imported below) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

### Reloading the Processed Data

In [None]:
# Prefix of the project folder that holds `data/` and `models/`.
# Empty means "relative to the working directory"; a non-empty value must end
# with '/' because later cells concatenate it directly with sub-paths.
path = ''

Mount the drive for Google Colab. <font color='red'>Do not run this for local use.</font>

In [None]:
# Mount the Drive.
from google.colab import drive
drive.mount('/content/drive')

# Delete the sample_data folder because I don't like unnecessary things.
!rm -rf sample_data

# Point the project folder at the mounted Drive (the trailing slash is required,
# since later cells concatenate `path` directly with sub-paths).
path = '/content/drive/MyDrive/Share/NLP/'

# Append the project folder to the import search path.
# NOTE(review): presumably this is what makes the local `utils` package importable - confirm.
sys.path.append(path)

Mounted at /content/drive


Import local libraries and classes. Open data and vocab files.

In [None]:
# Import local files.
from utils.main_utils import *
from utils.model import Encoder, Decoder, Seq2Seq


def _read_pickle(relative_path):
    """ Unpickle and return the object stored at `path + relative_path`. """
    # NOTE: unpickling can execute arbitrary code; only open files produced by this project.
    with open(path + relative_path, 'rb') as handle:
        return pkl.load(handle)


# Reload the processed train and test datasets, followed by the vocab.
train_data = _read_pickle('data/train_data.pkl')
test_data = _read_pickle('data/test_data.pkl')
vocab = _read_pickle('data/vocab.pkl')

In [None]:
# Convert the undiacritized (input) and diacritized (target) fields of each split to NumPy arrays.
# NOTE(review): assumes every sequence has the same length, so that the arrays are 2-D - confirm.
train_und = np.array(train_data.undiacritized)
train_d = np.array(train_data.diacritized)

test_und = np.array(test_data.undiacritized)
test_d = np.array(test_data.diacritized)

# Number of entries in the vocab's 'w2i' mapping (the mapping that is indexed by token, e.g. "<pad>").
vocab_size = len(vocab['w2i'])
# Size of the second axis of the training inputs (presumably the padded sequence length - confirm).
max_len = train_und.shape[1]

### Initialising or Reloading the Model

In [None]:
# Model hyperparameters, forwarded to both Encoder and Decoder through `params`.
emb_dim = 64
hid_dim = 256
n_layers = 2
dropout = 0.5
# Batch size handed to the DataLoader.
batch_size = 18
# Maximum gradient norm passed to clip_grad_norm_ in train().
clip = 1
# Threshold that the count of non-improving epochs must exceed before training stops early.
early_stop = 3

In [None]:
# When False, the checkpoint-loading cell below restores a saved model and optimiser.
new = False
# Placeholders filled in by the checkpoint-loading cell; they stay None for a fresh run.
loaded_loss = None
loaded_epoch = None
loaded_timestamp = None

Initialise the model.

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Hyperparameters shared by the encoder and the decoder.
params = dict(emb_dim=emb_dim, hid_dim=hid_dim, n_layers=n_layers, dropout=dropout)

# Build the encoder-decoder model and move it to the selected device.
# NOTE(review): the decoder predicts vocab_size - 1 classes while the encoder accepts
# vocab_size inputs - presumably one token is input-only; confirm that no target index
# can reach vocab_size - 1, otherwise the loss would fail on that batch.
encoder = Encoder(input_dim=vocab_size, **params)
decoder = Decoder(output_dim=vocab_size - 1, **params)
model = Seq2Seq(encoder, decoder, device).to(device)

# Adam with its default settings; padding positions are excluded from the loss.
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=vocab['w2i']["<pad>"])

Or, load the previous model.

In [None]:
if not new:
    # Identify the checkpoint: its run folder (timestamp) and its file name without extension.
    timestamp, name = '2024-05-05_18-42-23', 'e37-l4.42-p64_256_2_0.5_18'

    # Read the checkpoint, remapping its tensors onto the current device.
    checkpoint = torch.load(f'{path}models/{timestamp}/{name}.pth', map_location=device)

    # Restore the model weights first, then the optimiser state.
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Remember where training left off so that the next cells can resume the same run.
    loaded_timestamp = timestamp
    loaded_loss = checkpoint['loss']
    loaded_epoch = checkpoint['epoch']

In [None]:
# Record the batch size next to the other hyperparameters; `params` is later printed
# and embedded in the checkpoint file names. This runs after the cell that builds
# Encoder/Decoder from **params, so the batch size is not passed to them.
params['batch_size'] = batch_size

# Define the data loader over the training set, with shuffling enabled.
loader = DataLoader(train_data, batch_size=params['batch_size'], shuffle=True)

Define the train loop.

In [None]:
def train(model, loader, optimizer, criterion, clip, device, verbose=False) -> float:
    """ Train the model for one epoch.

    Batches whose forward/backward pass raises a RuntimeError are skipped on a
    best-effort basis and excluded from the average. They are no longer silent:
    one RuntimeWarning summarising them is emitted at the end of the epoch.

    Args:
        model (Seq2Seq): The model, called as model(und, d).
        loader (DataLoader): The data loader yielding (und, d) batches.
        optimizer (optim.Adam): The optimiser.
        criterion (nn.CrossEntropyLoss): The loss function.
        clip (float): The maximum gradient norm used for clipping.
        device (torch.device): The device.
        verbose (bool): Whether to report progress after every batch via verbose_batch.

    Returns:
        epoch_loss (float): The mean loss over the batches that were actually trained.

    Raises:
        RuntimeError: If no batch could be trained (empty loader or every batch
            failed); the last batch error, if any, is chained as the cause.
    """
    import warnings

    time_init = time.time()
    n_batches = len(loader)
    skipped = 0
    last_error = None

    model.train()
    epoch_loss = 0

    for i, (und, d) in enumerate(loader):
        # Move the data to the device.
        und, d = und.to(device), d.to(device)
        optimizer.zero_grad()

        # Run the model.
        try:
            output = model(und, d)

            # Drop index 0 along the first axis of both tensors and flatten them.
            # NOTE(review): presumably index 0 is the start-of-sequence position, which
            # requires a sequence-first layout - confirm against Seq2Seq.forward.
            # reshape (unlike view) also accepts non-contiguous tensors, so a layout
            # quirk cannot turn into a skipped batch.
            output_dim = output.shape[-1]
            output = output[1:].reshape(-1, output_dim)
            d = d[1:].reshape(-1)

            # Calculate the loss and update the weights.
            loss = criterion(output, d)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            epoch_loss += loss.item()
        except RuntimeError as e:
            # Best effort: keep training, but remember the failure for the report below.
            skipped += 1
            last_error = e

        # Verbose the batch.
        if verbose:
            verbose_batch(i, time_init, n_batches, skipped, epoch_loss)

    trained = n_batches - skipped
    if trained <= 0:
        # Avoid an opaque ZeroDivisionError and surface the real cause instead.
        raise RuntimeError(
            "No batch was trained successfully; cannot compute the epoch loss."
        ) from last_error

    if skipped:
        warnings.warn(
            f"Skipped {skipped} of {n_batches} batches because of a RuntimeError; "
            f"last error: {last_error}",
            RuntimeWarning,
        )

    return epoch_loss / trained

In [None]:
# Define the train loop values.
timestamp = loaded_timestamp or dt.now().strftime("%Y-%m-%d_%H-%M-%S")
!mkdir -p {path}models/{timestamp}

print(f"Timestamp: {timestamp}")

num_epochs = 40
epoch = loaded_epoch or 0
not_improved = 0

# Check if there is a loaded loss.
best_loss = loaded_loss or float('inf')
prev_save = None

Timestamp: 2024-05-05_18-42-23


Start training.

In [17]:
# Announce the run: starting epoch (when resuming), epoch target and hyperparameters.
print("\033[92m"
    f"Training started",
    str(f"from epoch {epoch + 1} " if epoch > 0 else "") +
    f"for {num_epochs} epoch" + ('s' if num_epochs > 1 else ''),
    f"with parameters",
    f"{', '.join(f'{key}={value}' for key, value in params.items())}",
    sep=" ", end=".\033[0m\n\n"
)

# verbose: report progress after every batch; del_prev: keep only the latest checkpoint.
verbose = True
del_prev = True

# A checkpoint that already sits at the target still gets one more epoch.
if epoch == num_epochs:
    num_epochs += 1

while epoch < num_epochs:
    try:
        # Train the model.
        train_loss = train(model, loader, optimizer, criterion, clip, device, verbose)

        # Increment the epoch.
        epoch += 1

        # Verbose the epoch.
        print(f"\n\033[93mEpoch: {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}\033[0m", end="")

        # Check if the loss decreased.
        if train_loss > best_loss:
            print("\n")

            # Check for the early stop criteria.
            not_improved += 1
            if not_improved > early_stop:
                print(f"\n\033[92mEarly stopping at epoch {epoch}. The best loss is {best_loss}.\033[0m")
                break

            continue

        # The loss improved: update the best loss and restart the patience counter,
        # so that early stopping reacts to consecutive non-improving epochs only.
        best_loss = train_loss
        not_improved = 0

        # Determine the save addresses.
        file_name = f"e{epoch}-l{train_loss:.2f}-p{'_'.join(str(param) for param in params.values())}.pth"
        save_name = f"{path}models/{timestamp}/{file_name}"

        # Save the model together with everything needed to resume training.
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': best_loss,
            'epoch': epoch
        }, save_name)

        # Remove the previous save, but only after the new one has been written.
        if prev_save and del_prev:
            os.remove(prev_save)
        prev_save = save_name

        print(f" [Saved as '{file_name}']\n")
    except KeyboardInterrupt:
        # Allow a manual stop without losing the checkpoints written so far.
        print(f"\n\033[91mTraining interrupted at epoch {epoch + 1}.\033[0m")
        break

print(f"\n\033[92mTraining completed. The best loss is {best_loss}.\033[0m")

[92mTraining started from epoch 41 for 60 epochs with parameters emb_dim=64, hid_dim=256, n_layers=2, dropout=0.5, batch_size=18.[0m

[93mEpoch: 41/60, Train Loss: 4.2284[0m [Saved as 'e41-l4.23-p64_256_2_0.5_18.pth']

[93mEpoch: 42/60, Train Loss: 4.1811[0m [Saved as 'e42-l4.18-p64_256_2_0.5_18.pth']

[93mEpoch: 43/60, Train Loss: 4.1417[0m [Saved as 'e43-l4.14-p64_256_2_0.5_18.pth']

[93mEpoch: 44/60, Train Loss: 4.0989[0m [Saved as 'e44-l4.10-p64_256_2_0.5_18.pth']

[93mEpoch: 45/60, Train Loss: 4.0534[0m [Saved as 'e45-l4.05-p64_256_2_0.5_18.pth']

[93mEpoch: 46/60, Train Loss: 4.0230[0m [Saved as 'e46-l4.02-p64_256_2_0.5_18.pth']

[93mEpoch: 47/60, Train Loss: 3.9822[0m [Saved as 'e47-l3.98-p64_256_2_0.5_18.pth']

[93mEpoch: 48/60, Train Loss: 3.9522[0m [Saved as 'e48-l3.95-p64_256_2_0.5_18.pth']

[93mEpoch: 49/60, Train Loss: 3.9125[0m [Saved as 'e49-l3.91-p64_256_2_0.5_18.pth']

[93mEpoch: 50/60, Train Loss: 3.8874[0m [Saved as 'e50-l3.89-p64_256_2_0.5_18.p