In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
from utils import PositionalEncoder, load_data, split_data
from models import TrainConfig, RNNClassifier

# Report the compute device(s) PyTorch can use in this session.
if not torch.cuda.is_available():
    print("Device: cpu")
else:
    # One "Device: cuda" header followed by the device name, per visible GPU.
    for gpu_index in range(torch.cuda.device_count()):
        print("Device: cuda")
        print(torch.cuda.get_device_name(gpu_index))

Device: cuda
Tesla T4


In [4]:
# Folder that holds the dataset files.
#
# Google Colab: mount Drive and point base_dir at the Drive copy by
# uncommenting the three lines below (and commenting out the local path).
# from google.colab import drive
# drive.mount('/content/drive')
# base_dir = "/content/drive/MyDrive/data/power"

# Local run: path relative to the notebook's working directory.
base_dir = "data/power"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Load data
# Training files to read from base_dir; uncomment an entry to include that file as well.
file_list = [
    "power-gb-train.tsv",
    "power-ua-train.tsv",
    # "power-fr-train.tsv",
    # "power-nl-train.tsv",
]

full_data = load_data(
    folder_path=base_dir,
    file_list=file_list,
    text_head="text_en",
)

# Two successive splits with the same seed: first hold out the test set, then
# carve a dev set out of the remainder.
# (If test_size is a fraction this yields roughly 64/16/20 train/dev/test -- TODO confirm.)
train_dev_raw, test_raw = split_data(full_data, test_size=0.2, random_state=0)
train_raw, dev_raw = split_data(train_dev_raw, test_size=0.2, random_state=0)


Load power-gb-train.tsv...
Load power-ua-train.tsv...


In [6]:

# Build the text encoder from the training split only; the same encoder is
# later handed to the models and to evaluation.
print("Prepare data encoder...")
train_encoder = PositionalEncoder()
# presumably builds the vocabulary from the training texts -- verify in utils.PositionalEncoder
train_encoder.fit(train_raw.texts)

Prepare data encoder...


In [7]:
# Batch the training and held-out splits. Only the training loader is shuffled:
# accuracy does not depend on sample order, so the test loader keeps a fixed
# order and evaluation runs are repeatable.
train_dataloader = DataLoader(train_raw, batch_size=50, shuffle=True)
test_dataloader = DataLoader(test_raw, batch_size=50, shuffle=False)

# Prepare baseline config
train_config = TrainConfig(
    optimizer_params = {'lr': 0.01},
    num_epochs       = 10,
    early_stop       = False,
    violation_limit  = 5   # presumably only relevant when early_stop is enabled -- TODO confirm
)

# Train baseline model
baseline_lstm = RNNClassifier(
    rnn_network         = nn.LSTM,
    word_embedding_dim  = 32,
    hidden_dim          = 64,
    bidirectional       = False,
    dropout             = 0,
    encoder             = train_encoder,
    # Fall back to the CPU when no GPU is visible instead of hard-coding 'cuda',
    # so the notebook also runs on a machine without CUDA.
    device              = 'cuda' if torch.cuda.is_available() else 'cpu'
)

# TODO: Ask Fredrik whether this explanation is correct.
# Training is slow because an LSTM reads a sentence one word at a time. The longest
# "sentence" in a batch can be around 1000 tokens, so each batch needs at least
# ~1000 sequential matrix multiplications.
baseline_lstm.fit(train_dataloader, train_config, no_progress_bar=False)



  tokens_sparse = torch.sparse_csr_tensor(crow, col, token_val, size=mat_size, dtype=torch.long)
Epoch 1: 100%|██████████| 567/567 [01:50<00:00,  5.12batch/s, batch_accuracy=0.703, loss=66.4]
Epoch 2: 100%|██████████| 567/567 [01:53<00:00,  4.99batch/s, batch_accuracy=0.676, loss=63.5]
Epoch 3: 100%|██████████| 567/567 [01:50<00:00,  5.15batch/s, batch_accuracy=0.73, loss=62.5]
Epoch 4: 100%|██████████| 567/567 [01:51<00:00,  5.08batch/s, batch_accuracy=0.73, loss=62.2]
Epoch 5: 100%|██████████| 567/567 [01:50<00:00,  5.15batch/s, batch_accuracy=0.757, loss=61.4]
Epoch 6: 100%|██████████| 567/567 [01:49<00:00,  5.17batch/s, batch_accuracy=0.811, loss=46.4]
Epoch 7: 100%|██████████| 567/567 [01:50<00:00,  5.14batch/s, batch_accuracy=0.973, loss=66]
Epoch 8: 100%|██████████| 567/567 [01:50<00:00,  5.13batch/s, batch_accuracy=0.919, loss=81.9]
Epoch 9: 100%|██████████| 567/567 [01:50<00:00,  5.15batch/s, batch_accuracy=0.838, loss=56.8]
Epoch 10: 100%|██████████| 567/567 [01:49<00:00,  5.

In [10]:
# Train baseline GRU model: same hyperparameters, encoder, data loader and
# training config as the LSTM baseline; only the recurrent layer type differs.
baseline_gru = RNNClassifier(
    rnn_network         = nn.GRU,
    word_embedding_dim  = 32,
    hidden_dim          = 64,
    bidirectional       = False,
    dropout             = 0,
    encoder             = train_encoder,
    # Fall back to the CPU when no GPU is visible instead of hard-coding 'cuda',
    # so the notebook also runs on a machine without CUDA.
    device              = 'cuda' if torch.cuda.is_available() else 'cpu'
)

baseline_gru.fit(train_dataloader, train_config, no_progress_bar=False)


Epoch 1: 100%|██████████| 567/567 [01:51<00:00,  5.08batch/s, batch_accuracy=0.73, loss=62]
Epoch 2: 100%|██████████| 567/567 [01:51<00:00,  5.08batch/s, batch_accuracy=0.703, loss=59.1]
Epoch 3: 100%|██████████| 567/567 [01:51<00:00,  5.07batch/s, batch_accuracy=0.703, loss=62.5]
Epoch 4: 100%|██████████| 567/567 [01:52<00:00,  5.06batch/s, batch_accuracy=0.757, loss=58.4]
Epoch 5: 100%|██████████| 567/567 [01:51<00:00,  5.06batch/s, batch_accuracy=0.514, loss=65.3]
Epoch 6: 100%|██████████| 567/567 [01:51<00:00,  5.11batch/s, batch_accuracy=0.514, loss=53.7]
Epoch 7: 100%|██████████| 567/567 [01:50<00:00,  5.11batch/s, batch_accuracy=0.622, loss=67.6]
Epoch 8: 100%|██████████| 567/567 [01:51<00:00,  5.09batch/s, batch_accuracy=0.757, loss=62.7]
Epoch 9: 100%|██████████| 567/567 [01:53<00:00,  4.98batch/s, batch_accuracy=0.486, loss=54.6]
Epoch 10: 100%|██████████| 567/567 [01:52<00:00,  5.06batch/s, batch_accuracy=0.784, loss=66.5]


In [11]:

def evaluate_model(
        model: nn.Module | RNNClassifier,
        test_dataloader,
        train_encoder
    ) -> float:
    """Compute the binary classification accuracy of a model over a dataloader.

    Parameters
    ----------
    model : nn.Module | RNNClassifier
        Trained classifier. It is called on an encoded batch and must expose a
        ``device`` attribute. Its output is thresholded at 0.5, so it is
        presumably a per-sample probability of the positive class -- TODO confirm.
    test_dataloader : iterable
        Yields batches of ``(ids, speakers, raw_inputs, raw_targets)``. Only the
        raw inputs and raw targets are used here.
    train_encoder : PositionalEncoder
        Encoder fitted on the training data; its ``vocabulary`` is reused to
        encode every evaluation batch.

    Returns
    -------
    float
        Fraction of samples whose thresholded prediction equals the target.
        ``nan`` if the dataloader yields no samples.
    """
    # Evaluate in inference mode (e.g. dropout disabled) and restore the
    # previous mode afterwards so the caller's model state is not changed.
    was_training = isinstance(model, nn.Module) and model.training
    if was_training:
        model.eval()

    num_correct = 0
    total_dpoints = 0
    try:
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time on long sequences.
        with torch.no_grad():
            for ids, speakers, raw_inputs, raw_targets in tqdm(test_dataloader, unit="batch"):

                # Encode this batch with the training vocabulary.
                batch_encoder = PositionalEncoder(vocabulary=train_encoder.vocabulary)
                inputs = batch_encoder.fit_transform(raw_inputs)
                targets = torch.as_tensor(raw_targets, dtype=torch.float).to(model.device)

                # Make prediction
                scores = model(inputs.to(model.device))

                # Flatten both sides so that a (batch, 1) score tensor cannot
                # silently broadcast against (batch,) targets.
                pred = scores.reshape(-1) > 0.5
                num_correct += (pred == targets.reshape(-1)).sum().item()

                # Count exactly the labels that were compared.
                total_dpoints += targets.numel()
    finally:
        if was_training:
            model.train()

    if total_dpoints == 0:
        # Accuracy is undefined without samples; avoid a ZeroDivisionError.
        return float("nan")

    return num_correct / total_dpoints

# Evaluate both baselines on the held-out test set.
# NOTE: training_accuracy_[-1] appears to be the accuracy of the final training
# mini-batch only (it matches the last batch_accuracy shown by the training
# progress bar), so it is a noisy figure and not directly comparable to the
# accuracy over the whole test set.
baseline_lstm_acc = evaluate_model(baseline_lstm, test_dataloader, train_encoder)
print(f"[LSTM] Last train batch accuracy: {baseline_lstm.training_accuracy_[-1] * 100:.1f}%. Test accuracy {baseline_lstm_acc * 100:.1f}%")

baseline_gru_acc = evaluate_model(baseline_gru, test_dataloader, train_encoder)
# Backward-compatible alias: the GRU result was originally stored under this (misleading) name.
baseline_lstm_gru = baseline_gru_acc
print(f"[GRU] Last train batch accuracy: {baseline_gru.training_accuracy_[-1] * 100:.1f}%. Test accuracy {baseline_gru_acc * 100:.1f}%")



100%|██████████| 181/181 [00:24<00:00,  7.33batch/s]


Last train accuracy: 94.6%. Test accuracy 70.9%


100%|██████████| 181/181 [00:24<00:00,  7.46batch/s]

Last train accuracy: 78.4%. Test accuracy 58.9%



