In [1]:
%pip install -q transformers

In [7]:
from typing import List, Tuple

import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, SubsetRandomSampler, TensorDataset, DataLoader
from tqdm.notebook import tqdm
from transformers import AutoModel, AutoTokenizer

In [2]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# The bare expression on the last line displays the chosen device.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

# Dataset

In [5]:
# Read the train and test tables from CSV.
# NOTE(review): absolute /content paths — presumably a Colab upload location; confirm before running elsewhere.
train = pd.read_csv("/content/train.csv")
test = pd.read_csv("/content/test.csv")

In [49]:
# Lookup tables between rating labels (1..5) and class indices (0..4).
ltoi = {label: label - 1 for label in range(1, 6)}
itol = dict((index, label) for label, index in ltoi.items())

In [None]:
# Replace each rating label in `rate` with its class index via ltoi.
# NOTE: the column is overwritten in place, so this cell is not idempotent —
# running it again feeds the already-converted values through ltoi a second time.
train.rate = train.rate.map(ltoi)

In [7]:
class Ratings(Dataset):
    def __init__(self, data: pd.DataFrame):
        self.data: pd.DataFrame = data
        self.text: List[str] = data.text.to_list()
        self.targets = None
        if "rate" in data:
            self.targets = data.rate.tolist()

    def __getitem__(self, index) -> Tuple[str, int] | str:
        if self.targets:
            return (self.text[index], self.targets[index])
        return self.text[index]

    def __len__(self) -> int:
        return len(self.text)

In [8]:
# Wrap both frames; items include a label only when the frame has a `rate` column.
train_ds = Ratings(train)
test_ds = Ratings(test)

In [9]:
# Hold out a stratified dev subset of the training data.
dev_ratio = 0.15
n_samples = len(train_ds)

# Label of every sample (position 1 of each item), used as the stratification key.
targets = [train_ds[idx][1] for idx in range(n_samples)]

train_ids, dev_ids = train_test_split(
    range(n_samples),
    test_size=dev_ratio,
    stratify=targets,
    random_state=42,
)

# Samplers restricted to the respective index subsets.
train_sampler, dev_sampler = SubsetRandomSampler(train_ids), SubsetRandomSampler(dev_ids)

# Tokenizer

In [10]:
# Checkpoint name; the same name is used later to load the encoder weights.
model_name = "ai-forever/sbert_large_nlu_ru"
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.71M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [11]:
tokenizer

BertTokenizerFast(name_or_path='ai-forever/sbert_large_nlu_ru', vocab_size=120138, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [58]:
def collate(batch):
    """Tokenize a batch of dataset items into padded tensors.

    ``batch`` is a list of either ``(text, target)`` tuples or bare texts;
    the kind is decided from the first element. Texts are padded to the
    longest sequence in the batch and truncated to 512 tokens.

    Returns ``(encoded, targets)`` for tuple items (``targets`` is a plain
    list) and just ``encoded`` for bare texts.
    """
    has_targets = isinstance(batch[0], tuple)
    texts = [item[0] for item in batch] if has_targets else list(batch)

    encoded = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    if has_targets:
        return encoded, [item[1] for item in batch]
    return encoded

In [88]:
from torch.utils.data import DataLoader

# The train and dev loaders both read from train_ds and differ only in the
# sampler that selects their indices; the test loader iterates test_ds in order.
batch_sizes = dict(train=128, dev=128, test=128)
train_loader = DataLoader(
    train_ds,
    batch_size=batch_sizes["train"],
    sampler=train_sampler,
    collate_fn=collate,
)
dev_loader = DataLoader(
    train_ds,
    batch_size=batch_sizes["dev"],
    sampler=dev_sampler,
    collate_fn=collate,
)
test_loader = DataLoader(
    test_ds,
    batch_size=batch_sizes["test"],
    collate_fn=collate,
)

# Feature Extraction

In [61]:
# Load the encoder from the same checkpoint name as the tokenizer.
sbert = AutoModel.from_pretrained(model_name)

In [62]:
# Move the encoder to the selected device; the returned module is displayed as the cell output.
sbert.to(device)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(120138, 1024, padding_idx=0)
    (position_embeddings): Embedding(512, 1024)
    (token_type_embeddings): Embedding(2, 1024)
    (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-23): 24 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1,

In [63]:
#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the sequence axis, ignoring masked positions.

    ``model_output[0]`` holds the token embeddings; ``attention_mask`` is
    broadcast over the embedding axis so that positions with mask 0 contribute
    nothing to the sum. The divisor is clamped to at least 1e-9 so a fully
    masked row does not divide by zero.
    """
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    masked_sum = (token_embeddings * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return masked_sum / token_counts

In [77]:
# Uninitialised buffer with one 1024-wide row per training index, plus the label list
# that is filled alongside it.
train_sent_embs = torch.empty(len(train_ids), 1024, dtype=torch.float32)
train_targets = []

In [78]:
# Encode the training subset: each batch's pooled embeddings are written into the
# next free rows of train_sent_embs, and its labels are appended in the same order.
sbert.eval()
cur_idx = 0
with torch.no_grad():
    for inputs, targets in tqdm(train_loader):
        inputs = inputs.to(device)
        outputs = sbert(**inputs)
        sent_embs = mean_pooling(outputs, inputs["attention_mask"])
        next_idx = cur_idx + len(targets)
        train_sent_embs[cur_idx:next_idx] = sent_embs
        train_targets.extend(targets)
        cur_idx = next_idx

  0%|          | 0/324 [00:00<?, ?it/s]

In [80]:
# Uninitialised buffer with one 1024-wide row per dev index, plus the matching label list.
dev_sent_embs = torch.empty(len(dev_ids), 1024)
dev_targets = []

In [81]:
# Encode the dev subset the same way as the training subset: pooled embeddings go
# into consecutive rows of dev_sent_embs, labels are collected in the same order.
sbert.eval()
cur_idx = 0
with torch.no_grad():
    for inputs, targets in tqdm(dev_loader):
        inputs = inputs.to(device)
        outputs = sbert(**inputs)
        sent_embs = mean_pooling(outputs, inputs["attention_mask"])
        next_idx = cur_idx + len(targets)
        dev_sent_embs[cur_idx:next_idx] = sent_embs
        cur_idx = next_idx
        dev_targets.extend(targets)

  0%|          | 0/58 [00:00<?, ?it/s]

In [87]:
# Uninitialised buffer with one 1024-wide row per test sample.
test_sent_embs = torch.empty(len(test_ds), 1024)

In [89]:
# Encode the test set. test_loader has no sampler or shuffling, so rows are
# written in dataset order: row i of test_sent_embs belongs to sample i of test_ds.
cur_idx = 0
sbert.eval()
with torch.no_grad():
    for inputs in tqdm(test_loader):
        inputs = inputs.to(device)
        outputs = sbert(**inputs)
        sent_embs = mean_pooling(outputs, inputs['attention_mask'])
        # Advance by the number of rows actually produced rather than the
        # configured batch size, so a short final batch is handled explicitly
        # instead of relying on the slice being clipped at the buffer's end.
        n_rows = sent_embs.size(0)
        test_sent_embs[cur_idx:cur_idx + n_rows] = sent_embs
        cur_idx += n_rows

# The buffer was allocated uninitialised, so every row must have been written.
assert cur_idx == len(test_sent_embs), "test embedding buffer was not filled completely"

  0%|          | 0/96 [00:00<?, ?it/s]

In [90]:
# Save the embeddings and labels to disk so they can be reloaded without
# re-running the encoder. Label lists are converted to int16 tensors first.
torch.save(train_sent_embs, "train_sent_embs.pt")
torch.save(torch.tensor(train_targets, dtype=torch.int16), "train_targets.pt")

torch.save(dev_sent_embs, "dev_sent_embs.pt")
torch.save(torch.tensor(dev_targets, dtype=torch.int16), "dev_targets.pt")

torch.save(test_sent_embs, "test_sent_embs.pt")

# Load embeddings

In [3]:
# Reload the cached embeddings and labels from disk.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered file cannot execute arbitrary code; this matches how `load()` reads
# the model checkpoint.
train_sent_embs = torch.load("train_sent_embs.pt", weights_only=True)
train_targets = torch.load("train_targets.pt", weights_only=True)

dev_sent_embs = torch.load("dev_sent_embs.pt", weights_only=True)
dev_targets = torch.load("dev_targets.pt", weights_only=True)

test_sent_embs = torch.load("test_sent_embs.pt", weights_only=True)

  train_sent_embs = torch.load("train_sent_embs.pt")
  train_targets = torch.load("train_targets.pt")
  dev_sent_embs = torch.load("dev_sent_embs.pt")
  dev_targets = torch.load("dev_targets.pt")
  test_sent_embs = torch.load("test_sent_embs.pt")


In [33]:
# Pair each embedding row with its label. Labels are cast to int64 here
# (they are stored on disk as int16); the test set has embeddings only.
train_sent_emb_ds = TensorDataset(train_sent_embs, train_targets.long())
dev_sent_emb_ds = TensorDataset(dev_sent_embs, dev_targets.long())
test_sent_emb_ds = TensorDataset(test_sent_embs)

In [34]:
# Loaders over the cached embeddings. Only the training loader shuffles;
# dev and test keep dataset order.
sent_emb_batch_sizes = dict(train=256, dev=256, test=256)
train_sent_emb_loader = DataLoader(
    train_sent_emb_ds,
    batch_size=sent_emb_batch_sizes["train"],
    shuffle=True,
)
dev_sent_emb_loader = DataLoader(
    dev_sent_emb_ds,
    batch_size=sent_emb_batch_sizes["dev"],
)
test_sent_emb_loader = DataLoader(
    test_sent_emb_ds,
    batch_size=sent_emb_batch_sizes["test"],
)

# Model

In [74]:
class RatingModel(nn.Module):
    """Feed-forward classifier: 1024-wide input -> 512 -> 256 -> 32 -> 5 logits."""

    def __init__(self):
        super().__init__()
        # 1024 -> 512; the Linear has no bias and is followed by BatchNorm.
        self.l1 = nn.Sequential(
            nn.Linear(1024, 512, bias=False),
            nn.BatchNorm1d(512),
            nn.Dropout(p=0.5),
            nn.GELU(),
        )
        # 512 -> 256, no normalisation in this stage.
        self.l2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.Dropout(p=0.25),
            nn.GELU(),
        )
        # 256 -> 32, normalised, no dropout.
        self.l3 = nn.Sequential(
            nn.Linear(256, 32),
            nn.BatchNorm1d(32),
            nn.GELU(),
        )
        # 32 -> 5 output logits.
        self.final_layer = nn.Linear(32, 5)

    def forward(self, x):
        """Run ``x`` through the three hidden stages and return the raw logits."""
        hidden = x
        for stage in (self.l1, self.l2, self.l3):
            hidden = stage(hidden)
        return self.final_layer(hidden)

In [81]:
# Instantiate the classifier with its default construction (no checkpoint loaded).
rating_model = RatingModel()

# Saving and loading

In [82]:
def save(model, path: str) -> None:
    """Write ``model``'s state dict (not the module object itself) to ``path``."""
    weights = model.state_dict()
    torch.save(weights, path)


def load(path: str):
    """Build a fresh ``RatingModel`` and populate it from the state dict at ``path``.

    The returned model lives on the CPU; callers move it to their device.
    """
    model = RatingModel()
    # map_location="cpu" lets a checkpoint saved from a CUDA model be restored
    # on a machine without a GPU; the new model's parameters are on the CPU anyway.
    state = torch.load(path, weights_only=True, map_location="cpu")
    model.load_state_dict(state)
    return model

# Train

In [83]:
# Move the classifier to the selected device; the trailing `;` suppresses the module repr.
rating_model.to(device);

In [84]:
# Training setup: epoch count, cross-entropy loss, and Adam over all classifier parameters.
n_epochs = 50

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.Adam(rating_model.parameters(), lr=3e-4)

In [86]:
# Per-epoch metric log: every key starts with its own empty list.
history = {
    metric: []
    for metric in (
        "train_loss",
        "dev_loss",
        "train_time",
        "val_accuracy",
        "val_f1_macro",
        "val_f1_weighted",
    )
}
# Best weighted dev F1 seen so far; used to decide when to checkpoint.
best_val_f1_w = 0.0

In [87]:
# Train for n_epochs. After every epoch the model is evaluated on the dev split,
# the metrics are appended to `history`, and the weights are checkpointed
# whenever the weighted dev F1 beats the best value seen so far.
for ep in range(n_epochs):
    print(f"\nEpoch {ep + 1}/{n_epochs}")
    epoch_start_time = time.time()

    # Training
    rating_model.train()
    train_loss_temp = 0.0
    total_train_samples = 0

    for inputs, targets in tqdm(train_sent_emb_loader, desc="Training", leave=False):
        inputs, targets = inputs.to(device), targets.to(device)
        opt.zero_grad()
        logits = rating_model(inputs)
        loss = loss_fn(logits, targets)
        loss.backward()
        opt.step()

        # Weight the batch loss by the batch size so the epoch average stays
        # exact when the last batch is smaller than the others.
        batch_size = targets.size(0)
        train_loss_temp += loss.item() * batch_size
        total_train_samples += batch_size

    avg_train_loss = train_loss_temp / total_train_samples
    history["train_loss"].append(avg_train_loss)

    # Validation
    rating_model.eval()
    dev_loss_temp = 0.0
    total_dev_samples = 0
    correct_predictions = 0
    all_targets = []
    all_predictions = []

    with torch.no_grad():
        for inputs, targets in tqdm(dev_sent_emb_loader, desc="Validation", leave=False):
            inputs, targets = inputs.to(device), targets.to(device)
            logits = rating_model(inputs)
            loss = loss_fn(logits, targets)

            batch_size = targets.size(0)
            dev_loss_temp += loss.item() * batch_size
            total_dev_samples += batch_size

            predictions = logits.argmax(dim=1)
            correct_predictions += (predictions == targets).sum().item()

            # Collected across batches so F1 is computed over the whole dev split.
            all_predictions.extend(predictions.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    # Average validation loss and accuracy for the epoch
    avg_dev_loss = dev_loss_temp / total_dev_samples
    val_accuracy = correct_predictions / total_dev_samples
    history["dev_loss"].append(avg_dev_loss)
    history["val_accuracy"].append(val_accuracy)

    # Calculate F1 Score
    val_f1_weighted = f1_score(all_targets, all_predictions, average="weighted")
    val_f1_macro = f1_score(all_targets, all_predictions, average="macro")
    history["val_f1_weighted"].append(val_f1_weighted)
    history["val_f1_macro"].append(val_f1_macro)

    # Epoch timing (covers training, validation and metric computation)
    epoch_time = time.time() - epoch_start_time
    history["train_time"].append(epoch_time)

    # Save model when the weighted F1 improves. The message reports the
    # 1-based epoch number, consistent with every other log line.
    if val_f1_weighted > best_val_f1_w:
        best_val_f1_w = val_f1_weighted
        save(rating_model, "rating_model.pt")
        print(f"Save model at {ep + 1} epoch.")

    # Log epoch results
    print(
        f"Epoch [{ep + 1}/{n_epochs}] - "
        f"Train Loss: {avg_train_loss:.4f} | "
        f"Dev Loss: {avg_dev_loss:.4f} | "
        f"Val Accuracy: {val_accuracy * 100:.2f}% | "
        f"Val F1 Score weighted: {val_f1_weighted:.4f} | "
        f"Val F1 Score macro: {val_f1_macro:.4f} | "
        f"Time: {epoch_time:.2f} seconds"
    )


Epoch 1/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Save model at 0 epoch.
Epoch [1/50] - Train Loss: 1.0019 | Dev Loss: 0.8734 | Val Accuracy: 66.56% | Val F1 Score weighted: 0.6488 | Val F1 Score macro: 0.4670 | Time: 3.33 seconds

Epoch 2/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Save model at 1 epoch.
Epoch [2/50] - Train Loss: 0.8526 | Dev Loss: 0.8234 | Val Accuracy: 67.14% | Val F1 Score weighted: 0.6512 | Val F1 Score macro: 0.4682 | Time: 2.53 seconds

Epoch 3/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [3/50] - Train Loss: 0.8130 | Dev Loss: 0.8131 | Val Accuracy: 67.22% | Val F1 Score weighted: 0.6486 | Val F1 Score macro: 0.4654 | Time: 1.78 seconds

Epoch 4/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Save model at 3 epoch.
Epoch [4/50] - Train Loss: 0.7923 | Dev Loss: 0.8131 | Val Accuracy: 67.52% | Val F1 Score weighted: 0.6529 | Val F1 Score macro: 0.4749 | Time: 1.59 seconds

Epoch 5/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Save model at 4 epoch.
Epoch [5/50] - Train Loss: 0.7791 | Dev Loss: 0.8072 | Val Accuracy: 67.48% | Val F1 Score weighted: 0.6531 | Val F1 Score macro: 0.4730 | Time: 1.48 seconds

Epoch 6/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Save model at 5 epoch.
Epoch [6/50] - Train Loss: 0.7673 | Dev Loss: 0.8031 | Val Accuracy: 67.63% | Val F1 Score weighted: 0.6545 | Val F1 Score macro: 0.4746 | Time: 1.28 seconds

Epoch 7/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Save model at 6 epoch.
Epoch [7/50] - Train Loss: 0.7586 | Dev Loss: 0.8092 | Val Accuracy: 67.79% | Val F1 Score weighted: 0.6586 | Val F1 Score macro: 0.4797 | Time: 1.10 seconds

Epoch 8/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [8/50] - Train Loss: 0.7489 | Dev Loss: 0.8181 | Val Accuracy: 67.56% | Val F1 Score weighted: 0.6542 | Val F1 Score macro: 0.4784 | Time: 1.38 seconds

Epoch 9/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [9/50] - Train Loss: 0.7396 | Dev Loss: 0.8169 | Val Accuracy: 67.41% | Val F1 Score weighted: 0.6548 | Val F1 Score macro: 0.4771 | Time: 1.11 seconds

Epoch 10/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Save model at 9 epoch.
Epoch [10/50] - Train Loss: 0.7292 | Dev Loss: 0.8202 | Val Accuracy: 67.64% | Val F1 Score weighted: 0.6623 | Val F1 Score macro: 0.4924 | Time: 1.07 seconds

Epoch 11/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [11/50] - Train Loss: 0.7217 | Dev Loss: 0.8296 | Val Accuracy: 67.40% | Val F1 Score weighted: 0.6592 | Val F1 Score macro: 0.4844 | Time: 1.11 seconds

Epoch 12/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [12/50] - Train Loss: 0.7137 | Dev Loss: 0.8367 | Val Accuracy: 67.25% | Val F1 Score weighted: 0.6583 | Val F1 Score macro: 0.4918 | Time: 1.07 seconds

Epoch 13/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [13/50] - Train Loss: 0.7024 | Dev Loss: 0.8415 | Val Accuracy: 67.36% | Val F1 Score weighted: 0.6582 | Val F1 Score macro: 0.4847 | Time: 1.09 seconds

Epoch 14/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [14/50] - Train Loss: 0.6962 | Dev Loss: 0.8511 | Val Accuracy: 67.00% | Val F1 Score weighted: 0.6546 | Val F1 Score macro: 0.4860 | Time: 1.08 seconds

Epoch 15/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [15/50] - Train Loss: 0.6841 | Dev Loss: 0.8626 | Val Accuracy: 66.77% | Val F1 Score weighted: 0.6486 | Val F1 Score macro: 0.4773 | Time: 1.39 seconds

Epoch 16/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [16/50] - Train Loss: 0.6756 | Dev Loss: 0.8712 | Val Accuracy: 67.15% | Val F1 Score weighted: 0.6571 | Val F1 Score macro: 0.4940 | Time: 1.44 seconds

Epoch 17/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [17/50] - Train Loss: 0.6677 | Dev Loss: 0.8943 | Val Accuracy: 66.64% | Val F1 Score weighted: 0.6543 | Val F1 Score macro: 0.4886 | Time: 1.33 seconds

Epoch 18/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [18/50] - Train Loss: 0.6563 | Dev Loss: 0.9336 | Val Accuracy: 66.19% | Val F1 Score weighted: 0.6479 | Val F1 Score macro: 0.4906 | Time: 1.13 seconds

Epoch 19/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [19/50] - Train Loss: 0.6493 | Dev Loss: 0.9190 | Val Accuracy: 66.29% | Val F1 Score weighted: 0.6524 | Val F1 Score macro: 0.4925 | Time: 1.13 seconds

Epoch 20/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [20/50] - Train Loss: 0.6380 | Dev Loss: 0.9389 | Val Accuracy: 65.97% | Val F1 Score weighted: 0.6531 | Val F1 Score macro: 0.4933 | Time: 1.12 seconds

Epoch 21/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [21/50] - Train Loss: 0.6304 | Dev Loss: 0.9785 | Val Accuracy: 65.42% | Val F1 Score weighted: 0.6462 | Val F1 Score macro: 0.4817 | Time: 1.30 seconds

Epoch 22/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [22/50] - Train Loss: 0.6174 | Dev Loss: 1.0043 | Val Accuracy: 65.29% | Val F1 Score weighted: 0.6411 | Val F1 Score macro: 0.4747 | Time: 1.11 seconds

Epoch 23/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [23/50] - Train Loss: 0.6123 | Dev Loss: 1.0126 | Val Accuracy: 64.88% | Val F1 Score weighted: 0.6375 | Val F1 Score macro: 0.4735 | Time: 1.15 seconds

Epoch 24/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [24/50] - Train Loss: 0.6007 | Dev Loss: 1.0463 | Val Accuracy: 65.27% | Val F1 Score weighted: 0.6431 | Val F1 Score macro: 0.4815 | Time: 1.14 seconds

Epoch 25/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [25/50] - Train Loss: 0.5914 | Dev Loss: 1.0655 | Val Accuracy: 64.27% | Val F1 Score weighted: 0.6410 | Val F1 Score macro: 0.4867 | Time: 1.18 seconds

Epoch 26/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [26/50] - Train Loss: 0.5832 | Dev Loss: 1.0893 | Val Accuracy: 64.55% | Val F1 Score weighted: 0.6391 | Val F1 Score macro: 0.4747 | Time: 1.34 seconds

Epoch 27/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [27/50] - Train Loss: 0.5728 | Dev Loss: 1.1153 | Val Accuracy: 64.14% | Val F1 Score weighted: 0.6358 | Val F1 Score macro: 0.4702 | Time: 1.51 seconds

Epoch 28/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [28/50] - Train Loss: 0.5656 | Dev Loss: 1.1300 | Val Accuracy: 64.11% | Val F1 Score weighted: 0.6389 | Val F1 Score macro: 0.4812 | Time: 1.30 seconds

Epoch 29/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [29/50] - Train Loss: 0.5543 | Dev Loss: 1.1931 | Val Accuracy: 63.04% | Val F1 Score weighted: 0.6288 | Val F1 Score macro: 0.4691 | Time: 1.14 seconds

Epoch 30/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [30/50] - Train Loss: 0.5496 | Dev Loss: 1.1735 | Val Accuracy: 64.32% | Val F1 Score weighted: 0.6377 | Val F1 Score macro: 0.4752 | Time: 1.08 seconds

Epoch 31/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [31/50] - Train Loss: 0.5340 | Dev Loss: 1.2397 | Val Accuracy: 62.93% | Val F1 Score weighted: 0.6281 | Val F1 Score macro: 0.4637 | Time: 1.12 seconds

Epoch 32/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [32/50] - Train Loss: 0.5285 | Dev Loss: 1.2265 | Val Accuracy: 63.05% | Val F1 Score weighted: 0.6307 | Val F1 Score macro: 0.4680 | Time: 1.11 seconds

Epoch 33/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [33/50] - Train Loss: 0.5248 | Dev Loss: 1.3069 | Val Accuracy: 62.86% | Val F1 Score weighted: 0.6273 | Val F1 Score macro: 0.4634 | Time: 1.13 seconds

Epoch 34/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [34/50] - Train Loss: 0.5131 | Dev Loss: 1.3239 | Val Accuracy: 63.36% | Val F1 Score weighted: 0.6258 | Val F1 Score macro: 0.4615 | Time: 1.38 seconds

Epoch 35/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [35/50] - Train Loss: 0.5034 | Dev Loss: 1.3296 | Val Accuracy: 63.04% | Val F1 Score weighted: 0.6245 | Val F1 Score macro: 0.4547 | Time: 1.12 seconds

Epoch 36/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [36/50] - Train Loss: 0.4976 | Dev Loss: 1.3788 | Val Accuracy: 62.63% | Val F1 Score weighted: 0.6220 | Val F1 Score macro: 0.4531 | Time: 1.11 seconds

Epoch 37/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [37/50] - Train Loss: 0.4939 | Dev Loss: 1.3740 | Val Accuracy: 63.29% | Val F1 Score weighted: 0.6291 | Val F1 Score macro: 0.4670 | Time: 1.54 seconds

Epoch 38/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [38/50] - Train Loss: 0.4843 | Dev Loss: 1.4728 | Val Accuracy: 61.84% | Val F1 Score weighted: 0.6179 | Val F1 Score macro: 0.4540 | Time: 1.51 seconds

Epoch 39/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [39/50] - Train Loss: 0.4815 | Dev Loss: 1.4046 | Val Accuracy: 62.51% | Val F1 Score weighted: 0.6239 | Val F1 Score macro: 0.4647 | Time: 1.30 seconds

Epoch 40/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [40/50] - Train Loss: 0.4734 | Dev Loss: 1.5365 | Val Accuracy: 61.81% | Val F1 Score weighted: 0.6177 | Val F1 Score macro: 0.4545 | Time: 1.17 seconds

Epoch 41/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [41/50] - Train Loss: 0.4657 | Dev Loss: 1.5051 | Val Accuracy: 62.55% | Val F1 Score weighted: 0.6221 | Val F1 Score macro: 0.4563 | Time: 1.13 seconds

Epoch 42/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [42/50] - Train Loss: 0.4577 | Dev Loss: 1.5319 | Val Accuracy: 62.08% | Val F1 Score weighted: 0.6208 | Val F1 Score macro: 0.4585 | Time: 1.14 seconds

Epoch 43/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [43/50] - Train Loss: 0.4546 | Dev Loss: 1.5434 | Val Accuracy: 62.16% | Val F1 Score weighted: 0.6215 | Val F1 Score macro: 0.4582 | Time: 1.10 seconds

Epoch 44/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [44/50] - Train Loss: 0.4469 | Dev Loss: 1.5811 | Val Accuracy: 61.62% | Val F1 Score weighted: 0.6188 | Val F1 Score macro: 0.4588 | Time: 1.12 seconds

Epoch 45/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [45/50] - Train Loss: 0.4343 | Dev Loss: 1.6684 | Val Accuracy: 61.26% | Val F1 Score weighted: 0.6138 | Val F1 Score macro: 0.4496 | Time: 1.13 seconds

Epoch 46/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [46/50] - Train Loss: 0.4408 | Dev Loss: 1.6586 | Val Accuracy: 62.01% | Val F1 Score weighted: 0.6149 | Val F1 Score macro: 0.4471 | Time: 1.14 seconds

Epoch 47/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [47/50] - Train Loss: 0.4283 | Dev Loss: 1.6561 | Val Accuracy: 61.55% | Val F1 Score weighted: 0.6149 | Val F1 Score macro: 0.4495 | Time: 1.43 seconds

Epoch 48/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [48/50] - Train Loss: 0.4271 | Dev Loss: 1.6480 | Val Accuracy: 61.82% | Val F1 Score weighted: 0.6195 | Val F1 Score macro: 0.4577 | Time: 1.46 seconds

Epoch 49/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [49/50] - Train Loss: 0.4220 | Dev Loss: 1.7098 | Val Accuracy: 61.45% | Val F1 Score weighted: 0.6163 | Val F1 Score macro: 0.4552 | Time: 1.56 seconds

Epoch 50/50


Training:   0%|          | 0/162 [00:00<?, ?it/s]

Validation:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch [50/50] - Train Loss: 0.4167 | Dev Loss: 1.6892 | Val Accuracy: 62.14% | Val F1 Score weighted: 0.6219 | Val F1 Score macro: 0.4598 | Time: 1.28 seconds


In [88]:
best_val_f1_w

0.6622788879330143

# Predict

In [89]:
# Restore the checkpoint that the training loop wrote on its best weighted
# dev F1, then move the restored model to the selected device.
best_model = load("rating_model.pt")
best_model.to(device);

In [90]:
# Predict a class index for every test embedding, in loader (dataset) order.
predictions = []
best_model.eval()
with torch.no_grad():
    for inputs in tqdm(test_sent_emb_loader, leave=False):
        # Each batch is a 1-tuple because the test TensorDataset holds embeddings only.
        inputs = inputs[0].to(device)
        outputs = best_model(inputs)
        predictions += outputs.argmax(dim=1).tolist()

  0%|          | 0/48 [00:00<?, ?it/s]

In [91]:
# Fill the sample submission with the predicted class indices, convert them
# back to rating labels via itol, and preview the first rows.
submission = pd.read_csv("/content/sample_submission.csv")
submission.rate = predictions
submission.rate = submission.rate.map(itol)
submission.head()

Unnamed: 0,index,rate
0,0,5
1,1,5
2,2,5
3,3,4
4,4,1


In [92]:
# Write the submission file without the DataFrame index column.
submission.to_csv("submission.csv", index=False)