In [2]:
!pip install mlflow
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, Subset
from random import sample
from torch.utils.data import DataLoader
from transformers import RobertaModel, RobertaTokenizer
from sklearn.utils import resample
from torchmetrics.classification import (
    MulticlassF1Score,
    MulticlassPrecision,
    MulticlassRecall,
)
from tqdm import tqdm
import mlflow
import time
import pandas as pd
import os

Collecting mlflow
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.22.0 (from mlflow)
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl.metadata (31 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.22.0->mlflow)
  Downloading databricks_sdk-0.55.0-py3-none-any.whl.metadata (39 kB)
Collecting fastapi<1 (from mlflow-skinny==2.22.0->mlflow)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting packaging<25 (from mlflow-skinny==2.22.0->mlflow)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Collecting uvicorn<1 (from mlflow-skinny==2.22.0->mlflow)
  Downloading uvicorn-0.34.3-py3-none-any.whl.metadata (6.5 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_co

2025-06-05 11:25:10.660908: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749122710.843956      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749122710.898739      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
LABEL_MAPPING = {
    "pants-fire": 0,
    "false": 0,
    "barely-true": 0,
    "half-true": 1,
    "mostly-true": 1,
    "true": 1,
}

ids2labels = [
    # "pants-fire",
    "false",
    # "barely-true",
    # "half-true",
    # "mostly-true",
    "true",
]


def save_checkpoint(model, optimizer, epoch, val_acc, path="checkpoint.pth"):
    checkpoint = {
        "model_state_dict": model.state_for_save(),
        "optimizer_state_dict": optimizer.state_dict(),
        "epoch": epoch,
        "val_acc": val_acc,
    }
    torch.save(checkpoint, path)
    print(
        f"Checkpoint saved at epoch {epoch} "
        f"with validation accuracy {val_acc:.4f}"
    )


def load_checkpoint(
    model, optimizer, path="checkpoint.pth", resume=False, reset_epoch=False
):
    if not resume:
        print("Resume is False. Starting from scratch.")
        return 0, 0  # Start fresh

    if os.path.exists(path):
        checkpoint = torch.load(path)
        model.load_state_from_save(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
        val_acc = checkpoint["val_acc"]
        if reset_epoch:
            print(
                f"Checkpoint loaded: Starting from initial"
                f"epoch, validation accuracy {val_acc:.4f}"
            )
            return 0, val_acc  # Start fresh with existing model
        else:
            print(
                f"Checkpoint loaded: Resuming from epoch "
                f"{epoch+1}, validation accuracy {val_acc:.4f}"
            )
            return epoch + 1, val_acc  # Next epoch to train
    else:
        print("No checkpoint found. Starting from scratch.")
        return 0, 0  # Start fresh


def save_best_model(model, optimizer, epoch, val_acc, path="best_model.pth"):
    best_model = {
        "model_state_dict": model.state_for_save(),
        "optimizer_state_dict": optimizer.state_dict(),
        "epoch": epoch,
        "val_acc": val_acc,
    }
    torch.save(best_model, path)
    print(
        f"Best model saved at epoch {epoch} "
        f"with validation accuracy {val_acc:.4f}"
    )


def load_best_model(model, path="best_model.pth"):
    if os.path.exists(path):
        best_model = torch.load(path)
        model.load_state_from_save(best_model["model_state_dict"])
        print("Model loaded from best model checkpoint.")
    else:
        print("No best model checkpoint found.")

def save_model_remotely(local_path, remote_path, creds):
    pass


In [10]:
class LiarPlusSingleRobertaDataset(Dataset):
    def __init__(
        self,
        filepath: str,
        tokenizer,
        str_metadata_cols: list[str],
        num_metadata_cols: list[str],
        max_length: int = 512,
    ):
        self.df = pd.read_csv(filepath)

        self.str_metadata_cols = str_metadata_cols
        self.num_metadata_cols = num_metadata_cols

        for column in self.str_metadata_cols:
            self.df[column] = self.df[column].astype(str)

        self.df["statement"] = self.df["statement"].astype(str)
        self.df["justification"] = self.df["justification"].astype(str)
        self.df["articles"] = self.df["articles"].astype(str)

        self.statement_max_len = max_length // 4
        self.justification_max_len = max_length // 4
        self.article_max_len = max_length // 4
        self.str_metadata_max_len = (
            max_length - self.statement_max_len - self.justification_max_len - self.article_max_len
        ) // len(str_metadata_cols)

        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.df.index)

    def limit_tokens(self, text, max_length=512):
        tokenized = self.tokenizer.tokenize(text)[:max_length]
        return self.tokenizer.decode(
            self.tokenizer.convert_tokens_to_ids(tokenized)
        )

    def __getitem__(self, index: int):
        item = self.df.iloc[index]

        input = self.limit_tokens(
            f"[STATEMENT] {item['statement']}", self.statement_max_len
        )
        input += self.limit_tokens(
            f" [JUSTIFICATION] {item['justification']}",
            self.justification_max_len,
        )
        input += self.limit_tokens(
            f" [ARTICLE] {item['articles']}",
            self.article_max_len,
        )

        for column in self.str_metadata_cols:
            if column in ["subject", "job_title", "context", "speaker"]:
                input += self.limit_tokens(f" [{column.upper()}] {item[column]}", 15)
            else:
                input += self.limit_tokens(f" [{column.upper()}] {item[column]}")

        token_count = len(self.tokenizer.tokenize(input))
        if token_count > self.max_length:
            print(f"Liczba token√≥w przed kodowaniem: {token_count}")
            print(f"Tekst: {input}")
            
        encoded = self.tokenizer(
            input,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )

        label = LABEL_MAPPING[item["label"]]

        num_metadata = [item[column] for column in self.num_metadata_cols]

        return {
            "input_ids": encoded["input_ids"].squeeze(0),
            "attention_mask": encoded["attention_mask"].squeeze(0),
            "num_metadata": torch.tensor(num_metadata).float(),
            "label": torch.tensor(label),
        }

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class LiarPlusSingleFinetunedRoBERTasClassifier(nn.Module):
    def __init__(
        self, encoder_model, num_metadata_len, num_hidden, num_classes
    ):
        super(LiarPlusSingleFinetunedRoBERTasClassifier, self).__init__()
        self.encoder = encoder_model
        self.hl = nn.Linear(
            self.encoder.config.hidden_size + num_metadata_len, num_hidden
        )
        self.dropout = nn.Dropout(p=0.1)
        self.fc = nn.Linear(num_hidden, num_classes)

    def forward(self, input_ids, attention_mask, num_metadata):
        outputs = self.encoder(
            input_ids=input_ids, attention_mask=attention_mask
        )

        cls_embedding = outputs.pooler_output
        concatted_inputs = torch.cat([cls_embedding, num_metadata], dim=1)

        hl_output = F.gelu(self.hl(concatted_inputs))
        hl_output = self.dropout(hl_output)

        logits = self.fc(hl_output)
        return logits

    def roberta_trainable_state(self):
        return {
            name: param for name, param in self.encoder.named_parameters() if param.requires_grad
        }
    
    def load_roberta_trainable_state(self, state_dict):
        self.encoder.load_state_dict(state_dict, strict=False)

    # Zapisz tylko wagi warstw klasyfikatora
    def state_for_save(self):
        return {
            'hl_state_dict': self.hl.state_dict(),
            'fc_state_dict': self.fc.state_dict(),
            'roberta_trainable': self.roberta_trainable_state(),
        }
        
    # ≈Åadowanie modelu (tylko wagi klasyfikatora)
    def load_state_from_save(self, state):
        self.hl.load_state_dict(state['hl_state_dict'])
        self.fc.load_state_dict(state['fc_state_dict'])
        if 'roberta_trainable' in state:
            self.load_roberta_trainable_state(state['roberta_trainable'])

In [6]:
def test(
    model: nn.Module,
    best_model_path: str,
    dataloader: DataLoader
) -> None:
    # Define loss function
    criterion = nn.CrossEntropyLoss()

    load_best_model(model, best_model_path)
    
    model.eval()  # Set model to evaluation mode
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    f1 = MulticlassF1Score(num_classes, average=None).to(device)
    precision = MulticlassPrecision(num_classes, average=None).to(device)
    recall = MulticlassRecall(num_classes, average=None).to(device)

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            num_metadata = batch["num_metadata"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask, num_metadata)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * input_ids.size(0)

            preds = torch.argmax(outputs, dim=1)
            total_correct += (preds == labels).sum().item()
            total_samples += input_ids.size(0)

            f1.update(preds, labels)
            precision.update(preds, labels)
            recall.update(preds, labels)

    avg_loss = total_loss / total_samples
    accuracy = total_correct / total_samples

    f1_res = f1.compute()
    precision_res = precision.compute()
    recall_res = recall.compute()

    mlflow.log_metric("test_acc", accuracy)
    mlflow.log_metric("test_loss", accuracy)

    for i in range(num_classes):
        mlflow.log_metric(f"test_f1_{ids2labels[i]}", f1_res[i])
        mlflow.log_metric(f"test_precision_{ids2labels[i]}", precision_res[i])
        mlflow.log_metric(f"test_recall_{ids2labels[i]}", recall_res[i])
    
    macro_f1 = f1_res.mean()
    macro_precision = precision_res.mean()
    macro_recall = recall_res.mean()

    mlflow.log_metric("test_f1", macro_f1)
    mlflow.log_metric("test_precision", macro_precision)
    mlflow.log_metric("test_recall", macro_recall)

    print(
        f"Test Loss: {avg_loss:.4f}, "
        f"Test Accuracy: {accuracy:.4f}, "
        f"Test F1: {f1_res} (marcro = {macro_f1:.4f}), "
        f"Test Precision: {precision_res} (marcro = {macro_precision:.4f}), "
        f"Test Recall: {recall_res} (marcro = {macro_recall:.4f}), "
    )

In [8]:
def train(
    creds: dict,
    model: nn.Module,
    save_path: str,
    remote_models_path: str,
    best_model_path: str,
    train_loader: DataLoader,
    val_loader: DataLoader,
    test_loader: DataLoader,
    batch_size: int,
    num_classes: int,
    lr=1e-3,
    encoder_lr=1e-5,
    epochs=30,
    patience=5,
    resume: bool = False,
    reset_epoch: bool = False,
) -> None:
    dev_name = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device {dev_name}")
    device = torch.device(dev_name)

    # Define optimizer and loss function
    # Train only the classifier
    optimizer = torch.optim.AdamW([
        {'params': model.encoder.parameters(), 'lr': encoder_lr},  # ni≈ºsze LR dla encodera
        {'params': model.hl.parameters(), 'lr': lr},
        {'params': model.fc.parameters(), 'lr': lr},
    ])
    criterion = nn.CrossEntropyLoss()

    # Checkpoint Path
    checkpoint_path = f"checkpoint_{patience}.pth"

    checkpoint_send_interval = 5

    # Track best loss for model saving
    # Load Checkpoint (Decide if you want to continue)
    start_epoch, best_val_accuracy = load_checkpoint(
        model,
        optimizer,
        checkpoint_path,
        resume,
        reset_epoch
    )

    patience_counter = 0

    f1 = MulticlassF1Score(num_classes, average=None).to(device)
    precision = MulticlassPrecision(num_classes, average=None).to(device)
    recall = MulticlassRecall(num_classes, average=None).to(device)

    # Training loop
    for epoch in range(start_epoch, epochs):
        model.train()
        epoch_loss = 0

        train_accuracy = 0

        for batch in tqdm(
            train_loader, desc=f"Epoch {epoch+1}", leave=False
        ):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            num_metadata = batch["num_metadata"].to(device)
            labels = batch["label"].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask, num_metadata)
            loss = criterion(
                outputs, labels
            )  # mo≈ºna spr√≥bowaƒá to logowaƒá jako osobny wykres do debugowania
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            # Calculate accuracy
            preds = torch.argmax(outputs, dim=-1)
            train_accuracy += (preds == labels).sum().item()

            f1.update(preds, labels)
            precision.update(preds, labels)
            recall.update(preds, labels)

        avg_loss = epoch_loss / len(train_loader)
        avg_train_accuracy = train_accuracy / len(train_loader.dataset)
        mlflow.log_metric("train_loss", avg_loss, step=epoch)
        mlflow.log_metric("train_acc", avg_train_accuracy, step=epoch)

        f1_res = f1.compute()
        precision_res = precision.compute()
        recall_res = recall.compute()

        for i in range(num_classes):
            mlflow.log_metric(
                f"train_f1_{ids2labels[i]}", f1_res[i], step=epoch
            )
            mlflow.log_metric(
                f"train_precision_{ids2labels[i]}",
                precision_res[i],
                step=epoch,
            )
            mlflow.log_metric(
                f"train_recall_{ids2labels[i]}", recall_res[i], step=epoch
            )

        macro_f1 = f1_res.mean()
        macro_precision = precision_res.mean()
        macro_recall = recall_res.mean()

        mlflow.log_metric("train_f1", macro_f1, step=epoch)
        mlflow.log_metric("train_precision", macro_precision, step=epoch)
        mlflow.log_metric("train_recall", macro_recall, step=epoch)

        tqdm.write(
            f"Epoch {epoch+1}: "
            f"Training Loss: {avg_loss}, "
            f"Training Accuracy: {avg_train_accuracy}, "
            f"Training F1: {macro_f1}, "
            f"Training Precision: {macro_precision}, "
            f"Training Recall: {macro_recall}"
        )

        # Validation step
        model.eval()  # Switch to evaluation mode
        val_loss = 0
        val_accuracy = 0

        f1.reset()
        precision.reset()
        recall.reset()

        with torch.no_grad():
            for batch in tqdm(
                val_loader,
                desc=f"Validation of epoch {epoch + 1}",
                leave=False,
            ):
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                num_metadata = batch["num_metadata"].to(device)
                labels = batch["label"].to(device)

                outputs = model(input_ids, attention_mask, num_metadata)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                # Calculate accuracy
                preds = torch.argmax(outputs, dim=-1)
                val_accuracy += (preds == labels).sum().item()
                f1.update(preds, labels)
                precision.update(preds, labels)
                recall.update(preds, labels)

        avg_val_loss = val_loss / len(val_loader)
        avg_val_accuracy = val_accuracy / len(val_loader.dataset)
        mlflow.log_metric("val_loss", avg_val_loss, step=epoch)
        mlflow.log_metric("val_acc", avg_val_accuracy, step=epoch)

        f1_res = f1.compute()
        precision_res = precision.compute()
        recall_res = recall.compute()

        for i in range(num_classes):
            mlflow.log_metric(
                f"val_f1_{ids2labels[i]}", f1_res[i], step=epoch
            )
            mlflow.log_metric(
                f"val_precision_{ids2labels[i]}",
                precision_res[i],
                step=epoch,
            )
            mlflow.log_metric(
                f"val_recall_{ids2labels[i]}", recall_res[i], step=epoch
            )

        macro_f1 = f1_res.mean()
        macro_precision = precision_res.mean()
        macro_recall = recall_res.mean()

        mlflow.log_metric("val_f1", macro_f1, step=epoch)
        mlflow.log_metric("val_precision", macro_precision, step=epoch)
        mlflow.log_metric("val_recall", macro_recall, step=epoch)

        print(
            f"Epoch {epoch+1}: "
            f"Validation Loss: {avg_val_loss}, "
            f"Validation Accuracy: {avg_val_accuracy}, "
            f"Validation F1: {macro_f1}, "
            f"Validation Precision: {macro_precision}, "
            f"Validation Recall: {macro_recall}"
        )

        save_checkpoint(
            model, optimizer, epoch, avg_val_accuracy, checkpoint_path
        )
        if (epoch + 1) % checkpoint_send_interval == 0:# and epoch != 0:
            save_model_remotely(checkpoint_path, remote_models_path, creds)

        # Check for early stopping
        if avg_val_accuracy > best_val_accuracy:
            best_val_accuracy = avg_val_accuracy
            patience_counter = 0
            # Save the best model
            save_best_model(
                model,
                optimizer,
                epoch,
                best_val_accuracy,
                best_model_path
            )
            save_model_remotely(best_model_path, remote_models_path, creds)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Log final checkpoint
    save_model_remotely(checkpoint_path, remote_models_path, creds)

In [11]:
mlflow_uri = "http://cimmerian.win:5000"
resume = False
reset_epoch = False

creds = {
    'hostname': "cimmerian.win",
    'port': 22,
    'username': "",
    'password': ""
}

mlflow.set_tracking_uri(uri=mlflow_uri)

# MLflow experiment setup
mlflow.set_experiment("BinTest")

# Load RoBERTa tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
roberta = RobertaModel.from_pretrained("roberta-base")

# trenuje 2 ostatnie warstwy
for name, param in roberta.named_parameters():
    if name.startswith("encoder.layer.11") or name.startswith("pooler"):
        param.requires_grad = True
    else:
        param.requires_grad = False


# Hyperparameters
num_classes = 2
lr = 1e-3
encoder_lr = 1e-5
epochs = 30
hidden_size = 128
# Number of epochs to wait before stopping if no improvement
patience = 10

# Save path
save_path = "/kaggle/working"
# Remote models path
remote_models_path = "/home/conan/models/single_finetuned_roberta/"
# Best model path
best_model_path = f"{save_path}/best_model_{patience}.pth"

# mo≈ºna przetestowaƒá zach≈Çannie
# dodajemy kolumnƒô jak poprawia i nie dodajemy jak nie poprawia
text_columns = [
    "subject",
    "speaker",
    "job_title",
    "state",
    "party_affiliation",
    "context",
    "sentiment",
    "question",
    "curse",
    "emotion",
    "gibberish",
    "offensiveness",
    "political_bias"
]
num_metadata_cols = [
    "barely_true_counts",
    "false_counts",
    "half_true_counts",
    "mostly_true_counts",
    "pants_on_fire_counts",
    "grammar_errors",
    "ratio_of_capital_letters",
    "statement_length"
]

#subset_size = 1000
#random_state = 42

# speedup the experiments
# mo≈ºna ustawiƒá epochs na 1 i sprawdziƒá czy w ramach jednej epoki val loss spada
training_data = LiarPlusSingleRobertaDataset(
    "/kaggle/input/articles/train2.csv",
    tokenizer,
    text_columns,
    num_metadata_cols
)
validation_data = LiarPlusSingleRobertaDataset(
    "/kaggle/input/articles/val2.csv",
    tokenizer,
    text_columns,
    num_metadata_cols,
)
test_data = LiarPlusSingleRobertaDataset(
    "/kaggle/input/articles/test2.csv",
    tokenizer,
    text_columns,
    num_metadata_cols,
)

batch_size = 64

#training_data_subset = Subset(training_data, sample(range(len(training_data)), k=1000))

train_dataloader = DataLoader(
    training_data, batch_size=batch_size, shuffle=True
)
val_dataloader = DataLoader(
    validation_data, batch_size=batch_size, shuffle=True
)
test_dataloader = DataLoader(
    test_data, batch_size=batch_size, shuffle=True
)

# Instantiate model
model = LiarPlusSingleFinetunedRoBERTasClassifier(
    roberta,
    len(num_metadata_cols),
    hidden_size,
    num_classes,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

start = time.time()
with mlflow.start_run():
    mlflow.log_param("learning_rate", lr)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("resume", resume)
    mlflow.log_param("reset_epoch", reset_epoch)
    mlflow.log_param("patience", patience)
    
    # Train the model
    train(
        creds,
        model,
        save_path,
        remote_models_path,
        best_model_path,
        train_dataloader,
        val_dataloader,
        test_dataloader,
        batch_size,
        num_classes,
        lr,
        encoder_lr,
        epochs,
        patience,
        resume,
        reset_epoch,
    )
    # Evaluate on test dataset
    test(model, best_model_path, test_dataloader)
end = time.time()
print(f"Total time took training: {end-start}s")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using device cuda
Resume is False. Starting from scratch.


                                                          

Epoch 1: Training Loss: 0.6729107051162246, Training Accuracy: 0.5899308598695102, Training F1: 0.5262085795402527, Training Precision: 0.582871675491333, Training Recall: 0.5529206991195679


                                                                      

Epoch 1: Validation Loss: 0.6796286702156067, Validation Accuracy: 0.5794392523364486, Validation F1: 0.5035556554794312, Validation Precision: 0.6430803537368774, Validation Recall: 0.5645316243171692
Checkpoint saved at epoch 0 with validation accuracy 0.5794
Best model saved at epoch 0 with validation accuracy 0.5794


                                                          

Epoch 2: Training Loss: 0.6617502562747979, Training Accuracy: 0.6022007985198169, Training F1: 0.5322561860084534, Training Precision: 0.6071645021438599, Training Recall: 0.5634990930557251


                                                                      

Epoch 2: Validation Loss: 0.655973616100493, Validation Accuracy: 0.5965732087227414, Validation F1: 0.5546383261680603, Validation Precision: 0.6265447735786438, Validation Recall: 0.5851057767868042
Checkpoint saved at epoch 1 with validation accuracy 0.5966
Best model saved at epoch 1 with validation accuracy 0.5966


                                                          

Epoch 3: Training Loss: 0.6558837146492478, Training Accuracy: 0.6120362255331581, Training F1: 0.5559957027435303, Training Precision: 0.6166000366210938, Training Recall: 0.5777639150619507


                                                                      

Epoch 3: Validation Loss: 0.6378288127127147, Validation Accuracy: 0.6098130841121495, Validation F1: 0.5712671875953674, Validation Precision: 0.6432983875274658, Validation Recall: 0.5986517071723938
Checkpoint saved at epoch 2 with validation accuracy 0.6098
Best model saved at epoch 2 with validation accuracy 0.6098


                                                          

Epoch 4: Training Loss: 0.6426626162499375, Training Accuracy: 0.6277144804752167, Training F1: 0.5927593111991882, Training Precision: 0.6255356073379517, Training Recall: 0.6009738445281982


                                                                      

Epoch 4: Validation Loss: 0.65128246091661, Validation Accuracy: 0.6199376947040498, Validation F1: 0.5790888071060181, Validation Precision: 0.6636943221092224, Validation Recall: 0.608319103717804
Checkpoint saved at epoch 3 with validation accuracy 0.6199
Best model saved at epoch 3 with validation accuracy 0.6199


                                                          

Epoch 5: Training Loss: 0.6376951111029394, Training Accuracy: 0.6339468302658486, Training F1: 0.6084622144699097, Training Precision: 0.6292052268981934, Training Recall: 0.6120702028274536


                                                                      

Epoch 5: Validation Loss: 0.641625926608131, Validation Accuracy: 0.6051401869158879, Validation F1: 0.5455362200737, Validation Precision: 0.6714705228805542, Validation Recall: 0.5914437174797058
Checkpoint saved at epoch 4 with validation accuracy 0.6051


                                                          

Epoch 6: Training Loss: 0.6332209895116202, Training Accuracy: 0.6351153958515922, Training F1: 0.6088926792144775, Training Precision: 0.6280235648155212, Training Recall: 0.6120707988739014


                                                                      

Epoch 6: Validation Loss: 0.6399075729506356, Validation Accuracy: 0.6214953271028038, Validation F1: 0.5803616642951965, Validation Precision: 0.666875958442688, Validation Recall: 0.6098160743713379
Checkpoint saved at epoch 5 with validation accuracy 0.6215
Best model saved at epoch 5 with validation accuracy 0.6215


                                                          

Epoch 7: Training Loss: 0.6287578995923818, Training Accuracy: 0.6479696172947707, Training F1: 0.6224017143249512, Training Precision: 0.6436811685562134, Training Recall: 0.6251586675643921


                                                                      

Epoch 7: Validation Loss: 0.6053326144104912, Validation Accuracy: 0.6651090342679128, Validation F1: 0.6614214777946472, Validation Precision: 0.6664119958877563, Validation Recall: 0.6619682312011719
Checkpoint saved at epoch 6 with validation accuracy 0.6651
Best model saved at epoch 6 with validation accuracy 0.6651


                                                          

Epoch 8: Training Loss: 0.6225764048025475, Training Accuracy: 0.6542993475508813, Training F1: 0.6399497389793396, Training Precision: 0.6519759893417358, Training Recall: 0.6400742530822754


                                                                      

Epoch 8: Validation Loss: 0.6217031762713477, Validation Accuracy: 0.6503115264797508, Validation F1: 0.6316509246826172, Validation Precision: 0.6706555485725403, Validation Recall: 0.642249584197998
Checkpoint saved at epoch 7 with validation accuracy 0.6503


                                                          

Epoch 9: Training Loss: 0.6215918726802613, Training Accuracy: 0.6524491187067875, Training F1: 0.6345028281211853, Training Precision: 0.6493346691131592, Training Recall: 0.6353104114532471


                                                                      

Epoch 9: Validation Loss: 0.6083511270227886, Validation Accuracy: 0.6728971962616822, Validation F1: 0.6649293899536133, Validation Precision: 0.6804224252700806, Validation Recall: 0.6677473187446594
Checkpoint saved at epoch 8 with validation accuracy 0.6729
Best model saved at epoch 8 with validation accuracy 0.6729


                                                           

Epoch 10: Training Loss: 0.6214640762494958, Training Accuracy: 0.652546499172266, Training F1: 0.6383160948753357, Training Precision: 0.6516610383987427, Training Recall: 0.6387075781822205


                                                                       

Epoch 10: Validation Loss: 0.606431816305433, Validation Accuracy: 0.6682242990654206, Validation F1: 0.6643470525741577, Validation Precision: 0.669861376285553, Validation Recall: 0.6649622917175293
Checkpoint saved at epoch 9 with validation accuracy 0.6682


                                                           

Epoch 11: Training Loss: 0.6129231671368853, Training Accuracy: 0.6655954815464018, Training F1: 0.6515307426452637, Training Precision: 0.6629800796508789, Training Recall: 0.6511642336845398


                                                                       

Epoch 11: Validation Loss: 0.6077947772684551, Validation Accuracy: 0.6549844236760125, Validation F1: 0.6343529224395752, Validation Precision: 0.6802209615707397, Validation Recall: 0.6464247107505798
Checkpoint saved at epoch 10 with validation accuracy 0.6550


                                                           

Epoch 12: Training Loss: 0.6136566431996244, Training Accuracy: 0.6654007206154445, Training F1: 0.6483240127563477, Training Precision: 0.6620397567749023, Training Recall: 0.6483591794967651


                                                                       

Epoch 12: Validation Loss: 0.5959302016666957, Validation Accuracy: 0.6744548286604362, Validation F1: 0.6694496273994446, Validation Precision: 0.6778081655502319, Validation Recall: 0.6705712080001831
Checkpoint saved at epoch 11 with validation accuracy 0.6745
Best model saved at epoch 11 with validation accuracy 0.6745


                                                           

Epoch 13: Training Loss: 0.6095357937101992, Training Accuracy: 0.6644269159606583, Training F1: 0.6507052183151245, Training Precision: 0.6628632545471191, Training Recall: 0.6504506468772888


                                                                       

Epoch 13: Validation Loss: 0.5985927922385079, Validation Accuracy: 0.6752336448598131, Validation F1: 0.6621152758598328, Validation Precision: 0.6920889616012573, Validation Recall: 0.6683499813079834
Checkpoint saved at epoch 12 with validation accuracy 0.6752
Best model saved at epoch 12 with validation accuracy 0.6752


                                                           

Epoch 14: Training Loss: 0.6059402497658818, Training Accuracy: 0.6715356899405979, Training F1: 0.6572667360305786, Training Precision: 0.6699730753898621, Training Recall: 0.6568408012390137


                                                                       

Epoch 14: Validation Loss: 0.6517452257020133, Validation Accuracy: 0.633177570093458, Validation F1: 0.5836125612258911, Validation Precision: 0.7076172828674316, Validation Recall: 0.6202222108840942
Checkpoint saved at epoch 13 with validation accuracy 0.6332


                                                           

Epoch 15: Training Loss: 0.6028091017133701, Training Accuracy: 0.6724121141299055, Training F1: 0.6494397521018982, Training Precision: 0.6683306097984314, Training Recall: 0.6501632928848267


                                                                       

Epoch 15: Validation Loss: 0.5982416683719272, Validation Accuracy: 0.6791277258566978, Validation F1: 0.6662729978561401, Validation Precision: 0.69632887840271, Validation Recall: 0.6722820401191711
Checkpoint saved at epoch 14 with validation accuracy 0.6791
Best model saved at epoch 14 with validation accuracy 0.6791


                                                           

Epoch 16: Training Loss: 0.5994896922052276, Training Accuracy: 0.679326127178888, Training F1: 0.6652896404266357, Training Precision: 0.6778062582015991, Training Recall: 0.6645686626434326


                                                                       

Epoch 16: Validation Loss: 0.6114729061013177, Validation Accuracy: 0.6736760124610592, Validation F1: 0.6552295684814453, Validation Precision: 0.7009311318397522, Validation Recall: 0.6653996706008911
Checkpoint saved at epoch 15 with validation accuracy 0.6737


                                                           

Epoch 17: Training Loss: 0.6010037938260143, Training Accuracy: 0.6729963969227772, Training F1: 0.6585570573806763, Training Precision: 0.6711324453353882, Training Recall: 0.658071756362915


                                                                       

Epoch 17: Validation Loss: 0.5825638274351755, Validation Accuracy: 0.6869158878504673, Validation F1: 0.6749858260154724, Validation Precision: 0.7039234638214111, Validation Recall: 0.6802725791931152
Checkpoint saved at epoch 16 with validation accuracy 0.6869
Best model saved at epoch 16 with validation accuracy 0.6869


                                                           

Epoch 18: Training Loss: 0.5984769067408876, Training Accuracy: 0.679033985782452, Training F1: 0.6659344434738159, Training Precision: 0.6784606575965881, Training Recall: 0.665192723274231


                                                                       

Epoch 18: Validation Loss: 0.5887899228504726, Validation Accuracy: 0.6861370716510904, Validation F1: 0.6794833540916443, Validation Precision: 0.6929575204849243, Validation Recall: 0.6814196109771729
Checkpoint saved at epoch 17 with validation accuracy 0.6861


                                                           

Epoch 19: Training Loss: 0.5910471940632933, Training Accuracy: 0.6824423020742039, Training F1: 0.6698944568634033, Training Precision: 0.6810265779495239, Training Recall: 0.6689205169677734


                                                                       

Epoch 19: Validation Loss: 0.5950447548003424, Validation Accuracy: 0.6736760124610592, Validation F1: 0.6598352789878845, Validation Precision: 0.6915584802627563, Validation Recall: 0.666600227355957
Checkpoint saved at epoch 18 with validation accuracy 0.6737


                                                           

Epoch 20: Training Loss: 0.591275509840213, Training Accuracy: 0.6848768137111695, Training F1: 0.6704332828521729, Training Precision: 0.6820704936981201, Training Recall: 0.6694782376289368


                                                                       

Epoch 20: Validation Loss: 0.6115226802371797, Validation Accuracy: 0.6736760124610592, Validation F1: 0.6446448564529419, Validation Precision: 0.7269477844238281, Validation Recall: 0.6631882190704346
Checkpoint saved at epoch 19 with validation accuracy 0.6737


                                                           

Epoch 21: Training Loss: 0.5884172181535211, Training Accuracy: 0.6851689551076054, Training F1: 0.6688657999038696, Training Precision: 0.6838197708129883, Training Recall: 0.6682401895523071


                                                                       

Epoch 21: Validation Loss: 0.6288137379146758, Validation Accuracy: 0.6627725856697819, Validation F1: 0.6275221109390259, Validation Precision: 0.7256932258605957, Validation Recall: 0.651382327079773
Checkpoint saved at epoch 20 with validation accuracy 0.6628


                                                           

Epoch 22: Training Loss: 0.5886840812908196, Training Accuracy: 0.6857532379004772, Training F1: 0.6678569316864014, Training Precision: 0.6832948923110962, Training Recall: 0.6673167943954468


                                                                       

Epoch 22: Validation Loss: 0.5942340138412657, Validation Accuracy: 0.6705607476635514, Validation F1: 0.6477119326591492, Validation Precision: 0.7064478993415833, Validation Recall: 0.6613315343856812
Checkpoint saved at epoch 21 with validation accuracy 0.6706


                                                           

Epoch 23: Training Loss: 0.5835386510961544, Training Accuracy: 0.6889667932612717, Training F1: 0.6733119487762451, Training Precision: 0.6860870122909546, Training Recall: 0.6723479628562927


                                                                       

Epoch 23: Validation Loss: 0.5850948847475506, Validation Accuracy: 0.6853582554517134, Validation F1: 0.6705752015113831, Validation Precision: 0.7081626653671265, Validation Recall: 0.677890956401825
Checkpoint saved at epoch 22 with validation accuracy 0.6854


                                                           

Epoch 24: Training Loss: 0.583578751509234, Training Accuracy: 0.6904275002434511, Training F1: 0.6760706901550293, Training Precision: 0.6895761489868164, Training Recall: 0.6750683188438416


                                                                       

Epoch 24: Validation Loss: 0.5742491057940892, Validation Accuracy: 0.6892523364485982, Validation F1: 0.6837741136550903, Validation Precision: 0.6943862438201904, Validation Recall: 0.6850454807281494
Checkpoint saved at epoch 23 with validation accuracy 0.6893
Best model saved at epoch 23 with validation accuracy 0.6893


                                                           

Epoch 25: Training Loss: 0.5801273147511926, Training Accuracy: 0.6920829681565878, Training F1: 0.6797953844070435, Training Precision: 0.6902142763137817, Training Recall: 0.6785093545913696


                                                                       

Epoch 25: Validation Loss: 0.6135512093702952, Validation Accuracy: 0.6814641744548287, Validation F1: 0.6621502637863159, Validation Precision: 0.7135034799575806, Validation Recall: 0.6728847026824951
Checkpoint saved at epoch 24 with validation accuracy 0.6815


                                                           

Epoch 26: Training Loss: 0.5737825156368824, Training Accuracy: 0.6979257960853053, Training F1: 0.6836675405502319, Training Precision: 0.6954185962677002, Training Recall: 0.682330846786499


                                                                       

Epoch 26: Validation Loss: 0.5809273975236076, Validation Accuracy: 0.6900311526479751, Validation F1: 0.6736104488372803, Validation Precision: 0.71808922290802, Validation Recall: 0.6820660829544067
Checkpoint saved at epoch 25 with validation accuracy 0.6900
Best model saved at epoch 25 with validation accuracy 0.6900


                                                           

Epoch 27: Training Loss: 0.5722284568762928, Training Accuracy: 0.6986074593436556, Training F1: 0.6862350702285767, Training Precision: 0.6962992548942566, Training Recall: 0.6847761869430542


                                                                       

Epoch 27: Validation Loss: 0.6103212606339228, Validation Accuracy: 0.6736760124610592, Validation F1: 0.6516307592391968, Validation Precision: 0.7090475559234619, Validation Recall: 0.6645783185958862
Checkpoint saved at epoch 26 with validation accuracy 0.6737


                                                           

Epoch 28: Training Loss: 0.5683488936527915, Training Accuracy: 0.7026000584282793, Training F1: 0.6874240636825562, Training Precision: 0.6986896991729736, Training Recall: 0.6859662532806396


                                                                       

Epoch 28: Validation Loss: 0.5994200848397755, Validation Accuracy: 0.690809968847352, Validation F1: 0.6804678440093994, Validation Precision: 0.705274224281311, Validation Recall: 0.6846469640731812
Checkpoint saved at epoch 27 with validation accuracy 0.6908
Best model saved at epoch 27 with validation accuracy 0.6908


                                                           

Epoch 29: Training Loss: 0.5635636039772389, Training Accuracy: 0.7045476677378518, Training F1: 0.6916558742523193, Training Precision: 0.7022095322608948, Training Recall: 0.6900734901428223


                                                                       

Epoch 29: Validation Loss: 0.5971421968369257, Validation Accuracy: 0.6876947040498442, Validation F1: 0.6733580827713013, Validation Precision: 0.7101354598999023, Validation Recall: 0.6803260445594788
Checkpoint saved at epoch 28 with validation accuracy 0.6877


                                                           

Epoch 30: Training Loss: 0.5611359156807016, Training Accuracy: 0.7075664621676891, Training F1: 0.6944898962974548, Training Precision: 0.7043565511703491, Training Recall: 0.6928346753120422


                                                                       

Epoch 30: Validation Loss: 0.5887408710661388, Validation Accuracy: 0.690809968847352, Validation F1: 0.681014895439148, Validation Precision: 0.7041480541229248, Validation Recall: 0.6848365068435669
Checkpoint saved at epoch 29 with validation accuracy 0.6908
Model loaded from best model checkpoint.


Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 21/21 [00:29<00:00,  1.40s/it]


Test Loss: 0.6115, Test Accuracy: 0.6742, Test F1: tensor([0.5795, 0.7341], device='cuda:0') (marcro = 0.6568), Test Precision: tensor([0.6575, 0.6828], device='cuda:0') (marcro = 0.6702), Test Recall: tensor([0.5180, 0.7937], device='cuda:0') (marcro = 0.6558), 
üèÉ View run unleashed-croc-404 at: http://cimmerian.win:5000/#/experiments/33/runs/73c400df94dd46e6898fa8bd48a8e529
üß™ View experiment at: http://cimmerian.win:5000/#/experiments/33
Total time took training: 9464.941684484482s
