In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
splits = [10,20,30]
num_classes = len(splits)+1

In [3]:
from dataset import YouTubeDataset
dataset = YouTubeDataset(splits)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from models.single_modality_classifiers import RobertaClassifier
model = RobertaClassifier(num_classes)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initialization success if you see a tensor: tensor([[ 0.1946,  0.2515, -0.0769,  0.0392]], grad_fn=<AddmmBackward0>).


In [5]:
# import torch
# checkpoint = torch.load("checkpoints/epoch200.pt",map_location=torch.device('cpu'))
# model.load_state_dict(checkpoint['model_state_dict'])


In [6]:
# model.base.requires_grad = False

In [7]:
device = 'cuda:2'

In [8]:
import torch
import math
import os
import time
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from inference import eval, get_scores
from torch.nn.functional import cross_entropy

def train_model(model, dataset, learning_rate, lr_decay, weight_decay, batch_size, num_epochs, device, isCheckpoint=False, train_val_split = None, isVerbose=True):
    loss_history = []

    model.to(device)
    dataset.label.to(device)
    dataset.tokens.to(device)
    model.train()

    optimizer = torch.optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()), learning_rate, weight_decay=weight_decay
    )
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: lr_decay ** epoch
    )

    # sample minibatch data
    if not train_val_split:
      train_ids = [i for i in range(len(dataset))]
      val_ids = None
    else:
      train_ids, val_ids = train_val_split

    iter_per_epoch = math.ceil(len(train_ids) // batch_size)
    class_weights = torch.tensor(compute_class_weight(class_weight='balanced', classes=np.arange(model.num_classes), y=dataset.label[train_ids].numpy()), dtype=torch.float, device=device)
    loss_fn = torch.nn.NLLLoss(weight = class_weights)
    # loss_fn = cross_entropy
    
    for i in range(num_epochs):
        start_t = time.time()
        local_hist = []
        correct_cnt = 0
        y_preds = torch.empty((0,),device=device)
        y_trues = torch.empty((0,),device=device)
        for j in range(iter_per_epoch):
            tokens, y_true = dataset[train_ids[j * batch_size: (j + 1) * batch_size]]

            # tokens = tokens.to(device)
            y_true = y_true.to(device)

            optimizer.zero_grad()

            digits = model(tokens)
            y_preds = torch.hstack([y_preds,digits.argmax(dim=1)])
            y_trues = torch.hstack([y_trues,y_true])

            probs = torch.nn.LogSoftmax(dim=1)(digits)
            loss = loss_fn(probs,y_true)
            loss.backward()

            local_hist.append(loss.item())
            optimizer.step()

        end_t = time.time()

        loss_mean = np.array(local_hist).mean()
        loss_history.append(loss_mean)
            
        print(
            f"(Epoch {i}), time: {end_t - start_t:.1f}s, loss: {loss_mean:.3f}"
        )
        if isVerbose:
            train_accuracy, train_precision, train_recall, train_f1 = get_scores(y_trues.to('cpu'), y_preds.to('cpu'), model.num_classes) # This is an aggregated result due to GPU size limit
            print(f"    Training Set - accuracy: {train_accuracy:.2f}, precision: {train_precision:.2f}, recall: {train_recall:.2f}, f1-score: {train_f1:.2f},")
            if val_ids is not None:
                val_accuracy, val_precision, val_recall, val_f1 = eval(model, dataset, val_ids, num_classes, device, is_verbose = (loss_mean < 0.5))
                print(f"    Validation Set - accuracy: {val_accuracy:.2f}, precision: {val_precision:.2f}, recall: {val_recall:.2f}, f1-score: {val_f1:.2f},")
        if i%200 == 0 and isCheckpoint:
          dir = "checkpoints"
          if not os.path.exists(dir):
            os.mkdir(dir)
          file = f"epoch{i}.pt"
          path = dir+'/'+file
          torch.save({
                      'epoch': i,
                      'model_state_dict': model.state_dict(),
                      'optimizer_state_dict': optimizer.state_dict(),
                      'loss': loss_mean,
                      }, path)

        lr_scheduler.step()

        if loss_mean < 0.5:
          break
    
    return loss_history

## 5-fold CV

In [9]:
from sklearn.model_selection import KFold
import torch

def train_model_cv5(model, dataset):
    loss_hist = []
    kf = KFold(n_splits=5)
    cnt = 1
    for train_index, val_index in kf.split(dataset):
        model.reset()
        print("Fold "+str(cnt)+" (val", val_index[0],"-",str(val_index[-1])+")")
        loss_hist_fold = train_model(model, device = device, dataset=dataset, train_val_split=(train_index, val_index),learning_rate=3e-6, lr_decay=0.99, weight_decay=1e-4, batch_size=10, num_epochs=300, isCheckpoint = False, isVerbose = True)
        loss_hist.append(loss_hist_fold)
        cnt += 1
    return loss_hist

In [10]:
lost_hist_folds = train_model_cv5(model, dataset)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 1 (val 0 - 42)
(Epoch 0), time: 6.2s, loss: 1.419
    Training Set - accuracy: 0.06, precision: 0.02, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.03, precision: 0.01, recall: 0.25, f1-score: 0.01,
(Epoch 1), time: 6.0s, loss: 1.418
    Training Set - accuracy: 0.06, precision: 0.02, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.03, precision: 0.01, recall: 0.25, f1-score: 0.01,
(Epoch 2), time: 6.0s, loss: 1.416
    Training Set - accuracy: 0.06, precision: 0.02, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.03, precision: 0.01, recall: 0.25, f1-score: 0.01,
(Epoch 3), time: 5.9s, loss: 1.413
    Training Set - accuracy: 0.06, precision: 0.02, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.03, precision: 0.01, recall: 0.25, f1-score: 0.01,
(Epoch 4), time: 6.0s, loss: 1.414
    Training Set - accuracy: 0.06, precision: 0.02, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.03, precision: 0.01, recal

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 2 (val 43 - 84)
(Epoch 0), time: 6.0s, loss: 1.434
    Training Set - accuracy: 0.31, precision: 0.08, recall: 0.25, f1-score: 0.12,
    Validation Set - accuracy: 0.23, precision: 0.06, recall: 0.25, f1-score: 0.09,
(Epoch 1), time: 6.0s, loss: 1.434
    Training Set - accuracy: 0.31, precision: 0.08, recall: 0.25, f1-score: 0.12,
    Validation Set - accuracy: 0.23, precision: 0.06, recall: 0.25, f1-score: 0.09,
(Epoch 2), time: 6.0s, loss: 1.431
    Training Set - accuracy: 0.31, precision: 0.08, recall: 0.25, f1-score: 0.12,
    Validation Set - accuracy: 0.23, precision: 0.06, recall: 0.25, f1-score: 0.09,
(Epoch 3), time: 6.0s, loss: 1.430
    Training Set - accuracy: 0.31, precision: 0.08, recall: 0.25, f1-score: 0.12,
    Validation Set - accuracy: 0.23, precision: 0.06, recall: 0.25, f1-score: 0.09,
(Epoch 4), time: 6.0s, loss: 1.429
    Training Set - accuracy: 0.31, precision: 0.08, recall: 0.25, f1-score: 0.12,
    Validation Set - accuracy: 0.23, precision: 0.06, reca

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 3 (val 85 - 126)
(Epoch 0), time: 6.0s, loss: 1.456
    Training Set - accuracy: 0.04, precision: 0.01, recall: 0.25, f1-score: 0.02,
    Validation Set - accuracy: 0.10, precision: 0.03, recall: 0.25, f1-score: 0.05,
(Epoch 1), time: 6.0s, loss: 1.442
    Training Set - accuracy: 0.04, precision: 0.01, recall: 0.25, f1-score: 0.02,
    Validation Set - accuracy: 0.10, precision: 0.03, recall: 0.25, f1-score: 0.05,
(Epoch 2), time: 6.0s, loss: 1.444
    Training Set - accuracy: 0.04, precision: 0.01, recall: 0.25, f1-score: 0.02,
    Validation Set - accuracy: 0.10, precision: 0.03, recall: 0.25, f1-score: 0.05,
(Epoch 3), time: 6.0s, loss: 1.435
    Training Set - accuracy: 0.04, precision: 0.01, recall: 0.25, f1-score: 0.02,
    Validation Set - accuracy: 0.10, precision: 0.03, recall: 0.25, f1-score: 0.05,
(Epoch 4), time: 6.0s, loss: 1.438
    Training Set - accuracy: 0.04, precision: 0.01, recall: 0.25, f1-score: 0.02,
    Validation Set - accuracy: 0.10, precision: 0.03, rec

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 4 (val 127 - 168)
(Epoch 0), time: 6.0s, loss: 1.413
    Training Set - accuracy: 0.06, precision: 0.02, recall: 0.23, f1-score: 0.03,
    Validation Set - accuracy: 0.20, precision: 0.05, recall: 0.25, f1-score: 0.08,
(Epoch 1), time: 6.0s, loss: 1.405
    Training Set - accuracy: 0.08, precision: 0.27, recall: 0.26, f1-score: 0.05,
    Validation Set - accuracy: 0.23, precision: 0.18, recall: 0.27, f1-score: 0.12,
(Epoch 2), time: 6.0s, loss: 1.402
    Training Set - accuracy: 0.13, precision: 0.12, recall: 0.27, f1-score: 0.11,
    Validation Set - accuracy: 0.25, precision: 0.12, recall: 0.23, f1-score: 0.15,
(Epoch 3), time: 6.0s, loss: 1.399
    Training Set - accuracy: 0.29, precision: 0.15, recall: 0.23, f1-score: 0.13,
    Validation Set - accuracy: 0.30, precision: 0.18, recall: 0.23, f1-score: 0.16,
(Epoch 4), time: 6.0s, loss: 1.383
    Training Set - accuracy: 0.34, precision: 0.25, recall: 0.25, f1-score: 0.16,
    Validation Set - accuracy: 0.33, precision: 0.20, re

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 5 (val 169 - 210)
(Epoch 0), time: 6.0s, loss: 1.419
    Training Set - accuracy: 0.07, precision: 0.12, recall: 0.26, f1-score: 0.04,
    Validation Set - accuracy: 0.07, precision: 0.26, recall: 0.27, f1-score: 0.06,
(Epoch 1), time: 6.0s, loss: 1.410
    Training Set - accuracy: 0.07, precision: 0.14, recall: 0.26, f1-score: 0.04,
    Validation Set - accuracy: 0.07, precision: 0.26, recall: 0.27, f1-score: 0.06,
(Epoch 2), time: 6.0s, loss: 1.408
    Training Set - accuracy: 0.14, precision: 0.17, recall: 0.26, f1-score: 0.09,
    Validation Set - accuracy: 0.15, precision: 0.14, recall: 0.32, f1-score: 0.12,
(Epoch 3), time: 6.0s, loss: 1.409
    Training Set - accuracy: 0.12, precision: 0.17, recall: 0.23, f1-score: 0.08,
    Validation Set - accuracy: 0.10, precision: 0.14, recall: 0.29, f1-score: 0.08,
(Epoch 4), time: 6.0s, loss: 1.412
    Training Set - accuracy: 0.07, precision: 0.11, recall: 0.18, f1-score: 0.05,
    Validation Set - accuracy: 0.05, precision: 0.01, re

In [11]:
from dataset import YouTubeDataset
from models.single_modality_classifiers import RobertaClassifier
splits = [10,20]
num_classes = len(splits)+1
dataset = YouTubeDataset(splits)
model = RobertaClassifier(num_classes)

hist = train_model_cv5(model, dataset)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initialization success if you see a tensor: tensor([[0.0302, 0.0080, 0.0226]], grad_fn=<AddmmBackward0>).


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 1 (val 0 - 42)
(Epoch 0), time: 5.8s, loss: 1.107
    Training Set - accuracy: 0.18, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.05, precision: 0.02, recall: 0.33, f1-score: 0.03,
(Epoch 1), time: 5.8s, loss: 1.105
    Training Set - accuracy: 0.18, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.05, precision: 0.02, recall: 0.33, f1-score: 0.03,
(Epoch 2), time: 5.8s, loss: 1.106
    Training Set - accuracy: 0.18, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.05, precision: 0.02, recall: 0.33, f1-score: 0.03,
(Epoch 3), time: 5.8s, loss: 1.104
    Training Set - accuracy: 0.18, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.05, precision: 0.02, recall: 0.33, f1-score: 0.03,
(Epoch 4), time: 5.8s, loss: 1.102
    Training Set - accuracy: 0.25, precision: 0.24, recall: 0.37, f1-score: 0.20,
    Validation Set - accuracy: 0.12, precision: 0.22, recal

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 2 (val 43 - 84)
(Epoch 0), time: 5.8s, loss: 1.098
    Training Set - accuracy: 0.51, precision: 0.17, recall: 0.33, f1-score: 0.23,
    Validation Set - accuracy: 0.70, precision: 0.23, recall: 0.33, f1-score: 0.27,
(Epoch 1), time: 5.8s, loss: 1.096
    Training Set - accuracy: 0.51, precision: 0.17, recall: 0.33, f1-score: 0.23,
    Validation Set - accuracy: 0.70, precision: 0.23, recall: 0.33, f1-score: 0.27,
(Epoch 2), time: 5.8s, loss: 1.094
    Training Set - accuracy: 0.51, precision: 0.17, recall: 0.33, f1-score: 0.23,
    Validation Set - accuracy: 0.70, precision: 0.23, recall: 0.33, f1-score: 0.27,
(Epoch 3), time: 5.8s, loss: 1.094
    Training Set - accuracy: 0.51, precision: 0.17, recall: 0.33, f1-score: 0.23,
    Validation Set - accuracy: 0.70, precision: 0.23, recall: 0.33, f1-score: 0.27,
(Epoch 4), time: 5.8s, loss: 1.094
    Training Set - accuracy: 0.51, precision: 0.17, recall: 0.33, f1-score: 0.23,
    Validation Set - accuracy: 0.70, precision: 0.23, reca

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 3 (val 85 - 126)
(Epoch 0), time: 5.8s, loss: 1.149
    Training Set - accuracy: 0.13, precision: 0.04, recall: 0.33, f1-score: 0.08,
    Validation Set - accuracy: 0.25, precision: 0.41, recall: 0.35, f1-score: 0.16,
(Epoch 1), time: 5.8s, loss: 1.134
    Training Set - accuracy: 0.18, precision: 0.18, recall: 0.39, f1-score: 0.17,
    Validation Set - accuracy: 0.28, precision: 0.22, recall: 0.36, f1-score: 0.21,
(Epoch 2), time: 5.8s, loss: 1.133
    Training Set - accuracy: 0.14, precision: 0.11, recall: 0.31, f1-score: 0.13,
    Validation Set - accuracy: 0.23, precision: 0.20, recall: 0.30, f1-score: 0.17,
(Epoch 3), time: 5.8s, loss: 1.124
    Training Set - accuracy: 0.15, precision: 0.11, recall: 0.28, f1-score: 0.14,
    Validation Set - accuracy: 0.30, precision: 0.20, recall: 0.31, f1-score: 0.24,
(Epoch 4), time: 5.9s, loss: 1.116
    Training Set - accuracy: 0.29, precision: 0.17, recall: 0.38, f1-score: 0.22,
    Validation Set - accuracy: 0.40, precision: 0.25, rec

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 4 (val 127 - 168)
(Epoch 0), time: 6.0s, loss: 1.106
    Training Set - accuracy: 0.31, precision: 0.43, recall: 0.34, f1-score: 0.16,
    Validation Set - accuracy: 0.28, precision: 0.10, recall: 0.31, f1-score: 0.15,
(Epoch 1), time: 6.0s, loss: 1.103
    Training Set - accuracy: 0.31, precision: 0.21, recall: 0.32, f1-score: 0.18,
    Validation Set - accuracy: 0.33, precision: 0.24, recall: 0.34, f1-score: 0.21,
(Epoch 2), time: 6.0s, loss: 1.099
    Training Set - accuracy: 0.33, precision: 0.29, recall: 0.33, f1-score: 0.21,
    Validation Set - accuracy: 0.38, precision: 0.36, recall: 0.39, f1-score: 0.26,
(Epoch 3), time: 6.0s, loss: 1.098
    Training Set - accuracy: 0.36, precision: 0.32, recall: 0.36, f1-score: 0.24,
    Validation Set - accuracy: 0.33, precision: 0.22, recall: 0.34, f1-score: 0.21,
(Epoch 4), time: 6.0s, loss: 1.090
    Training Set - accuracy: 0.36, precision: 0.32, recall: 0.35, f1-score: 0.25,
    Validation Set - accuracy: 0.38, precision: 0.29, re

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 5 (val 169 - 210)
(Epoch 0), time: 6.0s, loss: 1.122
    Training Set - accuracy: 0.15, precision: 0.05, recall: 0.33, f1-score: 0.09,
    Validation Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
(Epoch 1), time: 6.0s, loss: 1.120
    Training Set - accuracy: 0.15, precision: 0.05, recall: 0.33, f1-score: 0.09,
    Validation Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
(Epoch 2), time: 6.0s, loss: 1.118
    Training Set - accuracy: 0.15, precision: 0.05, recall: 0.33, f1-score: 0.09,
    Validation Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
(Epoch 3), time: 6.0s, loss: 1.117
    Training Set - accuracy: 0.15, precision: 0.05, recall: 0.33, f1-score: 0.09,
    Validation Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
(Epoch 4), time: 5.9s, loss: 1.117
    Training Set - accuracy: 0.15, precision: 0.05, recall: 0.33, f1-score: 0.09,
    Validation Set - accuracy: 0.17, precision: 0.06, re

In [13]:
from dataset import YouTubeDataset
splits = [10]
num_classes = len(splits)+1
dataset = YouTubeDataset(splits)
model = RobertaClassifier(num_classes)

hist = train_model_cv5(model, dataset)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initialization success if you see a tensor: tensor([[0.0514, 0.0068]], grad_fn=<AddmmBackward0>).


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 1 (val 0 - 42)
(Epoch 0), time: 5.8s, loss: 0.704
    Training Set - accuracy: 0.53, precision: 0.27, recall: 0.50, f1-score: 0.35,
    Validation Set - accuracy: 0.17, precision: 0.09, recall: 0.50, f1-score: 0.15,
(Epoch 1), time: 5.8s, loss: 0.695
    Training Set - accuracy: 0.53, precision: 0.27, recall: 0.50, f1-score: 0.35,
    Validation Set - accuracy: 0.17, precision: 0.09, recall: 0.50, f1-score: 0.15,
(Epoch 2), time: 5.9s, loss: 0.695
    Training Set - accuracy: 0.53, precision: 0.27, recall: 0.50, f1-score: 0.35,
    Validation Set - accuracy: 0.17, precision: 0.09, recall: 0.50, f1-score: 0.15,
(Epoch 3), time: 5.9s, loss: 0.697
    Training Set - accuracy: 0.53, precision: 0.27, recall: 0.50, f1-score: 0.35,
    Validation Set - accuracy: 0.17, precision: 0.09, recall: 0.50, f1-score: 0.15,
(Epoch 4), time: 5.9s, loss: 0.696
    Training Set - accuracy: 0.53, precision: 0.27, recall: 0.50, f1-score: 0.35,
    Validation Set - accuracy: 0.17, precision: 0.09, recal

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 2 (val 43 - 84)
(Epoch 0), time: 5.9s, loss: 0.712
    Training Set - accuracy: 0.49, precision: 0.24, recall: 0.50, f1-score: 0.33,
    Validation Set - accuracy: 0.30, precision: 0.15, recall: 0.50, f1-score: 0.23,
(Epoch 1), time: 5.7s, loss: 0.709
    Training Set - accuracy: 0.49, precision: 0.24, recall: 0.50, f1-score: 0.33,
    Validation Set - accuracy: 0.30, precision: 0.15, recall: 0.50, f1-score: 0.23,
(Epoch 2), time: 5.8s, loss: 0.706
    Training Set - accuracy: 0.49, precision: 0.24, recall: 0.50, f1-score: 0.33,
    Validation Set - accuracy: 0.30, precision: 0.15, recall: 0.50, f1-score: 0.23,
(Epoch 3), time: 5.8s, loss: 0.704
    Training Set - accuracy: 0.49, precision: 0.24, recall: 0.50, f1-score: 0.33,
    Validation Set - accuracy: 0.30, precision: 0.15, recall: 0.50, f1-score: 0.23,
(Epoch 4), time: 5.8s, loss: 0.699
    Training Set - accuracy: 0.48, precision: 0.24, recall: 0.49, f1-score: 0.32,
    Validation Set - accuracy: 0.30, precision: 0.15, reca

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 3 (val 85 - 126)
(Epoch 0), time: 6.0s, loss: 0.700
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 1), time: 6.0s, loss: 0.699
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 2), time: 6.0s, loss: 0.699
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 3), time: 6.0s, loss: 0.697
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 4), time: 6.0s, loss: 0.699
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, rec

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 4 (val 127 - 168)
(Epoch 0), time: 6.0s, loss: 0.705
    Training Set - accuracy: 0.42, precision: 0.21, recall: 0.49, f1-score: 0.30,
    Validation Set - accuracy: 0.55, precision: 0.28, recall: 0.50, f1-score: 0.35,
(Epoch 1), time: 6.0s, loss: 0.696
    Training Set - accuracy: 0.42, precision: 0.38, recall: 0.49, f1-score: 0.30,
    Validation Set - accuracy: 0.60, precision: 0.79, recall: 0.56, f1-score: 0.47,
(Epoch 2), time: 6.0s, loss: 0.695
    Training Set - accuracy: 0.42, precision: 0.46, recall: 0.49, f1-score: 0.35,
    Validation Set - accuracy: 0.55, precision: 0.53, recall: 0.51, f1-score: 0.40,
(Epoch 3), time: 6.0s, loss: 0.693
    Training Set - accuracy: 0.49, precision: 0.60, recall: 0.54, f1-score: 0.44,
    Validation Set - accuracy: 0.55, precision: 0.53, recall: 0.52, f1-score: 0.46,
(Epoch 4), time: 6.0s, loss: 0.693
    Training Set - accuracy: 0.58, precision: 0.60, recall: 0.60, f1-score: 0.58,
    Validation Set - accuracy: 0.53, precision: 0.52, re

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 5 (val 169 - 210)
(Epoch 0), time: 6.0s, loss: 0.715
    Training Set - accuracy: 0.40, precision: 0.20, recall: 0.50, f1-score: 0.29,
    Validation Set - accuracy: 0.65, precision: 0.33, recall: 0.50, f1-score: 0.39,
(Epoch 1), time: 6.0s, loss: 0.708
    Training Set - accuracy: 0.40, precision: 0.20, recall: 0.50, f1-score: 0.29,
    Validation Set - accuracy: 0.65, precision: 0.33, recall: 0.50, f1-score: 0.39,
(Epoch 2), time: 6.0s, loss: 0.704
    Training Set - accuracy: 0.41, precision: 0.70, recall: 0.51, f1-score: 0.30,
    Validation Set - accuracy: 0.65, precision: 0.33, recall: 0.50, f1-score: 0.39,
(Epoch 3), time: 6.0s, loss: 0.703
    Training Set - accuracy: 0.40, precision: 0.45, recall: 0.50, f1-score: 0.29,
    Validation Set - accuracy: 0.68, precision: 0.83, recall: 0.54, f1-score: 0.47,
(Epoch 4), time: 6.0s, loss: 0.701
    Training Set - accuracy: 0.37, precision: 0.34, recall: 0.45, f1-score: 0.30,
    Validation Set - accuracy: 0.70, precision: 0.84, re