In [1]:
%load_ext autoreload
%autosave 60
%autoreload 2

Autosaving every 60 seconds


In [2]:
from google.colab import drive
drive.mount("/content/drive")
%cd drive/MyDrive/FusionModel

Mounted at /content/drive
/content/drive/MyDrive/FusionModel


In [3]:
# !pip install torch==1.9.0 torchvision==0.10.0 torchaudio==0.9.0 torchtext==0.10.0
!pip install transformers==4.8.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.8.0
  Downloading transformers-4.8.0-py3-none-any.whl (2.5 MB)
[K     |████████████████████████████████| 2.5 MB 13.7 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[K     |████████████████████████████████| 880 kB 60.9 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 46.4 MB/s 
Collecting huggingface-hub==0.0.12
  Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=c5f065314494e24eaceb15e3f51ac9bb2bedbcadc0d93902709741dbacc37e44
  Stor

In [4]:
splits = [10,20,30]
num_classes = len(splits)+1

In [5]:
from dataset import YouTubeDataset
dataset = YouTubeDataset(splits)

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [6]:
from model import SimpleBert
model = SimpleBert(num_classes)

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initialization success if you see a tensor: tensor([[ 0.0480, -0.1949,  0.2635,  0.1745]], grad_fn=<AddmmBackward0>).


In [7]:
# import torch
# checkpoint = torch.load("checkpoints/epoch200.pt",map_location=torch.device('cpu'))
# model.load_state_dict(checkpoint['model_state_dict'])


In [8]:
# model.base.requires_grad = False

In [9]:
import torch
import math
import os
import time
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from train import eval, get_scores
from torch.nn.functional import cross_entropy

def train_model(model, dataset, learning_rate, lr_decay, batch_size, num_epochs, device='cuda', isCheckpoint=False, train_val_split = None, isVerbose=True):
    loss_history = []

    model = model.to(device)
    model.train()

    optimizer = torch.optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()), learning_rate
    )
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: lr_decay ** epoch
    )

    # sample minibatch data
    if not train_val_split:
      train_ids = [i for i in range(len(dataset))]
      val_ids = None
    else:
      train_ids, val_ids = train_val_split

    iter_per_epoch = math.ceil(len(train_ids) // batch_size)
    class_weights = torch.tensor(compute_class_weight(class_weight='balanced', classes=np.arange(model.num_classes), y=dataset.label[train_ids].numpy()), dtype=torch.float, device=device)
    loss_fn = torch.nn.NLLLoss(weight = class_weights)
    # loss_fn = cross_entropy
    
    for i in range(num_epochs):
        start_t = time.time()
        local_hist = []
        correct_cnt = 0
        y_preds = torch.empty((0,),device=device)
        y_trues = torch.empty((0,),device=device)
        for j in range(iter_per_epoch):
            tokens, y_true = dataset[train_ids[j * batch_size: (j + 1) * batch_size]]

            tokens = tokens.to(device)
            y_true = y_true.to(device)

            optimizer.zero_grad()

            digits = model(tokens)
            y_preds = torch.hstack([y_preds,digits.argmax(dim=1)])
            y_trues = torch.hstack([y_trues,y_true])

            probs = torch.nn.LogSoftmax(dim=1)(digits)
            loss = loss_fn(probs,y_true)
            loss.backward()

            local_hist.append(loss.item())
            optimizer.step()

        end_t = time.time()

        loss_mean = np.array(local_hist).mean()
        loss_history.append(loss_mean)
            
        print(
            f"(Epoch {i}), time: {end_t - start_t:.1f}s, loss: {loss_mean:.3f}"
        )
        if isVerbose:
            train_accuracy, train_precision, train_recall, train_f1 = get_scores(y_trues.to('cpu'), y_preds.to('cpu'), model.num_classes) # This is an aggregated result due to GPU size limit
            print(f"    Training Set - accuracy: {train_accuracy:.2f}, precision: {train_precision:.2f}, recall: {train_recall:.2f}, f1-score: {train_f1:.2f},")
            if val_ids is not None:
                val_accuracy, val_precision, val_recall, val_f1 = eval(model, dataset, val_ids, num_classes)
                print(f"    Validation Set - accuracy: {val_accuracy:.2f}, precision: {val_precision:.2f}, recall: {val_recall:.2f}, f1-score: {val_f1:.2f},")
        if i%200 == 0 and isCheckpoint:
          dir = "checkpoints"
          if not os.path.exists(dir):
            os.mkdir(dir)
          file = f"epoch{i}.pt"
          path = dir+'/'+file
          torch.save({
                      'epoch': i,
                      'model_state_dict': model.state_dict(),
                      'optimizer_state_dict': optimizer.state_dict(),
                      'loss': loss_mean,
                      }, path)

        lr_scheduler.step()

        if loss_mean < 0.5:
          break
    
    return loss_history

In [10]:
# loss_hist = train_model(model, dataset, learning_rate=5e-6, lr_decay=0.99, batch_size=10, num_epochs=200, isCheckpoint = True, isVerbose = True)

## 5-fold CV

In [11]:
from sklearn.model_selection import KFold
import torch

def train_model_cv5(model, dataset):
    loss_hist = []
    kf = KFold(n_splits=5)
    cnt = 1
    for train_index, val_index in kf.split(dataset):
        model.reset()
        print("Fold "+str(cnt)+" (val", val_index[0],"-",str(val_index[-1])+")")
        loss_hist_fold = train_model(model, dataset=dataset, train_val_split=(train_index, val_index),learning_rate=3e-6, lr_decay=0.99, batch_size=10, num_epochs=300, isCheckpoint = False, isVerbose = True)
        loss_hist.append(loss_hist_fold)
        cnt += 1
    return loss_hist

In [12]:
lost_hist_folds = train_model_cv5(model, dataset)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 1 (val 0 - 42)
(Epoch 0), time: 11.3s, loss: 1.432
    Training Set - accuracy: 0.07, precision: 0.07, recall: 0.24, f1-score: 0.05,
    Validation Set - accuracy: 0.00, precision: 0.00, recall: 0.00, f1-score: 0.00,
(Epoch 1), time: 8.8s, loss: 1.425
    Training Set - accuracy: 0.21, precision: 0.11, recall: 0.34, f1-score: 0.14,
    Validation Set - accuracy: 0.15, precision: 0.07, recall: 0.50, f1-score: 0.11,
(Epoch 2), time: 8.8s, loss: 1.422
    Training Set - accuracy: 0.33, precision: 0.11, recall: 0.26, f1-score: 0.15,
    Validation Set - accuracy: 0.12, precision: 0.03, recall: 0.25, f1-score: 0.06,
(Epoch 3), time: 8.8s, loss: 1.415
    Training Set - accuracy: 0.35, precision: 0.09, recall: 0.25, f1-score: 0.13,
    Validation Set - accuracy: 0.12, precision: 0.03, recall: 0.25, f1-score: 0.06,
(Epoch 4), time: 8.9s, loss: 1.414
    Training Set - accuracy: 0.35, precision: 0.09, recall: 0.25, f1-score: 0.13,
    Validation Set - accuracy: 0.12, precision: 0.03, reca

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 2 (val 43 - 84)
(Epoch 0), time: 8.8s, loss: 1.384
    Training Set - accuracy: 0.51, precision: 0.13, recall: 0.25, f1-score: 0.17,
    Validation Set - accuracy: 0.70, precision: 0.17, recall: 0.25, f1-score: 0.21,
(Epoch 1), time: 8.7s, loss: 1.378
    Training Set - accuracy: 0.51, precision: 0.13, recall: 0.25, f1-score: 0.17,
    Validation Set - accuracy: 0.70, precision: 0.17, recall: 0.25, f1-score: 0.21,
(Epoch 2), time: 8.8s, loss: 1.377
    Training Set - accuracy: 0.51, precision: 0.13, recall: 0.25, f1-score: 0.17,
    Validation Set - accuracy: 0.70, precision: 0.17, recall: 0.25, f1-score: 0.21,
(Epoch 3), time: 8.8s, loss: 1.376
    Training Set - accuracy: 0.51, precision: 0.13, recall: 0.25, f1-score: 0.17,
    Validation Set - accuracy: 0.70, precision: 0.17, recall: 0.25, f1-score: 0.21,
(Epoch 4), time: 8.8s, loss: 1.374
    Training Set - accuracy: 0.51, precision: 0.13, recall: 0.25, f1-score: 0.17,
    Validation Set - accuracy: 0.70, precision: 0.17, reca

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 3 (val 85 - 126)
(Epoch 0), time: 8.8s, loss: 1.385
    Training Set - accuracy: 0.14, precision: 0.12, recall: 0.25, f1-score: 0.08,
    Validation Set - accuracy: 0.17, precision: 0.18, recall: 0.25, f1-score: 0.12,
(Epoch 1), time: 8.8s, loss: 1.379
    Training Set - accuracy: 0.15, precision: 0.15, recall: 0.19, f1-score: 0.09,
    Validation Set - accuracy: 0.20, precision: 0.22, recall: 0.30, f1-score: 0.14,
(Epoch 2), time: 8.8s, loss: 1.377
    Training Set - accuracy: 0.38, precision: 0.14, recall: 0.19, f1-score: 0.16,
    Validation Set - accuracy: 0.35, precision: 0.09, recall: 0.23, f1-score: 0.13,
(Epoch 3), time: 8.7s, loss: 1.375
    Training Set - accuracy: 0.57, precision: 0.15, recall: 0.24, f1-score: 0.18,
    Validation Set - accuracy: 0.38, precision: 0.09, recall: 0.25, f1-score: 0.14,
(Epoch 4), time: 8.8s, loss: 1.367
    Training Set - accuracy: 0.59, precision: 0.15, recall: 0.25, f1-score: 0.19,
    Validation Set - accuracy: 0.38, precision: 0.09, rec

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 4 (val 127 - 168)
(Epoch 0), time: 8.8s, loss: 1.415
    Training Set - accuracy: 0.06, precision: 0.01, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.05, precision: 0.01, recall: 0.25, f1-score: 0.02,
(Epoch 1), time: 8.8s, loss: 1.410
    Training Set - accuracy: 0.06, precision: 0.01, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.05, precision: 0.01, recall: 0.25, f1-score: 0.02,
(Epoch 2), time: 8.8s, loss: 1.402
    Training Set - accuracy: 0.06, precision: 0.01, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.05, precision: 0.01, recall: 0.25, f1-score: 0.02,
(Epoch 3), time: 8.8s, loss: 1.399
    Training Set - accuracy: 0.06, precision: 0.01, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.05, precision: 0.01, recall: 0.25, f1-score: 0.02,
(Epoch 4), time: 8.8s, loss: 1.398
    Training Set - accuracy: 0.06, precision: 0.01, recall: 0.25, f1-score: 0.03,
    Validation Set - accuracy: 0.05, precision: 0.01, re

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 5 (val 169 - 210)
(Epoch 0), time: 8.8s, loss: 1.419
    Training Set - accuracy: 0.25, precision: 0.06, recall: 0.25, f1-score: 0.10,
    Validation Set - accuracy: 0.47, precision: 0.12, recall: 0.25, f1-score: 0.16,
(Epoch 1), time: 8.8s, loss: 1.415
    Training Set - accuracy: 0.25, precision: 0.06, recall: 0.25, f1-score: 0.10,
    Validation Set - accuracy: 0.47, precision: 0.12, recall: 0.25, f1-score: 0.16,
(Epoch 2), time: 8.8s, loss: 1.412
    Training Set - accuracy: 0.25, precision: 0.06, recall: 0.25, f1-score: 0.10,
    Validation Set - accuracy: 0.47, precision: 0.12, recall: 0.25, f1-score: 0.16,
(Epoch 3), time: 8.8s, loss: 1.408
    Training Set - accuracy: 0.27, precision: 0.25, recall: 0.26, f1-score: 0.12,
    Validation Set - accuracy: 0.47, precision: 0.12, recall: 0.25, f1-score: 0.16,
(Epoch 4), time: 8.8s, loss: 1.404
    Training Set - accuracy: 0.28, precision: 0.26, recall: 0.26, f1-score: 0.13,
    Validation Set - accuracy: 0.47, precision: 0.24, re

In [None]:
from dataset import YouTubeDataset
from model import SimpleBert
splits = [10,20]
num_classes = len(splits)+1
dataset = YouTubeDataset(splits)
model = SimpleBert(num_classes)

hist = train_model_cv5(model, dataset)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initialization success if you see a tensor: tensor([[0.0817, 0.2310, 0.1790]], grad_fn=<AddmmBackward0>).


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 1 (val 0 - 42)
(Epoch 0), time: 8.8s, loss: 1.110
    Training Set - accuracy: 0.47, precision: 0.16, recall: 0.33, f1-score: 0.21,
    Validation Set - accuracy: 0.82, precision: 0.27, recall: 0.33, f1-score: 0.30,
(Epoch 1), time: 8.7s, loss: 1.109
    Training Set - accuracy: 0.47, precision: 0.16, recall: 0.33, f1-score: 0.21,
    Validation Set - accuracy: 0.82, precision: 0.27, recall: 0.33, f1-score: 0.30,
(Epoch 2), time: 8.7s, loss: 1.101
    Training Set - accuracy: 0.47, precision: 0.16, recall: 0.33, f1-score: 0.21,
    Validation Set - accuracy: 0.82, precision: 0.27, recall: 0.33, f1-score: 0.30,
(Epoch 3), time: 8.8s, loss: 1.105
    Training Set - accuracy: 0.47, precision: 0.16, recall: 0.33, f1-score: 0.21,
    Validation Set - accuracy: 0.82, precision: 0.27, recall: 0.33, f1-score: 0.30,
(Epoch 4), time: 8.8s, loss: 1.105
    Training Set - accuracy: 0.47, precision: 0.16, recall: 0.33, f1-score: 0.21,
    Validation Set - accuracy: 0.80, precision: 0.27, recal

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 2 (val 43 - 84)
(Epoch 0), time: 8.8s, loss: 1.128
    Training Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.07, precision: 0.02, recall: 0.33, f1-score: 0.05,
(Epoch 1), time: 8.7s, loss: 1.123
    Training Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.07, precision: 0.02, recall: 0.33, f1-score: 0.05,
(Epoch 2), time: 8.8s, loss: 1.120
    Training Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.07, precision: 0.02, recall: 0.33, f1-score: 0.05,
(Epoch 3), time: 8.7s, loss: 1.120
    Training Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.07, precision: 0.02, recall: 0.33, f1-score: 0.05,
(Epoch 4), time: 8.8s, loss: 1.120
    Training Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
    Validation Set - accuracy: 0.07, precision: 0.02, reca

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 3 (val 85 - 126)
(Epoch 0), time: 8.8s, loss: 1.094
    Training Set - accuracy: 0.57, precision: 0.20, recall: 0.32, f1-score: 0.24,
    Validation Set - accuracy: 0.38, precision: 0.12, recall: 0.33, f1-score: 0.18,
(Epoch 1), time: 8.7s, loss: 1.100
    Training Set - accuracy: 0.59, precision: 0.20, recall: 0.33, f1-score: 0.25,
    Validation Set - accuracy: 0.38, precision: 0.12, recall: 0.33, f1-score: 0.18,
(Epoch 2), time: 8.7s, loss: 1.085
    Training Set - accuracy: 0.59, precision: 0.20, recall: 0.33, f1-score: 0.25,
    Validation Set - accuracy: 0.38, precision: 0.12, recall: 0.33, f1-score: 0.18,
(Epoch 3), time: 8.7s, loss: 1.086
    Training Set - accuracy: 0.59, precision: 0.20, recall: 0.33, f1-score: 0.25,
    Validation Set - accuracy: 0.38, precision: 0.12, recall: 0.33, f1-score: 0.18,
(Epoch 4), time: 8.7s, loss: 1.090
    Training Set - accuracy: 0.59, precision: 0.20, recall: 0.33, f1-score: 0.25,
    Validation Set - accuracy: 0.38, precision: 0.12, rec

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 4 (val 127 - 168)
(Epoch 0), time: 8.8s, loss: 1.087
    Training Set - accuracy: 0.57, precision: 0.19, recall: 0.33, f1-score: 0.24,
    Validation Set - accuracy: 0.45, precision: 0.15, recall: 0.33, f1-score: 0.21,
(Epoch 1), time: 8.7s, loss: 1.086
    Training Set - accuracy: 0.57, precision: 0.19, recall: 0.33, f1-score: 0.24,
    Validation Set - accuracy: 0.45, precision: 0.15, recall: 0.33, f1-score: 0.21,
(Epoch 2), time: 8.7s, loss: 1.083
    Training Set - accuracy: 0.57, precision: 0.19, recall: 0.33, f1-score: 0.24,
    Validation Set - accuracy: 0.45, precision: 0.15, recall: 0.33, f1-score: 0.21,
(Epoch 3), time: 8.8s, loss: 1.086
    Training Set - accuracy: 0.57, precision: 0.19, recall: 0.33, f1-score: 0.24,
    Validation Set - accuracy: 0.45, precision: 0.15, recall: 0.33, f1-score: 0.21,
(Epoch 4), time: 8.7s, loss: 1.086
    Training Set - accuracy: 0.57, precision: 0.19, recall: 0.33, f1-score: 0.24,
    Validation Set - accuracy: 0.45, precision: 0.15, re

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 5 (val 169 - 210)
(Epoch 0), time: 8.8s, loss: 1.118
    Training Set - accuracy: 0.15, precision: 0.05, recall: 0.33, f1-score: 0.09,
    Validation Set - accuracy: 0.20, precision: 0.39, recall: 0.36, f1-score: 0.15,
(Epoch 1), time: 8.8s, loss: 1.112
    Training Set - accuracy: 0.17, precision: 0.18, recall: 0.33, f1-score: 0.11,
    Validation Set - accuracy: 0.23, precision: 0.39, recall: 0.38, f1-score: 0.19,
(Epoch 2), time: 8.7s, loss: 1.110
    Training Set - accuracy: 0.20, precision: 0.24, recall: 0.35, f1-score: 0.14,
    Validation Set - accuracy: 0.17, precision: 0.06, recall: 0.33, f1-score: 0.10,
(Epoch 3), time: 8.7s, loss: 1.108
    Training Set - accuracy: 0.21, precision: 0.22, recall: 0.35, f1-score: 0.16,
    Validation Set - accuracy: 0.23, precision: 0.23, recall: 0.36, f1-score: 0.20,
(Epoch 4), time: 8.8s, loss: 1.105
    Training Set - accuracy: 0.25, precision: 0.21, recall: 0.30, f1-score: 0.19,
    Validation Set - accuracy: 0.25, precision: 0.18, re

In [15]:
from dataset import YouTubeDataset
from model import SimpleBert
splits = [10]
num_classes = len(splits)+1
dataset = YouTubeDataset(splits)
model = SimpleBert(num_classes)

hist = train_model_cv5(model, dataset)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initialization success if you see a tensor: tensor([[0.2250, 0.2336]], grad_fn=<AddmmBackward0>).


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 1 (val 0 - 42)
(Epoch 0), time: 8.8s, loss: 0.696
    Training Set - accuracy: 0.47, precision: 0.23, recall: 0.50, f1-score: 0.32,
    Validation Set - accuracy: 0.85, precision: 0.92, recall: 0.57, f1-score: 0.58,
(Epoch 1), time: 8.8s, loss: 0.694
    Training Set - accuracy: 0.49, precision: 0.74, recall: 0.52, f1-score: 0.37,
    Validation Set - accuracy: 0.82, precision: 0.41, recall: 0.50, f1-score: 0.45,
(Epoch 2), time: 8.7s, loss: 0.694
    Training Set - accuracy: 0.48, precision: 0.52, recall: 0.51, f1-score: 0.39,
    Validation Set - accuracy: 0.70, precision: 0.40, recall: 0.42, f1-score: 0.41,
(Epoch 3), time: 8.7s, loss: 0.694
    Training Set - accuracy: 0.46, precision: 0.45, recall: 0.48, f1-score: 0.37,
    Validation Set - accuracy: 0.72, precision: 0.40, recall: 0.44, f1-score: 0.42,
(Epoch 4), time: 8.7s, loss: 0.695
    Training Set - accuracy: 0.43, precision: 0.39, recall: 0.46, f1-score: 0.35,
    Validation Set - accuracy: 0.80, precision: 0.41, recal

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 2 (val 43 - 84)
(Epoch 0), time: 8.8s, loss: 0.695
    Training Set - accuracy: 0.47, precision: 0.43, recall: 0.48, f1-score: 0.36,
    Validation Set - accuracy: 0.38, precision: 0.66, recall: 0.55, f1-score: 0.34,
(Epoch 1), time: 8.7s, loss: 0.694
    Training Set - accuracy: 0.46, precision: 0.41, recall: 0.47, f1-score: 0.36,
    Validation Set - accuracy: 0.40, precision: 0.53, recall: 0.52, f1-score: 0.39,
(Epoch 2), time: 8.8s, loss: 0.691
    Training Set - accuracy: 0.53, precision: 0.61, recall: 0.54, f1-score: 0.43,
    Validation Set - accuracy: 0.40, precision: 0.67, recall: 0.57, f1-score: 0.38,
(Epoch 3), time: 8.8s, loss: 0.693
    Training Set - accuracy: 0.47, precision: 0.47, recall: 0.48, f1-score: 0.41,
    Validation Set - accuracy: 0.28, precision: 0.30, recall: 0.43, f1-score: 0.24,
(Epoch 4), time: 8.8s, loss: 0.690
    Training Set - accuracy: 0.52, precision: 0.56, recall: 0.53, f1-score: 0.45,
    Validation Set - accuracy: 0.42, precision: 0.67, reca

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 3 (val 85 - 126)
(Epoch 0), time: 8.8s, loss: 0.694
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 1), time: 8.7s, loss: 0.690
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 2), time: 8.8s, loss: 0.689
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 3), time: 8.7s, loss: 0.695
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, recall: 0.50, f1-score: 0.27,
(Epoch 4), time: 8.7s, loss: 0.689
    Training Set - accuracy: 0.59, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.38, precision: 0.19, rec

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 4 (val 127 - 168)
(Epoch 0), time: 8.8s, loss: 0.696
    Training Set - accuracy: 0.57, precision: 0.29, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.45, precision: 0.23, recall: 0.50, f1-score: 0.31,
(Epoch 1), time: 8.8s, loss: 0.694
    Training Set - accuracy: 0.57, precision: 0.29, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.45, precision: 0.23, recall: 0.50, f1-score: 0.31,
(Epoch 2), time: 8.7s, loss: 0.692
    Training Set - accuracy: 0.57, precision: 0.29, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.45, precision: 0.23, recall: 0.50, f1-score: 0.31,
(Epoch 3), time: 8.8s, loss: 0.692
    Training Set - accuracy: 0.57, precision: 0.29, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.45, precision: 0.23, recall: 0.50, f1-score: 0.31,
(Epoch 4), time: 8.7s, loss: 0.694
    Training Set - accuracy: 0.57, precision: 0.29, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.45, precision: 0.23, re

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fold 5 (val 169 - 210)
(Epoch 0), time: 8.8s, loss: 0.696
    Training Set - accuracy: 0.60, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.35, precision: 0.17, recall: 0.50, f1-score: 0.26,
(Epoch 1), time: 8.7s, loss: 0.694
    Training Set - accuracy: 0.60, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.35, precision: 0.17, recall: 0.50, f1-score: 0.26,
(Epoch 2), time: 8.7s, loss: 0.691
    Training Set - accuracy: 0.60, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.35, precision: 0.17, recall: 0.50, f1-score: 0.26,
(Epoch 3), time: 8.8s, loss: 0.691
    Training Set - accuracy: 0.60, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.35, precision: 0.17, recall: 0.50, f1-score: 0.26,
(Epoch 4), time: 8.7s, loss: 0.689
    Training Set - accuracy: 0.60, precision: 0.30, recall: 0.50, f1-score: 0.37,
    Validation Set - accuracy: 0.35, precision: 0.17, re