In [1]:
import config
import dataset
import engine
import torch
import pandas as pd
import torch.nn as nn
import numpy as np

from model import ROBERTAClassifier
from sklearn import metrics
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def train(fold, collate_fn=None):
    """Fine-tune ``ROBERTAClassifier`` with ``fold`` held out for validation.

    Rows of ``config.TRAINING_FILE`` (tab-separated; the ``tweet``, ``label``,
    ``length`` and ``fold`` columns are read) whose ``fold`` value differs from
    ``fold`` are used for training, the remaining rows for validation. After
    every epoch the validation accuracy and F1 score are printed, and the
    model's ``state_dict`` is saved whenever the accuracy improves.

    Args:
        fold: Value of the ``fold`` column identifying the validation split.
        collate_fn: Optional callable forwarded to both ``DataLoader`` objects
            to merge samples into a batch. ``None`` (the default) keeps
            PyTorch's default collation.

    Returns:
        The best validation accuracy seen over all epochs (``0`` if no epoch
        scored above zero).
    """
    # read the data and split it into the training folds and the held-out fold
    data = pd.read_csv(config.TRAINING_FILE, sep="\t")
    data_train = data[data.fold != fold].reset_index(drop=True)
    data_valid = data[data.fold == fold].reset_index(drop=True)

    # initialize the datasets and dataloaders
    # NOTE(review): the traceback recorded with this notebook ("Trying to
    # resize storage that is not resizable" raised from default_collate in a
    # worker) is what default collation produces when the tensors of one batch
    # differ in size. Presumably ROBERTADataset returns unpadded tensors --
    # confirm, then either pad inside the dataset or pass a padding
    # `collate_fn` to this function.
    train_dataset = dataset.ROBERTADataset(
        tweets=data_train.tweet.values,
        labels=data_train.label.values,
        lengths=data_train.length.values
    )
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=True,  # reshuffle each epoch instead of replaying file order
        num_workers=4,
        collate_fn=collate_fn
    )
    valid_dataset = dataset.ROBERTADataset(
        tweets=data_valid.tweet.values,
        labels=data_valid.label.values,
        lengths=data_valid.length.values
    )
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1,
        collate_fn=collate_fn
    )

    # initialize the device
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    # load the model and send it to device
    model = ROBERTAClassifier()
    model.to(device)

    # set parameters for the optimizer: biases and LayerNorm parameters are
    # excluded from weight decay, everything else decays with 0.001
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    # calculate the training steps from the real number of batches per epoch,
    # so a final partial batch is counted as well
    # (assumes engine.train_fn steps the scheduler once per batch -- TODO confirm)
    num_train_steps = len(train_data_loader) * config.EPOCH

    # initialize the optimizer
    optimizer = AdamW(optimizer_parameters, lr=3e-5)

    # initialize the scheduler
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_steps
    )

    # start the training
    best_accuracy = 0
    for epoch in range(config.EPOCH):
        print(f"Training for Fold: {fold}, Epoch: {epoch}")
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        # binarize the model outputs at 0.5 before scoring
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        f1_score = metrics.f1_score(targets, outputs)
        print(f"Fold: {fold}, Epoch: {epoch}, Accuracy Score = {accuracy}, F1 Score = {f1_score}")
        # keep only the checkpoint with the best validation accuracy so far
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_FOLDER + str(fold)+ config.MODEL_PATH)
            best_accuracy = accuracy

    return best_accuracy

In [7]:
# Train with fold 1 held out for validation.
# Folds 2-5 are run the same way by changing the argument, e.g. train(fold=2).
train(fold=1)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for Fold: 1, Epoch: 0


  0%|          | 0/2502 [00:00<?, ?it/s]


RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "d:\APPS\Anaconda\envs\ML\Lib\site-packages\torch\utils\data\_utils\worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "d:\APPS\Anaconda\envs\ML\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "d:\APPS\Anaconda\envs\ML\Lib\site-packages\torch\utils\data\_utils\collate.py", line 265, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\APPS\Anaconda\envs\ML\Lib\site-packages\torch\utils\data\_utils\collate.py", line 127, in collate
    return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\APPS\Anaconda\envs\ML\Lib\site-packages\torch\utils\data\_utils\collate.py", line 127, in <dictcomp>
    return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\APPS\Anaconda\envs\ML\Lib\site-packages\torch\utils\data\_utils\collate.py", line 119, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\APPS\Anaconda\envs\ML\Lib\site-packages\torch\utils\data\_utils\collate.py", line 161, in collate_tensor_fn
    out = elem.new(storage).resize_(len(batch), *list(elem.size()))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Trying to resize storage that is not resizable
