In [2]:
pip install -U transformers datasets evaluate

Collecting transformers
  Using cached transformers-4.28.1-py3-none-any.whl (7.0 MB)
Collecting datasets
  Using cached datasets-2.12.0-py3-none-any.whl (474 kB)
Collecting evaluate
  Using cached evaluate-0.4.0-py3-none-any.whl (81 kB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Using cached tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
Collecting filelock
  Using cached filelock-3.12.0-py3-none-any.whl (10 kB)
Collecting huggingface-hub<1.0,>=0.11.0
  Using cached huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
Collecting xxhash
  Using cached xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
Collecting multiprocess
  Using cached multiprocess-0.70.14-py310-none-any.whl (134 kB)
Collecting aiohttp
  Using cached aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
Collecting responses<0.19
  Using cached responses-0.18.0-py3-none-any.whl (38 kB)
Collecting pyarrow>=8.0.0
  Using ca

In [6]:
import numpy as np
from datasets import load_dataset, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel
from transformers import TrainingArguments, Trainer
import evaluate
import torch.nn as nn
import torch

def compute_metrics(eval_pred):
    (logits, _), labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # predictions = torch.max(logits, axis=1).indices
    return f1.compute(predictions=predictions, references=labels, average="macro")

def build_tokenizer_func(tokenizer):
    def tokenize_func(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=200)
    return tokenize_func

def load_data_for_task(tokenizer, task="A", load_val=False):
    df = pd.read_csv(f"subtask{task}_train.csv", index_col=0)
    if task == "B":
        df.drop(columns=["topic"], inplace=True)
    df.columns = ["text", "labels"]
    
    if load_val:
        df_train, df_val = train_test_split(df, train_size=.8)
    else:
        df_train = df
    
    df_test = pd.read_csv(f"subtask{task}_test.csv", index_col=0)
    df_test.columns = ["text"]

    ds_train = Dataset.from_pandas(df_train, split="train")
    if load_val:
        ds_val = Dataset.from_pandas(df_val, split="test")
    ds_test = Dataset.from_pandas(df_test, split="test")
    
    return (ds_train, ds_val, ds_test) if load_val == True else (ds_train, ds_test)

In [11]:
from transformers import Trainer

class MultiTaskTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss = nn.CrossEntropyLoss()
        # self.loss = nn.BCEWithLogitsLoss()
        
    def compute_loss(self, model, inputs, return_outputs=False):
        # implement custom logic here
        output = model(inputs["input_ids"], inputs["attention_mask"])
        
        loss = self.loss(output.get("logits"), inputs["labels"])
        loss = torch.tensor(7., requires_grad=True)
        if return_outputs:
            return loss, output
        return loss


In [12]:
model = AutoModelForSequenceClassification.from_pretrained("morenolq/bart-it", num_labels=4)
tokenizer = AutoTokenizer.from_pretrained("morenolq/bart-it")
ds_train, ds_val, ds_test = load_data_for_task(tokenizer, "B", load_val=True)

tok_func = build_tokenizer_func(tokenizer)
ds_train_tok = ds_train.map(tok_func, batched=True)
ds_val_tok = ds_val.map(tok_func, batched=False)
ds_test_tok = ds_test.map(tok_func, batched=False)

training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",
    num_train_epochs=5,
)

f1 = evaluate.load("f1")

trainer = MultiTaskTrainer(
    model=model,
    args=training_args, 
    train_dataset=ds_train_tok,
    eval_dataset=ds_val_tok,
    compute_metrics=compute_metrics,
)

Some weights of the model checkpoint at morenolq/bart-it were not used when initializing BartForSequenceClassification: ['final_logits_bias', 'lm_head.weight']
- This IS expected if you are initializing BartForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BartForSequenceClassification were not initialized from the model checkpoint at morenolq/bart-it and are newly initialized: ['classification_head.dense.weight', 'classification_head.out_proj.weight', 'classification_head.dense.bias', 'classification_head.out_proj.bias']
You should probably TRAIN this model on a down-stream task t

Map:   0%|          | 0/648 [00:00<?, ? examples/s]

Map:   0%|          | 0/162 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [13]:
trainer.train()



Epoch,Training Loss,Validation Loss,F1
1,No log,7.0,0.220833
2,No log,7.0,0.220833
3,No log,7.0,0.220833
4,No log,7.0,0.220833
5,No log,7.0,0.220833


TrainOutput(global_step=405, training_loss=7.0, metrics={'train_runtime': 20.7951, 'train_samples_per_second': 155.806, 'train_steps_per_second': 19.476, 'total_flos': 388157033664000.0, 'train_loss': 7.0, 'epoch': 5.0})

In [6]:
import torch

In [7]:
with torch.no_grad():
    model.eval()
    y_pred = model(torch.tensor(ds_test_tok["input_ids"]).cuda(), torch.tensor(ds_test_tok["attention_mask"]).cuda())
    model.train()

RuntimeError: CUDA out of memory. Tried to allocate 704.00 MiB (GPU 0; 15.77 GiB total capacity; 14.28 GiB already allocated; 30.88 MiB free; 14.64 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF