In [1]:
!pip install transformers[torch] datasets
!pip install accelerate -U

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.20.3 (from transformers[torch])
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Installing collect

In [4]:
import numpy as np
import torch.nn as nn

from datasets import load_dataset, load_metric
from sklearn.metrics import precision_recall_fscore_support
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from transformers import TrainingArguments, Trainer, set_seed

# Seed the random number generators *before* the model is built.
# `from_pretrained` below creates a freshly initialised classification head
# (the checkpoint has no weights for it), and the Trainer only applies its own
# seed after the model already exists, so without this call every run starts
# from a different head initialisation and the metrics are not reproducible.
SEED = 42
set_seed(SEED)

# Define dataset: two local CSV files exposed as the "train" and "test" splits.
dataset = load_dataset('csv', data_files={'train': 'train.csv', 'test': 'test.csv'})

# Define model and tokenizer: both come from the same checkpoint so that the
# vocabulary matches the pretrained weights; the classifier predicts 4 labels.
checkpoint = "distilbert-base-cased"
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint)
model = DistilBertForSequenceClassification.from_pretrained(checkpoint, num_labels=4)

def tokenize_function(examples):
    """Tokenize a batch of rows and attach the training labels.

    Intended for ``dataset.map(..., batched=True)``: ``examples`` is indexed by
    column name, the text is read from the "sentence" column and the target
    from the "tier" column.

    Returns the tokenizer output with an extra "labels" entry holding the
    values of the "tier" column.
    """
    sentences = examples["sentence"]
    # Every sequence is truncated if too long and padded to a fixed maximum length.
    encoded = tokenizer(sentences, truncation=True, padding="max_length")
    encoded["labels"] = examples["tier"]
    return encoded

# Tokenize every split in batches; the result keeps the original columns and
# adds the tokenizer outputs plus "labels".
tokenized_datasets = dataset.map(function=tokenize_function, batched=True)

# Accuracy metric object used by compute_metrics.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into accuracy, precision, recall and F1.

    Args:
        eval_pred: pair that unpacks into ``(logits, labels)``.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1'.
        Precision, recall and F1 are averaged over classes with
        ``average='weighted'``.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted = np.argmax(scores, axis=-1)

    accuracy_result = metric.compute(predictions=predicted, references=gold)

    # The fourth returned element (support) is not reported.
    weighted = precision_recall_fscore_support(gold, predicted, average='weighted')

    report = {'accuracy': accuracy_result['accuracy']}
    report['precision'] = weighted[0]
    report['recall'] = weighted[1]
    report['f1'] = weighted[2]
    return report

class CustomTrainer(Trainer):
    """Trainer whose loss is computed explicitly with ``nn.CrossEntropyLoss``.

    The labels are withheld from the model's forward pass and the loss is
    derived from the returned logits instead.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the cross-entropy loss for one batch.

        Args:
            model: the model to run the forward pass with.
            inputs: mapping of model inputs; must contain "labels". The
                "labels" entry is removed from this mapping.
            return_outputs: when true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: accepted for compatibility with newer
                ``transformers`` releases, whose Trainer passes this keyword
                to ``compute_loss``. It is not used: the loss below is a plain
                mean over the batch, exactly as before.

        Returns:
            The loss tensor, or ``(loss, outputs)`` if ``return_outputs`` is set.
        """
        # Pop the labels so the forward pass does not receive them.
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = nn.CrossEntropyLoss()
        # `self.model` (not the `model` argument) is used to read the config.
        # Flatten to (N, num_labels) logits against (N,) targets.
        loss = loss_fn(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Training configuration: checkpoints go to "test_trainer", logs to "logs"
# (one log entry every 500 steps), and evaluation runs once per epoch
# for 10 epochs.
# NOTE(review): in the recorded run the validation loss rises after the first
# epoch while the training loss keeps falling; consider fewer epochs or
# best-checkpoint selection.
training_args = TrainingArguments(
    output_dir="test_trainer",
    logging_dir="logs",
    logging_steps=500,
    num_train_epochs=10,
    evaluation_strategy="epoch",
)

# Wire the model, the tokenized splits and the metric function together.
trainer = CustomTrainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

# Left as the last expression so the cell displays the returned TrainOutput.
trainer.train()

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/263M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4344 [00:00<?, ? examples/s]

Map:   0%|          | 0/1086 [00:00<?, ? examples/s]

  metric = load_metric("accuracy")


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5201,0.284461,0.907919,0.907843,0.907919,0.907736
2,0.2375,0.314124,0.905157,0.908509,0.905157,0.905621
3,0.1666,0.419057,0.911602,0.914727,0.911602,0.912129
4,0.1173,0.384763,0.924494,0.924642,0.924494,0.924473
5,0.0781,0.481375,0.911602,0.913907,0.911602,0.91192
6,0.0894,0.527616,0.913444,0.913748,0.913444,0.913504
7,0.0642,0.482124,0.921731,0.922458,0.921731,0.921893
8,0.0532,0.570912,0.91989,0.921135,0.91989,0.920107
9,0.0536,0.585588,0.91989,0.921984,0.91989,0.920154
10,0.0288,0.588948,0.91989,0.921211,0.91989,0.920084


TrainOutput(global_step=5430, training_loss=0.1324806687581605, metrics={'train_runtime': 817.6797, 'train_samples_per_second': 53.126, 'train_steps_per_second': 6.641, 'total_flos': 5754589040148480.0, 'train_loss': 0.1324806687581605, 'epoch': 10.0})