In [1]:
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns

! pip install transformers
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from utils import load_data, transform_dataset
from dataset import BertDataset, encode_labels, decode_labels, create_data_dataloader, labels_decoding
from model import BertClassifier
from training import training, eval_model, predict

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# Show the GPU, driver and CUDA version visible to this runtime
!nvidia-smi

Fri Feb 10 10:56:36 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   67C    P0    31W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# HYPERPARAMETERS AND CONSTANTS
SEED = 42        # fixed seed so torch's random number generation is repeatable across runs
LR = 1e-6        # learning rate handed to the AdamW optimizer
EPOCHS = 20      # number of training epochs; also used to size the LR schedule
BATCH_SIZE = 16  # passed to create_data_dataloader for every split
n_classes = 6    # passed to BertClassifier; lower-case name kept because later cells reference it

# REPRODUCIBILITY
# Seeds torch's random number generators on all devices. Must run before the
# model is built so any randomly initialised weights are the same on each run.
torch.manual_seed(SEED)

# DEVICE
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Current device: {device}")

Current device: cuda:0


# Data loading

In [4]:
# Every split goes through the same pipeline: load -> transform -> encode labels
train_data = encode_labels(transform_dataset(load_data('train')))
val_data = encode_labels(transform_dataset(load_data('val')))
test_data = encode_labels(transform_dataset(load_data('test')))

In [5]:
# Build one loader per split (train, val, test, in that order), all with BATCH_SIZE
train_data_loader, val_data_loader, test_data_loader = [
    create_data_dataloader(split, BATCH_SIZE)
    for split in (train_data, val_data, test_data)
]

# Model initialization and training

In [6]:
# Build the classifier and place it on the selected device
model = BertClassifier(n_classes).to(device)

# AdamW over all model parameters, paired with a linear LR schedule (no warm-up)
# that spans every batch of every epoch
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

# Cross-entropy objective, moved to the same device as the model
loss_fn = torch.nn.CrossEntropyLoss().to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Fine-tune for EPOCHS epochs. `history` is indexed further down by
# 'train_acc', 'val_acc', 'train_loss' and 'val_loss' when plotting.
history = training(
    model,
    train_data_loader,
    val_data_loader,
    loss_fn,
    optimizer,
    scheduler,
    device,
    EPOCHS,
)

Epoch 1/20
----------


100%|██████████| 1000/1000 [03:51<00:00,  4.32it/s]


Train Accuracy:  0.323
Train Loss:  1.66466


100%|██████████| 125/125 [00:08<00:00, 14.33it/s]


Val   Accuracy:  0.516
Val   Loss:  1.42904

Epoch 2/20
----------


100%|██████████| 1000/1000 [03:46<00:00,  4.42it/s]


Train Accuracy:  0.536
Train Loss:  1.31750


100%|██████████| 125/125 [00:08<00:00, 14.41it/s]


Val   Accuracy:  0.587
Val   Loss:  1.11529

Epoch 3/20
----------


100%|██████████| 1000/1000 [03:46<00:00,  4.42it/s]


Train Accuracy:  0.636
Train Loss:  1.04569


100%|██████████| 125/125 [00:08<00:00, 14.63it/s]


Val   Accuracy:  0.707
Val   Loss:  0.86884

Epoch 4/20
----------


100%|██████████| 1000/1000 [03:46<00:00,  4.42it/s]


Train Accuracy:  0.732
Train Loss:  0.81351


100%|██████████| 125/125 [00:08<00:00, 14.43it/s]


Val   Accuracy:  0.764
Val   Loss:  0.66433

Epoch 5/20
----------


100%|██████████| 1000/1000 [03:45<00:00,  4.43it/s]


Train Accuracy:  0.791
Train Loss:  0.63052


100%|██████████| 125/125 [00:08<00:00, 14.32it/s]


Val   Accuracy:  0.812
Val   Loss:  0.51527

Epoch 6/20
----------


100%|██████████| 1000/1000 [03:46<00:00,  4.42it/s]


Train Accuracy:  0.833
Train Loss:  0.49903


100%|██████████| 125/125 [00:08<00:00, 14.35it/s]


Val   Accuracy:  0.864
Val   Loss:  0.42116

Epoch 7/20
----------


100%|██████████| 1000/1000 [03:45<00:00,  4.43it/s]


Train Accuracy:  0.865
Train Loss:  0.40712


100%|██████████| 125/125 [00:08<00:00, 14.67it/s]


Val   Accuracy:  0.890
Val   Loss:  0.35558

Epoch 8/20
----------


100%|██████████| 1000/1000 [03:45<00:00,  4.43it/s]


Train Accuracy:  0.889
Train Loss:  0.33464


100%|██████████| 125/125 [00:08<00:00, 14.39it/s]


Val   Accuracy:  0.897
Val   Loss:  0.31181

Epoch 9/20
----------


100%|██████████| 1000/1000 [03:45<00:00,  4.43it/s]


Train Accuracy:  0.905
Train Loss:  0.28806


100%|██████████| 125/125 [00:08<00:00, 14.43it/s]


Val   Accuracy:  0.905
Val   Loss:  0.27741

Epoch 10/20
----------


100%|██████████| 1000/1000 [03:45<00:00,  4.43it/s]


Train Accuracy:  0.916
Train Loss:  0.25141


100%|██████████| 125/125 [00:08<00:00, 14.41it/s]


Val   Accuracy:  0.910
Val   Loss:  0.25896

Epoch 11/20
----------


 73%|███████▎  | 734/1000 [02:46<01:00,  4.43it/s]

# Training visualization

In [None]:
# Train vs. validation accuracy, drawn on the current axes and saved to disk
ax = plt.gca()
for key, curve_label in (('train_acc', 'train accuracy'),
                         ('val_acc', 'validation accuracy')):
    ax.plot(history[key], label=curve_label)

ax.set_title('Training history')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend()
ax.set_ylim([0, 1])  # pin the y-axis to the [0, 1] accuracy range
ax.figure.savefig("training_history_acc.png")

In [None]:
# Train vs. validation loss, drawn on the current axes and saved to disk
ax = plt.gca()
ax.plot(history['train_loss'], label='train loss')
ax.plot(history['val_loss'], label='validation loss')

ax.set_title('Training history')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend()  # without this the labels given to plot() never show up in the figure
ax.figure.savefig("training_history_loss.png")

# Results visualization

In [None]:
# Run the model over the test loader and collect its outputs
texts, predictions, prediction_probs, real_values = predict(model, test_data_loader)

# Overall accuracy of the predictions against the true labels
test_accuracy = accuracy_score(real_values, predictions)
print(f'Test Accuracy: {test_accuracy: .3f}')

In [None]:
# Per-class metrics, with rows named from the values of labels_decoding
class_names = labels_decoding.values()
report = classification_report(real_values, predictions, target_names=class_names)
print(report)

In [None]:
# Confusion matrix of the test predictions. scikit-learn puts the true class on
# the rows and the predicted class on the columns, so label the axes to match.
# NOTE(review): assumes labels_decoding's value order matches the sorted encoded
# label ids used by confusion_matrix — confirm against dataset.py.
class_names = list(labels_decoding.values())
ax = sns.heatmap(
    confusion_matrix(real_values, predictions),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion matrix (test set)');  # trailing ';' hides the text repr

# Save model

In [None]:
# Persist the model's state dict (not the whole module object) to disk
weights = model.state_dict()
torch.save(weights, "fine-tunned_bert.pth")