### Mount drive

In [None]:
# Attach Google Drive to the Colab filesystem at MOUNT_POINT.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

### Install dependencies

In [None]:
import os
path = '/content/drive/MyDrive/Colab Notebooks/social_biases_frames'
os.chdir(path)
!pip install -r requirements.txt

### Imports

In [None]:
import nltk
import random
import numpy as np
import torch
# Download the NLTK 'stopwords' corpus. This call intentionally sits before
# the project-local imports below.
# NOTE(review): presumably one of those modules needs the corpus at import
# time - confirm before reordering this cell.
nltk.download('stopwords')
from dataset import SBICDataset
from model import SBICModel
from utils import print_rand_sentence_encoding, print_report
from plot import plot_confusion_matrix, plot_loss
from sklearn.metrics import classification_report

### Set random seeds for reproducibility

In [None]:
# Seed every random number generator used by this notebook with one value.
seed = 42
for seed_fn in (
    random.seed,                 # Python's built-in RNG
    np.random.seed,              # NumPy's global RNG
    torch.manual_seed,           # PyTorch default generator
    torch.cuda.manual_seed_all,  # PyTorch generators on all CUDA devices
):
    seed_fn(seed)

### Loading dataset

In [None]:
# Build the dataset wrapper and print its summary.
dataset = SBICDataset()
print(dataset)

# Fetch the tokenized data for each split, in train / val / test order.
train_set, val_set, test_set = (
    dataset.get_tokenized_data(type=split_name)
    for split_name in ('train', 'val', 'test')
)

In [None]:
# Print a random sentence encoding from the training split.
# The first two tensors of train_set are passed as token ids and attention
# masks, next to the raw 'text' column of the training data.
input_ids, attention_masks = train_set.tensors[0], train_set.tensors[1]
sentences = dataset.get_data(type='train')['text']
print_rand_sentence_encoding(sentences, input_ids, attention_masks)

### Training & testing model

In [None]:
# Instantiate the model with SBICModel's default constructor arguments.
model = SBICModel()

In [None]:
# Training hyperparameters.
EPOCHS = 4
BATCH_SIZE = 64
LEARNING_RATE = 2e-5

# Train on the training split with the validation split passed alongside,
# then plot the losses and print the report from the returned statistics.
training_stats = model.train(
    train_set,
    val_set,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE,
)
plot_loss(training_stats)
print_report(training_stats)

In [None]:
# Load the best model (as defined by load_best_model) and predict on the
# test split.
model.load_best_model()
preds, labels = model.predict(test_set)

# NOTE(review): plot_confusion_matrix receives (preds, labels) while
# classification_report receives (labels, preds) - confirm the helper's
# expected argument order.
plot_confusion_matrix(preds, labels)

# Text classification report, formatted with 4 digits.
report = classification_report(labels, preds, digits=4)
print(report)