In [None]:
!pip install transformers
!pip install datasets

In [None]:
from datasets import load_dataset
from sklearn.preprocessing import MinMaxScaler
import torch
import numpy as np
import random
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
import torch.nn as nn
from datetime import datetime
from utils_task3 import *

# Reproducibility: give the torch, stdlib and NumPy random generators the same fixed seed (0).
for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    seed_rng(0)

In [None]:
# Fetch the three dataset splits, then push each one through the same preprocessing step
# (train first, then validation, then test).
train_data, validation_data, test_data = get_data()
processed_train_data, processed_validation_data, processed_test_data = (
    process_data(split) for split in (train_data, validation_data, test_data)
)

# Wrap every processed split in a shuffling DataLoader; only the batch size differs per split.
# NOTE(review): the validation and test loaders are shuffled as well, so the first test batch
# (used later for the attention visualisation) depends on the seeded shuffle order.
train_data_loader = DataLoader(processed_train_data, shuffle=True, batch_size=32)
validation_data_loader = DataLoader(processed_validation_data, shuffle=True, batch_size=64)
test_data_loader = DataLoader(processed_test_data, shuffle=True, batch_size=8)

In [None]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# BERT tokenizer plus the BertLinear model.
# NOTE(review): BertLinear is not defined in this notebook - presumably it comes from the
# utils_task3 star import and wraps BERT with a linear head; confirm there.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = BertLinear()

# Move the model onto the chosen device before the optimizer is built.
model.to(device)

# Adam over all model parameters with a learning rate of 5e-5.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

In [None]:
epochs = 15

# Take wall-clock timestamps immediately before and after the training call.
start_time = datetime.now()
_ = train(model, tokenizer, optimizer, epochs, train_data_loader, validation_data_loader, device)
end_time = datetime.now()

# Elapsed training time in minutes, in total and averaged over the epochs.
minutes = (end_time - start_time).total_seconds() / 60
epoch_minutes = minutes / epochs

print('Total minutes to train : ', minutes, sep='')
print('Average minutes per epoch : ', epoch_minutes, sep='')

# Replace the in-memory model/tokenizer with whatever load_models returns for this device,
# then run the test routine on the test loader.
model, tokenizer = load_models(device)
test(model, tokenizer, test_data_loader, device)



In [None]:
# Same device choice as for training: CUDA when available, CPU otherwise.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Stock bert-base-uncased tokenizer and encoder, used as the baseline for the comparison.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
pretrained_model = AutoModel.from_pretrained('bert-base-uncased')

# Model returned by load_models for this device; its second return value is discarded.
trained_bert_linear_model, _ = load_models(device)

In [None]:
!pip install bertviz

In [None]:
from bertviz import head_view, model_view
# Pull only the first batch from the test loader and keep element [0][0] of it;
# `sentence` stays None if the loader yields nothing.
data = next(iter(test_data_loader), None)
sentence = data[0][0] if data is not None else None

In [None]:
# Split on the literal '[SEP]' marker into exactly two parts and strip
# the surrounding whitespace from each part.
sentence_a, sentence_b = (part.strip() for part in sentence.split('[SEP]'))

In [None]:
# Attention head view for the stock (not fine-tuned) BERT on the chosen sentence pair.

# Encode the pair by calling the tokenizer directly; encode_plus is documented as deprecated
# in favour of __call__, which returns the same input_ids / token_type_ids for a text pair.
inputs = tokenizer(sentence_a, sentence_b, return_tensors='pt')
input_ids = inputs['input_ids']
token_type_ids = inputs['token_type_ids']

# Visualisation only needs the forward pass, so skip building the autograd graph.
# With output_attentions=True the attentions are the last element of the model output.
with torch.no_grad():
    attention = pretrained_model(input_ids, token_type_ids=token_type_ids, output_attentions=True)[-1]

# Index of the first token whose token type id is 1, i.e. where the second sentence starts.
sentence_b_start = token_type_ids[0].tolist().index(1)
input_id_list = input_ids[0].tolist()  # Batch index 0
tokens = tokenizer.convert_ids_to_tokens(input_id_list)
head_view(attention, tokens, sentence_b_start)

In [None]:
# Attention head view for the BERT encoder inside the model returned by load_models.

# Encode the pair by calling the tokenizer directly; encode_plus is documented as deprecated
# in favour of __call__, which returns the same input_ids / token_type_ids for a text pair.
inputs = tokenizer(sentence_a, sentence_b, return_tensors='pt')

# Move the inputs to the device that was selected and handed to load_models, instead of a
# hard-coded 'cuda' that raises on machines without a GPU.
input_ids = inputs['input_ids'].to(device)
token_type_ids = inputs['token_type_ids'].to(device)

# Visualisation only needs the forward pass, so skip building the autograd graph.
# With output_attentions=True the attentions are the last element of the model output.
# NOTE(review): if load_models does not put the model in eval mode, dropout would perturb
# these attention weights - confirm in utils_task3.
with torch.no_grad():
    attention = trained_bert_linear_model.bert(input_ids, token_type_ids=token_type_ids, output_attentions=True)[-1]

# Index of the first token whose token type id is 1, i.e. where the second sentence starts.
sentence_b_start = token_type_ids[0].tolist().index(1)
input_id_list = input_ids[0].tolist()  # Batch index 0
tokens = tokenizer.convert_ids_to_tokens(input_id_list)
head_view(attention, tokens, sentence_b_start)