# Test the Fine-Tuned BERT IMDB Model

In [1]:
import os
import random
from dataclasses import dataclass

os.environ["TOKENIZERS_PARALLELISM"] = "true"

import pandas as pd
import plotly.express as px
import torch as t
import transformers
from torch.utils.data import DataLoader

from bert_finetuning import BERTLanguageMODEL, BertClassifier
from src.transformers import TransformerConfig


# Hyperparameters and run settings shared by the cells below.
config = dict(
    batch_size=8,
    hidden_size=768,
    lr=1e-5,
    seq_len=512,
    num_layers=12,
    num_heads=12,
    vocab_size=28996,
    num_epochs=1,
    device="mps",
    dropout=0.1,
    layer_norm_epsilon=1e-12,
    train_set_size=1000,
    test_set_size=1000,
    num_workers=2,
)

# Architecture settings for the transformer, taken from `config`.
# Every field shares its name with the `config` key except `max_seq_len`,
# which is stored under "seq_len".
transformer_config = TransformerConfig(
    max_seq_len=config["seq_len"],
    **{
        field: config[field]
        for field in (
            "hidden_size",
            "num_heads",
            "num_layers",
            "layer_norm_epsilon",
            "dropout",
            "vocab_size",
        )
    },
)

# Location of the fine-tuned checkpoint. It can be overridden with the
# BERT_IMDB_MODEL_PATH environment variable so the notebook is not tied to a
# single machine; the original local path is kept as the fallback.
MODEL_PATH = os.environ.get(
    "BERT_IMDB_MODEL_PATH",
    "/Users/josephbloom/GithubRepositories/arena-v1-ldn/bert_imdb_model.pt",
)

# Build the classifier around the language model's `bert` submodule.
# NOTE(review): the second argument (2) is presumably the number of sentiment
# classes — confirm against BertClassifier.
my_bert = BERTLanguageMODEL(transformer_config)
my_bert_classifier = BertClassifier(my_bert.bert, 2)

# map_location="cpu" remaps the stored tensors onto the CPU while loading, so
# a checkpoint saved from another device still loads on a machine without it.
# The model is switched to eval mode in the evaluation cell, before inference.
my_bert_classifier.load_state_dict(t.load(MODEL_PATH, map_location="cpu"))

<All keys matched successfully>

In [2]:
# Paths of the IMDB artefacts inside the data folder: the raw archive and the
# pre-tokenised dataset. IMDB_PATH is not read by the visible cells.
DATA_FOLDER = "./data/bert-imdb/"
IMDB_PATH, SAVED_TOKENS_PATH = (
    os.path.join(DATA_FOLDER, file_name)
    for file_name in ("acllmdb_v1.tar.gz", "tokens.pt")
)

# The saved object unpacks into the train split and the test split.
train_data, test_data = t.load(f=SAVED_TOKENS_PATH)

In [3]:
# Load the pretrained "bert-base-cased" tokenizer through transformers'
# AutoTokenizer.
# NOTE(review): `tokenizer` is not referenced by the visible cells —
# presumably kept for decoding inputs during inspection; confirm it is needed.
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-cased")

In [25]:
# Evaluate the fine-tuned classifier on a random subset of the test split and
# collect true labels and predictions into `df` for the plots below.
EVAL_SAMPLE_SIZE = 200  # number of test examples scored in this cell
EVAL_SEED = 0  # fixed seed so the same subset is drawn on every run

true_star = []
pred_star = []
true_sent = []
pred_sent = []

# Use a dedicated seeded generator so the subset is reproducible without
# touching the global `random` state, and cap the sample size so a small test
# set does not make `sample` raise ValueError.
test_examples = list(test_data)
eval_examples = random.Random(EVAL_SEED).sample(
    test_examples, min(EVAL_SAMPLE_SIZE, len(test_examples))
)
testloader = DataLoader(
    eval_examples, batch_size=config["batch_size"], shuffle=False, num_workers=0
)

my_bert_classifier.eval()
# Gradients are not needed for inference; no_grad avoids building the
# autograd graph for every batch.
with t.no_grad():
    for inputs, masks, sentiment_labels, star_labels in testloader:
        sentiment, star = my_bert_classifier(inputs, masks)
        pred_sentiment = t.softmax(sentiment, dim=1)[:, 1]  # prob of good review
        # flatten() yields one value per example whether the tensor is shaped
        # (batch,) or (batch, 1), matching the original per-element .item().
        pred_sent.extend(pred_sentiment.flatten().tolist())
        true_sent.extend(sentiment_labels.flatten().tolist())
        pred_star.extend(star.flatten().tolist())
        true_star.extend(star_labels.flatten().tolist())

df = pd.DataFrame({"true_sent": true_sent, "pred_sent": pred_sent, "true_star": true_star, "pred_star": pred_star})
df.head()

Unnamed: 0,true_sent,pred_sent,true_star,pred_star
0,0,0.060224,1,2.261431
1,0,0.08518,4,3.100718
2,0,0.063187,2,3.535131
3,0,0.079393,1,3.254955
4,1,0.764828,8,6.255582


In [26]:
# Distribution of the predicted positive-review probability, one violin per
# true sentiment label. A title and readable labels let the figure stand
# alone when the notebook is skimmed.
px.violin(
    df,
    y="pred_sent",
    box=True,
    points="all",
    color="true_sent",
    template="plotly_dark",
    title="Predicted probability of a positive review by true sentiment",
    labels={
        "pred_sent": "Predicted P(positive review)",
        "true_sent": "True sentiment label",
    },
)

In [28]:
# Predicted star rating grouped by the true star rating. A title and readable
# labels let the figure stand alone when the notebook is skimmed.
px.box(
    df,
    x="true_star",
    y="pred_star",
    template="plotly_dark",
    title="Predicted vs. true star rating",
    labels={"true_star": "True star rating", "pred_star": "Predicted star rating"},
)