In [1]:
import logging
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, Trainer
from datasets import Dataset, ClassLabel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from train_test_split import select_eval_with_cluster
from preprocessing import preprocess
from evaluation import evaluate
from bert import tokenize, get_BERT, prepare_dataset, compute_metrics

[nltk_data] Downloading package stopwords to /home/jonhue/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/jonhue/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package wordnet to /home/jonhue/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Request INFO-level logging on the root logger for the rest of the notebook.
logging.basicConfig(level=logging.INFO)

In [4]:
# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cuda'

In [5]:
# Run configuration for this evaluation notebook.
# MODEL is handed to get_BERT below; presumably a path to saved baseline weights — TODO confirm.
MODEL = 'models/baseline'
# Identifier passed to AutoTokenizer.from_pretrained inside evaluate_cluster.
TOKENIZER = 'bert-base-uncased' # 'cardiffnlp/twitter-roberta-base-sentiment-latest'
# Forwarded as prepare_dataset's `preprocessing` argument; None in this run.
PREPROCESSING = None

In [6]:
# Read the evaluation CSV and keep only the 'index' and 'cluster' columns.
df_cluster_map = (
  pd.read_csv('clustering+bert/eval.csv')
  .loc[:, ['index', 'cluster']]
)
df_cluster_map

Unnamed: 0,index,cluster
0,922648.0,0
1,944379.0,4
2,2182552.0,4
3,786886.0,4
4,1130778.0,3
...,...,...
1249995,1478680.0,2
1249996,1972646.0,4
1249997,1710597.0,5
1249998,1835784.0,4


In [None]:
# Distinct cluster labels found in the mapping, in ascending order.
CLUSTERS = np.sort(pd.unique(df_cluster_map['cluster']))
CLUSTERS

array([0, 1, 2, 3, 4, 5, 6])

In [8]:
# Obtain the model once via the project's get_BERT helper; evaluate_cluster reuses it for every cluster.
# NOTE(review): presumably loads the checkpoint at MODEL and places it on `device` — confirm in bert.py.
model = get_BERT(MODEL, device)

In [None]:
def evaluate_cluster(cluster: int) -> dict:
  """Evaluate the notebook-level ``model`` on the evaluation samples of one cluster.

  Reads the globals ``df_cluster_map``, ``PREPROCESSING``, ``TOKENIZER`` and
  ``model``; prints a banner before evaluating and the metrics afterwards.

  Args:
    cluster: Cluster label used to select rows through
      ``select_eval_with_cluster(df_cluster_map, cluster)``.

  Returns:
    The metrics mapping returned by ``Trainer.evaluate()`` (metric name ->
    value). This is a mapping, not a single float.
  """
  print(f'====== EVALUATING CLUSTER {cluster} ======')
  df_eval = select_eval_with_cluster(df_cluster_map, cluster)
  dataset_eval = prepare_dataset(df_eval, preprocessing=PREPROCESSING)

  tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
  eval_tokenized = tokenize(dataset_eval, tokenizer)

  # Only trainer.evaluate() is called on this Trainer; no training is run here.
  trainer = Trainer(model, eval_dataset=eval_tokenized, tokenizer=tokenizer, compute_metrics=compute_metrics)
  # Named distinctly from the notebook-level `metrics` dict that collects per-cluster results.
  cluster_metrics = trainer.evaluate()
  print(cluster_metrics)
  return cluster_metrics

In [None]:
# Evaluate every cluster in order and key each result by its cluster label.
metrics = {
  cluster: evaluate_cluster(cluster)
  for cluster in CLUSTERS
}

In [None]:
# Tabulate the results: one column per cluster, one row per metric.
pd.DataFrame.from_dict(metrics, orient='columns')

Unnamed: 0,0,1,2,3,4,5,6
eval_loss,0.203119,0.201653,0.205061,0.205536,0.204494,0.242961,0.207357
eval_accuracy,0.916925,0.917425,0.91405,0.9154,0.91645,0.901128,0.915902
eval_precision,0.914096,0.913935,0.912044,0.913048,0.913859,0.90625,0.913507
eval_recall,0.92176,0.920688,0.916513,0.9194,0.920672,0.884882,0.921097
eval_f1,0.917912,0.917299,0.914273,0.916213,0.917253,0.895439,0.917286
eval_bce,0.203119,0.201653,0.205061,0.205536,0.204494,0.242961,0.207357
eval_auc,0.974774,0.975123,0.974425,0.974248,0.974502,0.966034,0.973816
eval_confidence,0.924203,0.924322,0.922995,0.924109,0.923573,0.920724,0.924075
eval_confidence_std,0.120274,0.119713,0.121096,0.119842,0.120218,0.123032,0.120127
eval_correct_confidence,0.940457,0.940472,0.940173,0.940449,0.939697,0.938692,0.94016
