In [1]:
import pickle
import numpy as np
from pathlib import Path

import torch
import torch.nn as nn
from transformers import AutoModelForSequenceClassification, AutoTokenizer 
from torch.utils.data import Dataset, DataLoader

from dataset import HumorDataset
from metrics import metrics
from pickle_loader import load_pickle

from typing import Tuple, List, Dict
from tqdm import tqdm

# Select the compute device once for the whole notebook:
# the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
# Hugging Face Hub id of the checkpoint to evaluate (a joke detector, per its name).
model_name = "Reggie/muppet-roberta-base-joke_detector"

# Tokenizer with its maximum sequence length set to 510 tokens; later calls
# that pass truncation=True cut inputs down to this limit.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    model_max_length=510,
)

# Load the sequence-classification model, then move it to the selected device.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.to(device)

In [3]:
# Score every dataset item with the classifier and collect the matching
# reference labels, so a later cell can sweep decision thresholds.
dataset = HumorDataset()
predictions = []  # softmax probability of class index 1, one float per item
true_y = []       # item[4] of each record -- presumably the gold label; verify against HumorDataset

# Pure inference: disable autograd so no computation graph is built or kept
# alive for each forward pass (lower memory use, faster loop, same outputs).
with torch.no_grad():
    for item in tqdm(dataset):
        # item[1] is presumably the raw text (TODO confirm against HumorDataset).
        # The empty second segment is intentional and kept exactly as in the
        # original call: the input is encoded as a (text, '') pair.
        tokenized_text = tokenizer(item[1], '', truncation=True, return_tensors="pt")

        # Only input_ids is forwarded; each call encodes a single, unpadded
        # sequence, so there are no padding positions to mask.
        output = model(tokenized_text["input_ids"].to(device))

        # logits[0] is the single sequence in the batch; keep P(class index 1).
        predictions.append(torch.softmax(output["logits"][0], -1).tolist()[1])
        true_y.append(item[4])

10166it [01:42, 99.34it/s] 


In [4]:
# Sweep several decision thresholds over the per-item scores and record what
# the project's `metrics` helper returns for each one.
all_metrics = []
for threshold in (0.2, 0.4, 0.6, 0.8):
    # Binarize: a score strictly greater than the threshold maps to 1, else 0.
    y_pred = [1 if pred > threshold else 0 for pred in predictions]
    # Argument order (predicted, reference) is kept exactly as the helper is called here.
    curr_metrics = metrics(y_pred, true_y)
    all_metrics.append((threshold, curr_metrics))

In [5]:
# Display the collected (threshold, metrics result) pairs as this cell's output.
# NOTE(review): the meaning of each value in the metrics tuple is defined by the
# project's `metrics` helper, which is not visible here -- confirm before interpreting.
all_metrics

[(0.2, (0.5248868778280543, 0.5324442164657605, 0.40842022427700175)),
 (0.4, (0.5251819791461735, 0.5366342301087579, 0.36887664764902617)),
 (0.6, (0.5241983080857762, 0.5390724269377383, 0.3338579578988786)),
 (0.8, (0.5240999409797363, 0.5456238361266295, 0.28821562069643913))]