In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel
import torch
from torch.nn.functional import sigmoid

# Checkpoints: the pretrained backbone, and the directory that holds both the
# LoRA adapter weights and the tokenizer files.
model_name = "roberta-large"
adapter_dir = "../model/lora-roberta-productivity"

# Use the GPU when one is visible to torch, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer is read from the adapter directory, not from the backbone checkpoint
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

# Backbone with a single-output sequence-classification head
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=1,
)

# Wrap the backbone with the saved LoRA adapter, place it on the chosen
# device, and switch to evaluation mode
model = PeftModel.from_pretrained(base_model, adapter_dir)
model.to(device)
model.eval()

def predict_productivity(title: str) -> float:
    """Score a single title with the LoRA-adapted classifier.

    The title is tokenized (truncated to at most 32 tokens), run through
    ``model`` with gradient tracking disabled, and the resulting logit is
    passed through a sigmoid.

    Args:
        title: Text to score.

    Returns:
        The sigmoid of the model's output logit, as a Python float.
    """
    encoded = tokenizer(
        title,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=32,
    ).to(device)
    # Inference only: no autograd graph is needed
    with torch.no_grad():
        output = model(**encoded)
        # .item() requires exactly one element, i.e. one title and one logit
        return torch.sigmoid(output.logits).item()


  from .autonotebook import tqdm as notebook_tqdm
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Smoke test: score one hand-written title and show the resulting probability
example_prob = predict_productivity("Pomodoro technique for deep work")
print(example_prob)

0.7051305174827576


In [3]:
import pandas as pd
from sklearn.metrics import log_loss, accuracy_score

# Read the CSV that the evaluation is run against
test_df = pd.read_csv("../model/dataset_building/yt_watch_data_clean.csv")

# Scores at or above this cutoff are labelled 1, everything else 0
productive_threshold = 0.65

# Score each title individually, then binarise the scores at the cutoff
test_df = test_df.assign(
    pred_prob=lambda frame: frame["videoTitle"].apply(predict_productivity),
    pred_label=lambda frame: (frame["pred_prob"] >= productive_threshold).astype(int),
)

# Ground truth, raw scores, and thresholded predictions as separate series
y_true = test_df["label"]
y_prob = test_df["pred_prob"]
y_pred = test_df["pred_label"]

# Cross-entropy on the raw scores (currently disabled):
# ce_loss = log_loss(y_true, y_prob)
# print("Cross-Entropy Loss:", ce_loss)

# Fraction of rows whose thresholded prediction matches the label
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy with productivity threshold of ({productive_threshold}): {accuracy}")

# Persist the frame, including both prediction columns, for manual inspection
test_df.to_csv("test_df.csv", index=False)


Accuracy with productivity threshold of (0.65): 0.9397590361445783
