In [1]:
import pandas as pd


df = pd.read_csv("../dataset_prepare/toy_test_with_summaries_and_mimicked_writings.csv")
df

Unnamed: 0,dataset,author,text,topic,gender,age,sign,date,file_name,subject,index,subreddit,summary,Sample Indices,Prompt,openai/gpt-4.1-mini-2025-04-14-sample
0,blog,15365,"'Bathrooms, hallway corners, laundr...",indUnk,female,34.0,Cancer,"28,July,2004",,,,,"Choose accessible locations like bathrooms, ha...",41203,You will be given one or more writing samples ...,Choosing the right spot for your kitten’s litt...
1,blog,15365,urlLink June 2003 Outlook from Moonsur...,indUnk,female,34.0,Cancer,"07,June,2004",,,,,"The Gemini New Moon on May 30th, 2003, marks a...",13024,You will be given one or more writing samples ...,urlLink Cosmic Currents - Gemini New Moon Ushe...
2,blog,15365,urlLink SAGITTARIUS LUNAR CYCLE by Cl...,indUnk,female,34.0,Cancer,"07,June,2004",,,,,"The Sagittarius lunar cycle, marked by a total...",24031,You will be given one or more writing samples ...,"The Sagittarius lunar cycle, illuminated by th..."
3,blog,15365,"The Beatles Title: Let It Be (Lennon, M...",indUnk,female,34.0,Cancer,"12,October,2002",,,,,"""Let It Be"" by The Beatles is a song expressin...",02413,You will be given one or more writing samples ...,urlLink Let It Be: Words of Comfort in Trouble...
4,blog,15365,THE MOON WAS STILL UP Anger and pain I c...,indUnk,female,34.0,Cancer,"14,September,2003",,,,,The text explores deep emotions of anger and p...,42130,You will be given one or more writing samples ...,urlLink Beneath the Surface: The Unyielding Fl...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,reddit,wonderfuldog,Occam's Razor]( \n A guy is found dead of a g...,,,,,,,,446467.0,atheism,The text explains Occam's Razor by comparing t...,397399396395398,You will be given one or more writing samples ...,Occam's Razor is a principle that gets brought...
396,reddit,wonderfuldog,"If we lived side-by-side with dinosaurs, don't...",,,,,,,,849514.0,atheism,"The text questions why, if humans lived alongs...",396395399397398,You will be given one or more writing samples ...,reposting]\n\nOne question that seems to get o...
397,reddit,wonderfuldog,it means different things to different people ...,,,,,,,,1242640.0,atheism,The text emphasizes the importance of a shared...,398399396395397,You will be given one or more writing samples ...,Anyone who’s ever tried to have a serious conv...
398,reddit,wonderfuldog,Do you think there is any truth to people sayi...,,,,,,,,609221.0,atheism,The text addresses the criticism that r/atheis...,397399395398396,You will be given one or more writing samples ...,reposting] \nWe get this criticism here fairl...


In [2]:
df.columns

Index(['dataset', 'author', 'text', 'topic', 'gender', 'age', 'sign', 'date',
       'file_name', 'subject', 'index', 'subreddit', 'summary',
       'Sample Indices', 'Prompt', 'openai/gpt-4.1-mini-2025-04-14-sample'],
      dtype='object')

In [3]:
df.dataset.unique()

array(['blog', 'CCAT50', 'enron', 'reddit'], dtype=object)

In [4]:
blog = df.copy()[df["dataset"] == "blog"].reset_index()
CCAT50 = df.copy()[df["dataset"] == "CCAT50"].reset_index()
enron = df.copy()[df["dataset"] == "enron"].reset_index()
reddit = df.copy()[df["dataset"] == "reddit"].reset_index()
blog.shape, CCAT50.shape, enron.shape, reddit.shape

((100, 17), (100, 17), (100, 17), (100, 17))

In [11]:
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer
from sklearn.metrics import classification_report
from transformers import Trainer, AutoModelForSequenceClassification


model_name = "allenai/longformer-base-4096"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def get_text_encodings(df, text_col="text", 
                       LLM_col="openai/gpt-4.1-mini-2025-04-14-sample"):
    return tokenizer(list(df[text_col]), list(df[LLM_col]), 
                     truncation=True, padding="max_length", max_length=2048)


class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


def get_dataset(df, text1_col="text", 
                text2_col="openai/gpt-4.1-mini-2025-04-14-sample", 
                labels_col=None):
    
    encodings = get_text_encodings(df, text1_col, text2_col)

    if labels_col is None:
        labels = [0] * len(encodings.input_ids)
    else:
        labels = df[labels_col].tolist()

    dataset = CustomDataset(encodings, labels)
    return dataset


def get_model_and_trainer(model_load_file):
    model = AutoModelForSequenceClassification.from_pretrained(model_load_file)
    trainer = Trainer(
        model=model,
    )
    return model, trainer


def print_classification_report(y_test, y_pred):
    print(classification_report(y_test, y_pred, zero_division=0))


def print_eval_report(trainer, test_dataset):
    predictions = trainer.predict(test_dataset)
    y_pred = predictions.predictions.argmax(-1)
    y_test = [0] * len(test_dataset.labels)
    print_classification_report(y_test, y_pred)

### Blog

              precision    recall  f1-score   support

           0       0.86      0.94      0.90     12000
           1       0.89      0.78      0.83      8000

    accuracy                           0.87     20000
   macro avg       0.88      0.86      0.86     20000
weighted avg       0.87      0.87      0.87     20000

In [15]:
test_results = pd.read_csv("../dataset_prepare/blog_AV_datasets/test_results.csv")
print(classification_report(test_results["label"], test_results["prediction"], zero_division=0))

              precision    recall  f1-score   support

           0       0.86      0.94      0.90     12000
           1       0.89      0.78      0.83      8000

    accuracy                           0.87     20000
   macro avg       0.88      0.86      0.86     20000
weighted avg       0.87      0.87      0.87     20000



In [12]:
blog_dataset = get_dataset(blog)
b_test = pd.read_csv("../dataset_prepare/blog_AV_datasets/test.csv")
blog_dataset_test = get_dataset(pd.read_csv("../dataset_prepare/blog_AV_datasets/test.csv"), "text1", "text2", "label")

blog_model_load_file = "../AV_models/longformer-base-4096/blog_AV_datasets/checkpoint-10000"
blog_model, blog_trainer = get_model_and_trainer(blog_model_load_file)

print_eval_report(blog_trainer, blog_dataset_test)

print("Blog dataset evaluation:")

print_eval_report(blog_trainer, blog_dataset)

Initializing global attention on CLS token...


              precision    recall  f1-score   support

           0       1.00      0.65      0.79     20000
           1       0.00      0.00      0.00         0

    accuracy                           0.65     20000
   macro avg       0.50      0.33      0.39     20000
weighted avg       1.00      0.65      0.79     20000

Blog dataset evaluation:


              precision    recall  f1-score   support

           0       1.00      0.15      0.26       100
           1       0.00      0.00      0.00         0

    accuracy                           0.15       100
   macro avg       0.50      0.07      0.13       100
weighted avg       1.00      0.15      0.26       100



In [17]:
CCAT50_dataset = get_dataset(CCAT50)
c_test = pd.read_csv("../dataset_prepare/CCAT50_AV_datasets/test.csv")
CCAT50_dataset_test = get_dataset(pd.read_csv("../dataset_prepare/CCAT50_AV_datasets/test.csv"), "text1", "text2", "label")
CCAT50_model_load_file = "../AV_models/longformer-base-4096/CCAT50_AV_datasets/checkpoint-5000"
CCAT50_model, CCAT50_trainer = get_model_and_trainer(CCAT50_model_load_file)

print_eval_report(CCAT50_trainer, CCAT50_dataset_test)
print("CCAT50 dataset evaluation:")
print_eval_report(CCAT50_trainer, CCAT50_dataset)

              precision    recall  f1-score   support

           0       1.00      0.65      0.79     20000
           1       0.00      0.00      0.00         0

    accuracy                           0.65     20000
   macro avg       0.50      0.33      0.39     20000
weighted avg       1.00      0.65      0.79     20000

CCAT50 dataset evaluation:


              precision    recall  f1-score   support

           0       1.00      0.07      0.13       100
           1       0.00      0.00      0.00         0

    accuracy                           0.07       100
   macro avg       0.50      0.04      0.07       100
weighted avg       1.00      0.07      0.13       100



In [18]:
enron_dataset = get_dataset(enron)
e_test = pd.read_csv("../dataset_prepare/enron_AV_datasets/test.csv")
enron_dataset_test = get_dataset(pd.read_csv("../dataset_prepare/enron_AV_datasets/test.csv"), "text1", "text2", "label")
enron_model_load_file = "../AV_models/longformer-base-4096/enron_AV_datasets/checkpoint-5000"
enron_model, enron_trainer = get_model_and_trainer(enron_model_load_file)

print_eval_report(enron_trainer, enron_dataset_test)
print("Enron dataset evaluation:")
print_eval_report(enron_trainer, enron_dataset)

              precision    recall  f1-score   support

           0       1.00      0.64      0.78     20000
           1       0.00      0.00      0.00         0

    accuracy                           0.64     20000
   macro avg       0.50      0.32      0.39     20000
weighted avg       1.00      0.64      0.78     20000

Enron dataset evaluation:


              precision    recall  f1-score   support

           0       1.00      0.13      0.23       100
           1       0.00      0.00      0.00         0

    accuracy                           0.13       100
   macro avg       0.50      0.07      0.12       100
weighted avg       1.00      0.13      0.23       100



In [20]:
reddit_dataset = get_dataset(reddit)
r_test = pd.read_csv("../dataset_prepare/reddit_AV_datasets/test.csv")
reddit_dataset_test = get_dataset(pd.read_csv("../dataset_prepare/reddit_AV_datasets/test.csv"), "text1", "text2", "label")

reddit_model_load_file = "../AV_models/longformer-base-4096/reddit_AV_datasets/checkpoint-5000"
reddit_model, reddit_trainer = get_model_and_trainer(reddit_model_load_file)

print_eval_report(reddit_trainer, reddit_dataset_test)
print("Reddit dataset evaluation:")
print_eval_report(reddit_trainer, reddit_dataset)

              precision    recall  f1-score   support

           0       1.00      0.67      0.80     20000
           1       0.00      0.00      0.00         0

    accuracy                           0.67     20000
   macro avg       0.50      0.33      0.40     20000
weighted avg       1.00      0.67      0.80     20000

Reddit dataset evaluation:


              precision    recall  f1-score   support

           0       1.00      0.37      0.54       100
           1       0.00      0.00      0.00         0

    accuracy                           0.37       100
   macro avg       0.50      0.18      0.27       100
weighted avg       1.00      0.37      0.54       100

