In [None]:
import torch
import numpy as np
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    DataCollatorWithPadding,
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
import os

In [None]:
def main(test_files=None, model_dir="./distilbert_ai_detector", test_dir=None):
    """Evaluate a saved sequence-classification model on JSON-lines test files.

    Each test file is read with ``pd.read_json(..., lines=True)`` and must provide
    a ``human_text`` and a ``machine_text`` column. Human texts are labelled 0 and
    machine texts are labelled 1; missing or empty texts are discarded. The
    resulting examples are tokenized and scored with ``Trainer.evaluate``.

    Args:
        test_files: File name (str) or iterable of file names located inside
            ``test_dir``. When ``None``, the list hard-coded at the top of this
            function is used, so it can still be edited in place.
        model_dir: Directory holding the saved model and tokenizer.
        test_dir: Directory containing the test files. Defaults to
            ``<current working directory>/test_data``.

    Returns:
        The metrics dict returned by ``Trainer.evaluate`` (it is also printed).

    Raises:
        ValueError: If no test files were given, or if no non-empty text is
            left after filtering.
    """
    if test_files is None:
        test_files = []  # Add names of your test files here
    elif isinstance(test_files, str):
        # A lone file name would otherwise be iterated character by character.
        test_files = [test_files]
    test_files = list(test_files)

    # Fail fast, before the (slow) model load, with an actionable message instead
    # of pandas' "No objects to concatenate".
    if not test_files:
        raise ValueError(
            "test_files is empty: add the names of your test files before calling main()."
        )

    if test_dir is None:
        test_dir = os.path.join(os.getcwd(), "test_data")

    # Load in model
    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    def preprocess_function(examples):
        # Truncate texts longer than 512 tokens; padding is left to the data
        # collator so each batch is only padded to its own longest sequence.
        return tokenizer(examples["text"], truncation=True, max_length=512)  # Adjust max_length if needed

    # --- Load the raw test files ---
    raw_df = pd.concat(
        [pd.read_json(os.path.join(test_dir, name), lines=True) for name in test_files],
        ignore_index=True,
    )

    # --- Build one labelled frame: human text -> 0, machine text -> 1 ---
    human_df = pd.DataFrame({"text": raw_df["human_text"], "generated": 0})
    ai_df = pd.DataFrame({"text": raw_df["machine_text"], "generated": 1})
    merged_df = pd.concat([human_df, ai_df], ignore_index=True)

    # Drop missing as well as empty texts; NaN/None entries cannot be tokenized.
    has_text = merged_df["text"].notna() & (merged_df["text"] != "")
    labeled_df = merged_df[has_text]
    if labeled_df.empty:
        raise ValueError("No non-empty texts were found in the given test files.")

    # --- Tokenize the test data ---
    test_dataset = Dataset.from_dict(
        {
            "text": labeled_df["text"].tolist(),
            "labels": labeled_df["generated"].tolist(),
        }
    )
    tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)

    # The raw 'text' column is no longer needed once the token ids exist.
    tokenized_test_dataset = tokenized_test_dataset.remove_columns(["text"])

    # Set the format to PyTorch tensors
    tokenized_test_dataset.set_format("torch")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)

        # 'binary' averaging reports precision/recall/F1 for the positive class (label 1).
        precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
        acc = accuracy_score(labels, predictions)

        return {
            'accuracy': acc,
            'f1': f1,
            'precision': precision,
            'recall': recall
        }

    # The tokenizer is not handed to Trainer: padding is already done by the
    # explicit data_collator, and the `tokenizer=` keyword is deprecated in
    # recent transformers releases in favour of `processing_class`.
    trainer = Trainer(
        model=model,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    eval_results = trainer.evaluate(
        eval_dataset=tokenized_test_dataset,
    )

    print("Evaluation results:", eval_results)
    return eval_results

In [None]:
# Before running this cell, add the names of your test files to `test_files` in main().
main()