In [22]:
import pandas as pd
import numpy as np
import torch

from tqdm import tqdm
from torch import nn
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from timeit import default_timer as timer
from os import walk
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_curve, auc, brier_score_loss

In [23]:
ROOT_DATA = '../../data'
HUMAN_JSON_FILE_NAME = 'human.jsonl'
HUMAN_JSON_PATH = f'{ROOT_DATA}/raw/{HUMAN_JSON_FILE_NAME}'
MODELS_JSON_FOLDER_PATH = f'{ROOT_DATA}/raw/machines'
BASELINE_MODELS_FOLDER_PATH = f'{ROOT_DATA}/baseline/models'

In [24]:
BATCH_SIZE = 32
LSTM_UNITS = 256
LSTM_LAYERS = 5
EMBEDDING_SIZE = 512

In [25]:
TEST_SET_FRACTION = 0.3

In [26]:
df = pd.read_json(path_or_buf=HUMAN_JSON_PATH, lines=True)
df['text_index'] = df.index
df['is_llm'] = 0
df['dataset_name'] = Path(HUMAN_JSON_FILE_NAME).stem

In [27]:
dir_path, dir_names, file_names = next(walk(MODELS_JSON_FOLDER_PATH))

for file_name in file_names:
    temp_df = pd.read_json(path_or_buf=f'{MODELS_JSON_FOLDER_PATH}/{file_name}', lines=True)
    temp_df['text_index'] = temp_df.index
    temp_df['is_llm'] = 1
    temp_df['dataset_name'] = Path(file_name).stem

    df = pd.concat([df, temp_df], ignore_index=True)

df.drop(labels=['id'], inplace=True, axis='columns')

In [28]:
checkpoint = 'distilbert-base-cased'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [29]:
df['tokenized_text'] = tokenizer(list(df['text'].to_list()))['input_ids']

Token indices sequence length is longer than the specified maximum sequence length for this model (843 > 512). Running this sequence through the model will result in indexing errors


In [30]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [31]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size, layers_num, device, output_size=1, dropout=0):
        super().__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.layers_num = layers_num
        self.output_size= output_size
        self.dropout = dropout
        self.device = device

        self.embed = nn.Embedding(self.vocab_size, self.embedding_size, device=self.device)

        self.lstm = nn.LSTM(
            input_size=self.embedding_size,
            hidden_size=self.hidden_size,
            num_layers=self.layers_num,
            batch_first=True,
            dropout=self.dropout,
            device=self.device
        )

        self.fc = nn.Linear(
            self.hidden_size,
            self.output_size
        )

    def forward(self, X, lengths):
        embeddings = self.embed(X)

        seq_output, (h_n, c_n) = self.lstm(embeddings)

        out = seq_output.sum(dim=1).div(lengths.float().unsqueeze(dim=1))
        logits = self.fc(out)
        return logits

In [32]:
class TextDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.size

    def __getitem__(self, index):
        return (
            self.X[index],
            self.y[index]
        )

In [33]:
def collate_fn(batch):
  # We want to sort the batch by seq length,
  # in order to make the computation more efficient
  batch = sorted(batch, key=lambda x: len(x[0]), reverse=True)

  inputs = [torch.LongTensor(x[0]).to(device) for x in batch]
  padded_input = nn.utils.rnn.pad_sequence(inputs, batch_first=True)

  lengths = torch.LongTensor([len(x[0]) for x in batch]).to(device)

  y = torch.FloatTensor(np.array([x[1] for x in batch])).reshape(-1, 1).to(device)

  return padded_input, lengths, y

In [34]:
def calculate_accuracy(y_true, y_hat):
    correct_pred = torch.eq(torch.sigmoid(y_hat).round(), y_true).sum().item()
    return (correct_pred / len(y_hat)) * 100

def calculate_f1(y_true, y_hat):
    y_pred = torch.sigmoid(y_hat).round()
    return f1_score(y_true, y_pred)

def calculate_brier(y_true, y_hat):
    y_prob = torch.sigmoid(y_hat)
    return brier_score_loss(y_true, y_prob)

def calculate_auc(y_true, y_hat):
    y_prob = torch.sigmoid(y_hat)

    false_positive_rates, true_positive_rates, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(false_positive_rates, true_positive_rates)

    return roc_auc, false_positive_rates, true_positive_rates

In [55]:
def test_step(model, dataloader, loss_fn, device):

    model.eval()

    all_y_true = []
    all_y_hat = []

    test_loss = 0
    steps = 0

    with torch.inference_mode():
        for X, lengths, y in dataloader:

            X, y = X.to(device), y.to(device)

            y_hat = model(X, lengths)

            all_y_true.extend(y)
            all_y_hat.extend(y_hat)

            loss = loss_fn(y_hat, y)
            test_loss += loss.item()

            steps += 1

        all_y_true = torch.FloatTensor(all_y_true)
        all_y_hat = torch.FloatTensor(all_y_hat)

        test_accuracy = calculate_accuracy(all_y_true, all_y_hat)
        test_f1 = calculate_f1(all_y_true, all_y_hat)
        test_brier = calculate_brier(all_y_true, all_y_hat)
        test_auc_tuple = calculate_auc(all_y_true, all_y_hat)

    return test_loss / steps, test_accuracy, test_f1, test_brier, test_auc_tuple

def test_against_all(model, trained_llm_name, human_test_df, df, loss_fn, device):
    all_results = []

    for dataset_name in df['dataset_name'].unique():
        if dataset_name in [trained_llm_name, Path(HUMAN_JSON_FILE_NAME).stem]:
            continue

        llm_df = df.loc[df['dataset_name'] == dataset_name]
        test_df = pd.concat([human_test_df, llm_df], ignore_index=True)

        test_dataset = TextDataset(test_df['tokenized_text'], test_df['is_llm'])
        test_dataloader = DataLoader(
            test_dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            drop_last=False,
            collate_fn=collate_fn
        )

        start_time = timer()

        test_loss, test_acc, test_f1, test_brier, test_auc_tuple = test_step(
            model,
            test_dataloader,
            loss_fn,
            device
        )

        end_time = timer()

        results_formatted = {
            "test_loss": test_loss,
            "test_acc": test_acc,
            "test_f1": test_f1,
            "test_brier": test_brier,
            "test_auc_tuple": test_auc_tuple
        }
        
        print(
            f"against: {dataset_name} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f} | "
            f"test_f1: {test_f1:.4f} | "
            f"test_brier: {test_brier:.4f} | "
            f"time: {(end_time-start_time):.4f}"
        )

        all_results.append({
            dataset_name: results_formatted
        })

    return all_results

In [56]:
human_df = df.loc[df['is_llm'] == 0]
human_train_df, human_test_df = train_test_split(human_df, test_size=TEST_SET_FRACTION, random_state=69)

In [57]:
final_results = []

for llm_name in df['dataset_name'].unique()[:2]:
    if llm_name == Path(HUMAN_JSON_FILE_NAME).stem:
        continue
    
    model_path = f'{BASELINE_MODELS_FOLDER_PATH}/{llm_name}.pt'

    model = RNN(tokenizer.vocab_size, EMBEDDING_SIZE, LSTM_UNITS, LSTM_LAYERS, device, dropout=0.6).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))

    loss_fn = nn.BCEWithLogitsLoss()

    print(f'Testing against all for {llm_name}...')

    results = test_against_all(
        model=model,
        trained_llm_name=llm_name,
        human_test_df=human_test_df,
        df=df,
        loss_fn=loss_fn,
        device=device
    )

    final_results.append({
        'model': llm_name,
        'results_against_all': results
    })

    print(f'Finished testing against all for {llm_name}')
    

Testing against all for alpaca-7b...
against: bigscience-bloomz-7b1 | test_loss: 1.8277 | test_acc: 43.9887 | test_f1: 0.4367 | test_brier: 0.4611 | time: 57.4598
against: chavinlo-alpaca-13b | test_loss: 0.2357 | test_acc: 95.1909 | test_f1: 0.9681 | test_brier: 0.0447 | time: 43.8330
against: gemini-pro | test_loss: 2.0200 | test_acc: 36.0679 | test_f1: 0.3014 | test_brier: 0.5272 | time: 57.6662
against: gpt-3.5-turbo-0125 | test_loss: 1.0577 | test_acc: 55.8699 | test_f1: 0.6036 | test_brier: 0.3160 | time: 49.8250
against: gpt-4-turbo-preview | test_loss: 1.3113 | test_acc: 41.6549 | test_f1: 0.3991 | test_brier: 0.4084 | time: 59.7093
against: meta-llama-llama-2-70b-chat-hf | test_loss: 1.8389 | test_acc: 37.4823 | test_f1: 0.3272 | test_brier: 0.4954 | time: 58.3678
against: meta-llama-llama-2-7b-chat-hf | test_loss: 1.8839 | test_acc: 37.3409 | test_f1: 0.3247 | test_brier: 0.5022 | time: 54.5227
against: mistralai-mistral-7b-instruct-v0.2 | test_loss: 1.8612 | test_acc: 36.138

In [49]:
print(final_results)

[{'model': 'alpaca-7b', 'results_against_all': [{'bigscience-bloomz-7b1': (1.827736152211825, 43.988684582743986, 0.4366998577524893, 0.46113130766759836, (0.7451181463444827, array([0.        , 0.0030581 , 0.0030581 , 0.00611621, 0.00611621,
       0.00917431, 0.00917431, 0.01223242, 0.01223242, 0.01529052,
       0.01529052, 0.01834862, 0.01834862, 0.02140673, 0.02140673,
       0.02446483, 0.02446483, 0.02752294, 0.02752294, 0.03058104,
       0.03058104, 0.03363914, 0.03363914, 0.03669725, 0.03669725,
       0.03975535, 0.03975535, 0.04281346, 0.04281346, 0.04587156,
       0.04587156, 0.04892966, 0.04892966, 0.05198777, 0.05198777,
       0.05504587, 0.05504587, 0.05810398, 0.05810398, 0.06116208,
       0.06116208, 0.06422018, 0.06422018, 0.06727829, 0.06727829,
       0.07033639, 0.07033639, 0.0733945 , 0.0733945 , 0.0764526 ,
       0.0764526 , 0.0795107 , 0.0795107 , 0.08256881, 0.08256881,
       0.08562691, 0.08562691, 0.08868502, 0.08868502, 0.09174312,
       0.09174312, 0