#### In this notebook we are testing several fine tuned Distilbert models trained by us on different datasets from PAN: 'gemini-pro', 'gpt-4-turbo-preview', 'gpt-3.5-turbo-0125', 'mistralai-mixtral-8x7b-instruct-v0.1', 'bigscience-bloomz-7b1', 'meta-llama-llama-2-7b-chat-hf', 'chavinlo-alpaca-13b'.
#### All these are tested against texts from M4 coming from different generators from different domains: arxiv, reddit, wikihow and wikipedia. For each test the human texts per domain are combined with the texts from each LLM dataset.

In [1]:
import pandas as pd
import numpy as np
import torch
import json
import os
import torch.nn.functional as F

from tqdm import tqdm
from torch import nn
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from torch.utils.data import Dataset, DataLoader, TensorDataset, SequentialSampler
from timeit import default_timer as timer
from os import walk
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, recall_score, precision_score, roc_curve, auc, brier_score_loss

In [8]:
ROOT_DATA = './'
M4_DATA_FOLDER_PATH = f'{ROOT_DATA}/raw/m4-unified'
FINE_TUNED_MODELS_FOLDER_PATH = f'{ROOT_DATA}/fine_tuned_models'

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
!ls gdrive/MyDrive/tupotiite_na_daniel

fine_tuned_distilberts.zip  m4-unified.zip


In [5]:
!unzip gdrive/MyDrive/tupotiite_na_daniel/fine_tuned_distilberts.zip

Archive:  gdrive/MyDrive/tupotiite_na_daniel/fine_tuned_distilberts.zip
   creating: fine_tuned_models/
   creating: fine_tuned_models/fine_tune_bert_gpt-4-turbo-preview.pt/
  inflating: fine_tuned_models/fine_tune_bert_gpt-4-turbo-preview.pt/config.json  
  inflating: fine_tuned_models/fine_tune_bert_gpt-4-turbo-preview.pt/model.safetensors  
   creating: fine_tuned_models/fine_tune_bert_chavinlo-alpaca-13b.pt/
  inflating: fine_tuned_models/fine_tune_bert_chavinlo-alpaca-13b.pt/config.json  
  inflating: fine_tuned_models/fine_tune_bert_chavinlo-alpaca-13b.pt/model.safetensors  
   creating: fine_tuned_models/fine_tune_bert_mistralai-mixtral-8x7b-instruct-v0.1.pt/
  inflating: fine_tuned_models/fine_tune_bert_mistralai-mixtral-8x7b-instruct-v0.1.pt/config.json  
  inflating: fine_tuned_models/fine_tune_bert_mistralai-mixtral-8x7b-instruct-v0.1.pt/model.safetensors  
   creating: fine_tuned_models/fine_tune_bert_gemini-pro.pt/
  inflating: fine_tuned_models/fine_tune_bert_gemini-pro.p

In [6]:
!unzip gdrive/MyDrive/tupotiite_na_daniel/m4-unified.zip

Archive:  gdrive/MyDrive/tupotiite_na_daniel/m4-unified.zip
   creating: m4-unified/arxiv/
  inflating: m4-unified/arxiv/arxiv_bloomz.jsonl  
  inflating: m4-unified/arxiv/arxiv_chatGPT.jsonl  
  inflating: m4-unified/arxiv/arxiv_cohere.jsonl  
  inflating: m4-unified/arxiv/arxiv_davinci.jsonl  
  inflating: m4-unified/arxiv/arxiv_flant5.jsonl  
  inflating: m4-unified/arxiv/arxiv_human.jsonl  
   creating: m4-unified/reddit/
  inflating: m4-unified/reddit/reddit_bloomz.jsonl  
  inflating: m4-unified/reddit/reddit_chatGPT.jsonl  
  inflating: m4-unified/reddit/reddit_cohere.jsonl  
  inflating: m4-unified/reddit/reddit_davinci.jsonl  
  inflating: m4-unified/reddit/reddit_dolly.jsonl  
  inflating: m4-unified/reddit/reddit_flant5.jsonl  
  inflating: m4-unified/reddit/reddit_human.jsonl  
   creating: m4-unified/wikihow/
  inflating: m4-unified/wikihow/wikihow_bloomz.jsonl  
  inflating: m4-unified/wikihow/wikihow_chatGPT.jsonl  
  inflating: m4-unified/wikihow/wikihow_cohere.jsonl  


In [9]:
TEST_SET_FRACTION = 0.3
BATCH_SIZE = 32

In [10]:
df = pd.DataFrame(columns=['text', 'is_llm', 'domain', 'dataset_name'])

In [11]:
dir_path, dir_names, file_names = next(walk(M4_DATA_FOLDER_PATH))

for dir in dir_names:
    dataset_folder_path, _, dataset_names = next(walk(os.path.join(dir_path, dir)))

    for dataset_name in dataset_names:
        temp_df = pd.read_json(path_or_buf=f'{dataset_folder_path}/{dataset_name}', lines=True)
        temp_df['domain'] = dir
        temp_df['dataset_name'] = Path(dataset_name).stem

        is_llm_column = 0 if 'human' in dataset_name else 1
        temp_df['is_llm'] = is_llm_column

        df = pd.concat([df, temp_df], ignore_index=True)

In [12]:
display(df)

Unnamed: 0,text,is_llm,domain,dataset_name
0,The present work is devoted to the study of qu...,1,arxiv,arxiv_bloomz
1,We present the results of our analysis of the ...,1,arxiv,arxiv_bloomz
2,We report on spectroscopic observations made w...,1,arxiv,arxiv_bloomz
3,We present new numerical methods for stochasti...,1,arxiv,arxiv_bloomz
4,The Solar Chromosphere is an important compone...,1,arxiv,arxiv_bloomz
...,...,...,...,...
74023,This is how you clean it when it's stuck. You...,1,wikihow,wikihow_bloomz
74024,The following instructions describe how to bui...,1,wikihow,wikihow_bloomz
74025,The following steps show how to find out exact...,1,wikihow,wikihow_bloomz
74026,Coffee enemas have been used since ancient tim...,1,wikihow,wikihow_bloomz


In [13]:
df = df.astype({'is_llm': 'int64'})

In [14]:
display(df)

Unnamed: 0,text,is_llm,domain,dataset_name
0,The present work is devoted to the study of qu...,1,arxiv,arxiv_bloomz
1,We present the results of our analysis of the ...,1,arxiv,arxiv_bloomz
2,We report on spectroscopic observations made w...,1,arxiv,arxiv_bloomz
3,We present new numerical methods for stochasti...,1,arxiv,arxiv_bloomz
4,The Solar Chromosphere is an important compone...,1,arxiv,arxiv_bloomz
...,...,...,...,...
74023,This is how you clean it when it's stuck. You...,1,wikihow,wikihow_bloomz
74024,The following instructions describe how to bui...,1,wikihow,wikihow_bloomz
74025,The following steps show how to find out exact...,1,wikihow,wikihow_bloomz
74026,Coffee enemas have been used since ancient tim...,1,wikihow,wikihow_bloomz


In [15]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [17]:
def tokenize_texts(df, tokenizer):
    input_ids = []
    attention_masks = []
    for text in df.text.values:
        # `encode_plus` will:
        #   (1) Tokenize the sentence.
        #   (2) Prepend the `[CLS]` token to the start.
        #   (3) Append the `[SEP]` token to the end.
        #   (4) Map tokens to their IDs.
        #   (5) Pad or truncate the sentence to `max_length`
        #   (6) Create attention masks for [PAD] tokens.
        encoded_dict = tokenizer.encode_plus(
                            text,                      # Sentence to encode.
                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                            max_length = 512,           # Pad & truncate all sentences.
                            padding='max_length',
                            truncation=True,
                            return_attention_mask = True,   # Construct attn. masks.
                            return_tensors = 'pt',     # Return pytorch tensors.
                       )

        # Add the encoded sentence to the list.
        input_ids.append(encoded_dict['input_ids'])

        # And its attention mask (simply differentiates padding from non-padding).
        attention_masks.append(encoded_dict['attention_mask'])

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.from_numpy(df['is_llm'].values)

    return input_ids, attention_masks, labels

In [18]:
def calculate_accuracy(y_true, y_hat):
    correct_pred = torch.eq(y_hat, y_true).sum().item()
    return (correct_pred / len(y_hat)) * 100

def calculate_f1(y_true, y_hat):
    return f1_score(y_true, y_hat)

def calculate_recall(y_true, y_hat):
    return recall_score(y_true, y_hat)

def calculate_precision(y_true, y_hat):
    return precision_score(y_true, y_hat)

def calculate_brier(y_true, y_hat):
    y_prob = torch.sigmoid(y_hat)
    return brier_score_loss(y_true, y_prob)

def calculate_auc(y_true, y_hat):
    y_prob = torch.sigmoid(y_hat)

    false_positive_rates, true_positive_rates, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(false_positive_rates, true_positive_rates)

    return roc_auc, false_positive_rates, true_positive_rates

In [28]:
def test_step(model, dataloader, device):

    model.eval()

    all_y_true = []
    all_y_hat = []

    test_loss = 0
    steps = 0

    with torch.inference_mode():
        for batch in dataloader:
            batch_input_ids = batch[0].to(device)
            batch_input_mask = batch[1].to(device)
            batch_labels = batch[2].to(device)

            y_hat = model(batch_input_ids,
                          attention_mask=batch_input_mask,
                          labels=batch_labels)

            loss = y_hat.loss
            test_loss += loss.item()

            logits = y_hat.logits

            batch_labels = batch_labels.to('cpu').numpy()

            all_y_true.extend(batch_labels)
            all_y_hat.extend(logits.detach().cpu().numpy())

            steps += 1

        all_y_true = torch.FloatTensor(all_y_true)
        all_y_hat = torch.argmax(torch.FloatTensor(all_y_hat), dim=1)

        test_accuracy = calculate_accuracy(all_y_true, all_y_hat)
        test_f1 = calculate_f1(all_y_true, all_y_hat)
        test_recall = calculate_recall(all_y_true, all_y_hat)
        test_precision = calculate_precision(all_y_true, all_y_hat)
        # cannot calculate brier and auc because of the output of the distilbert model

    return test_loss / steps, test_accuracy, test_f1, test_recall, test_precision, 0, 0

def test_against_all(model, df, device, tokenizer):
    all_results = []

    for dataset_name in df['dataset_name'].unique():
        if 'human' in dataset_name:
            continue

        llm_df = df.loc[(df['dataset_name'] == dataset_name) & (df['is_llm'] == 1)]

        domain = llm_df['domain'].iloc[0]
        human_df = df.loc[(df['domain'] == domain) & (df['is_llm'] == 0)]

        test_df = pd.concat([llm_df, human_df], ignore_index=True)

        results_formatted = test(model, device, dataset_name, test_df, tokenizer)

        all_results.append({
            dataset_name: results_formatted
        })

    return all_results

def test(model, device, dataset_name, test_df, tokenizer):
    input_ids, attention_masks, labels = tokenize_texts(test_df, tokenizer)
    dataset = TensorDataset(input_ids, attention_masks, labels)

    # For validation the order doesn't matter, so we'll just read them sequentially.
    validation_dataloader = DataLoader(
            dataset, # The validation samples.
            sampler = SequentialSampler(dataset), # Pull out batches sequentially.
            batch_size = BATCH_SIZE # Evaluate with this batch size.
        )

    start_time = timer()

    test_loss, test_acc, test_f1, test_recall, test_precision, test_brier, test_auc_tuple = test_step(
        model,
        validation_dataloader,
        device
    )

    end_time = timer()

    results_formatted = {
            "test_loss": test_loss,
            "test_acc": test_acc,
            "test_f1": test_f1,
            "test_recall": test_recall,
            "test_precision": test_precision,
            "test_brier": test_brier,
            "test_auc_tuple": test_auc_tuple
        }

    print(
            f"against: {dataset_name} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f} | "
            f"test_f1: {test_f1:.4f} | "
            f"test_recall: {test_recall:.4f} | "
            f"test_precision: {test_precision:.4f} | "
            f"test_brier: {test_brier:.4f} | "
            f"time: {(end_time-start_time):.4f}"
        )

    return results_formatted

In [29]:
human_df = df.loc[df['is_llm'] == 0]
human_train_df, human_test_df = train_test_split(human_df, test_size=TEST_SET_FRACTION, random_state=69)

In [30]:
# models_to_test = [
#     'fine_tune_bert_bigscience-bloomz-7b1',
#     'fine_tune_bert_gemini-pro',
#     'fine_tune_bert_gpt-3.5-turbo-0125',
#     'fine_tune_bert_gpt-4-turbo-preview',
#     'fine_tune_bert_mistralai-mixtral-8x7b-instruct-v0.1'
# ]

models_to_test = [
    'fine_tune_bert_bigscience-bloomz-7b1',
    'fine_tune_bert_gemini-pro'
]

In [31]:
final_results = []

for model_name in models_to_test:
    model_path = f'{FINE_TUNED_MODELS_FOLDER_PATH}/{model_name}'

    model = DistilBertForSequenceClassification.from_pretrained(
        model_path,
        num_labels = 2,
        output_attentions = False, # Whether the model returns attentions weights.
        output_hidden_states = False, # Whether the model returns all hidden-states.
    )

    model.to(device)

    print(f'Testing against M4 data for fined tuned distilbert on {model_name}...')

    results = test_against_all(
        model=model,
        df=df,
        device=device,
        tokenizer=tokenizer
    )

    final_results.append({
        'base_model': model_name,
        'results_against_all_llms': results
    })

    print(f'Finished testing against M4 data for fined tuned distilbert on {model_name}')


Testing against M4 data for fined tuned distilbert on fine_tune_bert_bigscience-bloomz-7b1...
against: arxiv_bloomz | test_loss: 2.6081 | test_acc: 51.5833 | test_f1: 0.4850 | test_recall: 0.4560 | test_precision: 0.5180 | test_brier: 0.0000 | time: 103.1603
against: arxiv_cohere | test_loss: 1.4244 | test_acc: 71.5000 | test_f1: 0.7499 | test_recall: 0.8543 | test_precision: 0.6681 | test_brier: 0.0000 | time: 102.6315
against: arxiv_davinci | test_loss: 1.6556 | test_acc: 65.9667 | test_f1: 0.6860 | test_recall: 0.7437 | test_precision: 0.6367 | test_brier: 0.0000 | time: 102.4701
against: arxiv_flant5 | test_loss: 1.1231 | test_acc: 77.0500 | test_f1: 0.8079 | test_recall: 0.9653 | test_precision: 0.6947 | test_brier: 0.0000 | time: 102.1080
against: arxiv_chatGPT | test_loss: 1.1961 | test_acc: 75.4833 | test_f1: 0.7921 | test_recall: 0.9340 | test_precision: 0.6876 | test_brier: 0.0000 | time: 102.7293
against: reddit_bloomz | test_loss: 1.0516 | test_acc: 79.9000 | test_f1: 0.804

In [32]:
# Create a JSON Encoder class
class json_serialize(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

with open('./vs_m4.json', 'w', encoding='utf-8') as f:
    json.dump(final_results, f, ensure_ascii=False, indent=4, cls=json_serialize)

In [37]:
!cp ./vs_m4_v2.json '/content/gdrive/My Drive/'