<a href="https://colab.research.google.com/github/ephipie/human-ai-parallel-detection/blob/main/LLM_Detection_Results_04_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import glob
from statsmodels.stats.contingency_tables import mcnemar
import os

In [None]:
# Colab-only: mount Google Drive at /content/drive so the Drive paths used
# in the cells below can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder with the batched prediction parquet files. Despite the name, this
# notebook only reads from it (see read_as_df(output_folder) below).
output_folder = '/content/drive/MyDrive/shared_data/llm_detection_data_preds_batched'

In [None]:
def read_as_df(folder_path):
    """
    Read every ``*.parquet`` file directly inside ``folder_path`` into one DataFrame.

    Files are read in sorted path order so that the row order of the result
    (and anything derived from it, such as the order of unique domains) does
    not depend on the order in which the filesystem lists the files.

    Args:
        folder_path: Directory containing the parquet files.

    Returns:
        A single DataFrame with the rows of all files concatenated and a
        fresh 0..n-1 index.

    Raises:
        ValueError: If ``folder_path`` contains no ``*.parquet`` files.
    """
    # glob returns matches in arbitrary order; sort for reproducible output.
    all_files = sorted(glob.glob(f"{folder_path}/*.parquet"))
    if not all_files:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" raised by pd.concat([]).
        raise ValueError(f"No parquet files found in {folder_path!r}")

    df_list = []
    for file in all_files:
        print(f"Reading file: {file}")
        df_list.append(pd.read_parquet(file))
    return pd.concat(df_list, ignore_index=True)

In [None]:
# Load all parquet batches from output_folder into one combined DataFrame
# (read_as_df prints each file path as it is read).
df = read_as_df(output_folder)

Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch1.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch2.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch3.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch4.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch5.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch6.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch7.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch8.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch9.parquet
Reading file: /content/drive/MyDrive/shared_data/llm_detection_data_preds_batched/batch10.parquet
Reading file: /content/drive/

In [None]:
# Show a single row to check which columns the combined frame contains.
df.head(1)

model,serial_num,domain,chunk_1,chunk_2,gpt,llama,chunk_1_embeddings,chunk_2_embeddings,gpt_embeddings,llama_embeddings,chunk_2_sim,gpt_sim,llama_sim,chunk_2_vs_gpt,chunk_2_vs_llama,llm_chunk2_vs_gpt,llm_chunk2_vs_gpt_prob,llm_chunk2_vs_llama,llm_chunk2_vs_llama_prob
0,1,blog,"A few years ago, in 1998 actually, somehow I h...",How to seek out and discover on your own. How ...,What you consume and what you experience. In a...,...and how to question what you're told. That'...,"[0.17692478, -0.19238019, 0.55274934, -0.65050...","[0.20167919, 0.070158415, 0.7308793, -0.549549...","[-0.063170284, 0.36187038, 0.6474991, 0.152032...","[0.21806918, -0.0717968, 0.8958833, -0.5207005...",0.823468,0.589775,0.921548,1,0,chunk_2,0.974043,chunk_2,0.99929


In [None]:
# LLM judge: 1 when its pick equals 'chunk_2', else 0
# (presumably 'chunk_2' is the human-written continuation -- confirm).
df['llm_correct_gpt'] = (df['llm_chunk2_vs_gpt'] == 'chunk_2').astype(int)
df['llm_correct_llama'] = (df['llm_chunk2_vs_llama'] == 'chunk_2').astype(int)
# Embedding baseline: reuse the existing chunk_2_vs_* flags under names that
# parallel the llm_correct_* columns.
df['emb_correct_gpt'] = df['chunk_2_vs_gpt']
df['emb_correct_llama'] = df['chunk_2_vs_llama']

def basic_summary(df):
    """
    Display a per-domain table of mean correctness and sample counts.

    Rows are the four correctness columns (embedding baseline and LLM judge,
    each against gpt and llama) followed by a final 'size' row holding the
    number of rows per domain; columns are the domains. The table is shown
    with ``display`` and nothing is returned.
    """
    metric_columns = [
        'emb_correct_gpt',
        'llm_correct_gpt',
        'emb_correct_llama',
        'llm_correct_llama',
    ]

    # Mean of each 0/1 correctness flag per domain, transposed so that
    # metrics become rows and domains become columns.
    per_domain_accuracy = df[['domain'] + metric_columns].groupby('domain').mean().T

    # Row count per domain, turned into a one-row frame labelled 'size' so it
    # stacks under the transposed accuracy table.
    per_domain_count = df.groupby('domain').size().rename('size').to_frame().T

    summary = pd.concat([per_domain_accuracy, per_domain_count])
    display(summary)

# Show mean correctness per domain for both methods, plus rows per domain.
basic_summary(df)

domain,acad,blog,fic,news,spok,tvm
emb_correct_gpt,0.55,0.85,0.7,0.84,1.0,0.95
llm_correct_gpt,0.73,0.69,0.96,0.67,0.33,0.68
emb_correct_llama,0.55,0.85,0.67,0.88,1.0,0.88
llm_correct_llama,0.87,0.95,1.0,0.85,0.5,0.91
size,100.0,100.0,100.0,100.0,100.0,100.0


In [None]:
def create_mcnemar_summary_df(df, model_name, alpha=0.05):
    """
    Generates a DataFrame summarizing McNemar's test results for a given model.

    For every domain in ``df['domain']`` and once more for the whole frame
    ('Overall'), the paired outcomes in ``emb_correct_<model_name>`` ("Base")
    and ``llm_correct_<model_name>`` ("LLM") are cross-tabulated and passed
    to the exact McNemar test.

    Args:
        df: DataFrame with a 'domain' column and the two correctness columns
            for ``model_name``, coded as 0/1.
        model_name: Suffix selecting the column pair, e.g. 'gpt' or 'llama'.
        alpha: Threshold each p-value is compared against (default 0.05).
            No multiple-comparison correction is applied across segments.

    Returns:
        DataFrame with one row per segment and the columns 'Segment',
        'Base Accuracy', 'LLM Accuracy', 'P-value', 'Significant', 'Winner'.
        'Winner' is "" unless the p-value is below ``alpha``.
    """
    base_col = 'emb_correct_' + model_name
    llm_col = 'llm_correct_' + model_name
    outcomes = [0, 1]

    results = []

    for domain in list(df['domain'].unique()) + ['Overall']:
        segmented_df = df if domain == 'Overall' else df[df['domain'] == domain]

        # crosstab only emits rows/columns for values that actually occur, so
        # a segment where a method is always right (or always wrong) yields a
        # table smaller than 2x2. Force the full layout so the discordant
        # cells are always present at fixed positions.
        table = pd.crosstab(segmented_df[base_col], segmented_df[llm_col])
        table = table.reindex(index=outcomes, columns=outcomes, fill_value=0)
        result = mcnemar(table, exact=True)

        base_accuracy = segmented_df[base_col].mean()
        llm_accuracy = segmented_df[llm_col].mean()

        is_significant = result.pvalue < alpha

        # Only name a winner when the paired difference is significant.
        winner = ""
        if is_significant:
            winner = "Base" if base_accuracy > llm_accuracy else "LLM"

        results.append({
            'Segment': domain,
            'Base Accuracy': base_accuracy,
            'LLM Accuracy': llm_accuracy,
            'P-value': result.pvalue,
            'Significant': is_significant,
            'Winner': winner
        })

    return pd.DataFrame(results)

# Build the Base-vs-LLM McNemar summary once per generator model, then show
# each table under its own heading.
gpt_summary_df, llama_summary_df = (
    create_mcnemar_summary_df(df, generator) for generator in ('gpt', 'llama')
)

print("GPT Summary:")
display(gpt_summary_df)

print("\nLlama Summary:")
display(llama_summary_df)


GPT Summary:


Unnamed: 0,Segment,Base Accuracy,LLM Accuracy,P-value,Significant,Winner
0,blog,0.85,0.69,0.016589,True,Base
1,tvm,0.95,0.68,4.628673e-07,True,Base
2,fic,0.7,0.96,2.556015e-06,True,LLM
3,acad,0.55,0.73,0.007915897,True,LLM
4,news,0.84,0.67,0.007632079,True,Base
5,spok,1.0,0.33,1.355253e-20,True,Base
6,Overall,0.815,0.676667,1.574393e-07,True,Base



Llama Summary:


Unnamed: 0,Segment,Base Accuracy,LLM Accuracy,P-value,Significant,Winner
0,blog,0.85,0.95,0.04138947,True,LLM
1,tvm,0.88,0.91,0.6476059,False,
2,fic,0.67,1.0,2.328306e-10,True,LLM
3,acad,0.55,0.87,9.430375e-07,True,LLM
4,news,0.88,0.85,0.6776395,False,
5,spok,1.0,0.5,1.776357e-15,True,Base
6,Overall,0.805,0.846667,0.08058114,False,


In [None]:
# Preview the first rows, which now include the derived
# llm_correct_* / emb_correct_* columns.
df.head()

model,serial_num,domain,chunk_1,chunk_2,gpt,llama,chunk_1_embeddings,chunk_2_embeddings,gpt_embeddings,llama_embeddings,...,chunk_2_vs_gpt,chunk_2_vs_llama,llm_chunk2_vs_gpt,llm_chunk2_vs_gpt_prob,llm_chunk2_vs_llama,llm_chunk2_vs_llama_prob,llm_correct_gpt,llm_correct_llama,emb_correct_gpt,emb_correct_llama
0,1,blog,"A few years ago, in 1998 actually, somehow I h...",How to seek out and discover on your own. How ...,What you consume and what you experience. In a...,...and how to question what you're told. That'...,"[0.17692478, -0.19238019, 0.55274934, -0.65050...","[0.20167919, 0.070158415, 0.7308793, -0.549549...","[-0.063170284, 0.36187038, 0.6474991, 0.152032...","[0.21806918, -0.0717968, 0.8958833, -0.5207005...",...,1,0,chunk_2,0.974043,chunk_2,0.99929,1,1,1,0
1,2,tvm,She's hit and staggers back. She falls off the...,You've done wonderful work for me. Anita nods ...,"She pauses, her silhouette framed against the ...","As she approaches, Anita's eyes widen, her gaz...","[0.019059956, 0.009883495, 0.40174046, -0.2103...","[-0.1272203, 0.094850525, 0.7139143, -0.320899...","[-0.081343934, 0.23176657, 0.7393192, 0.037610...","[-0.09176732, 0.07848711, 0.8584767, -0.350226...",...,1,1,chunk_2,0.893309,chunk_2,0.924142,1,1,1,1
2,5,fic,"In this country charity covers no sins!"" The s...",He looked like a youthful athlete from Oxford ...,"He rode with a straight-backed earnestness, hi...","As he rode beside Steinmetz, his gaze wandered...","[0.17215355, 0.11953501, 0.45598105, -0.430415...","[0.18087405, 0.14303514, 0.4629773, -0.6372824...","[0.104638964, 0.17923556, 0.6044753, -0.260741...","[-0.04206298, 0.3965542, 0.6186486, 0.08837312...",...,1,1,chunk_2,0.996406,chunk_2,0.99539,1,1,1,1
3,12,acad,Childhood fear and anxiety is highly prevalent...,"In recent years, Approach-Avoidance Tasks (AAT...","On this foundation, there emerges a crucial un...",Further research has also explored the role of...,"[0.14454798, 0.3539043, 0.37668487, -0.1954876...","[0.2980197, 0.34318274, 0.4024725, -0.02849399...","[0.090629525, 0.57718563, 0.3996874, 0.0491416...","[0.25648093, 0.38404942, 0.44225797, 0.0806673...",...,1,0,gpt,0.5,chunk_2,0.904651,0,1,1,0
4,12,blog,"Ok, are ya ready for the play-by-play scoop on...","Amy got around midnight, I guess it was. Crazy...","Monday, May 17, 2004 I woke up to the bright V...","After dinner, we decided to take a stroll alon...","[0.3391175, -0.31348073, 0.39489847, -0.571691...","[0.14056093, -0.17877051, 0.61079717, -0.52234...","[0.17332825, 0.18910483, 0.6438352, -0.4698107...","[0.18876973, -0.0639153, 0.79906297, -0.506764...",...,1,1,chunk_2,0.985936,chunk_2,0.999374,1,1,1,1


In [None]:
# Write the full frame, including the derived correctness columns, to a
# single parquet file on Drive.
final_output_folder = '/content/drive/MyDrive/shared_data/final_output'
os.makedirs(final_output_folder, exist_ok=True)  # no error if the folder already exists
df.to_parquet(f"{final_output_folder}/llm_detection.parquet")

In [None]:
# List every column of df, including the derived correctness flags.
df.columns

Index(['serial_num', 'domain', 'chunk_1', 'chunk_2', 'gpt', 'llama',
       'chunk_1_embeddings', 'chunk_2_embeddings', 'gpt_embeddings',
       'llama_embeddings', 'chunk_2_sim', 'gpt_sim', 'llama_sim',
       'chunk_2_vs_gpt', 'chunk_2_vs_llama', 'llm_chunk2_vs_gpt',
       'llm_chunk2_vs_gpt_prob', 'llm_chunk2_vs_llama',
       'llm_chunk2_vs_llama_prob', 'llm_correct_gpt', 'llm_correct_llama',
       'emb_correct_gpt', 'emb_correct_llama'],
      dtype='object', name='model')