# Load JSON File from Detector

In [5]:
import pandas as pd
import json

In [3]:
input_path = 'generated_output_gpt-4.1-2025-04-14_all_detectors.json' # gpt 4.1
# input_path = 'generated_output_claude-sonnet-4-20250514_all_detectors.json' # claude sonnet 4
# input_path = 'generated_output_gemini-2.0-flash_all_detectors.json' # gemini-2.0-flash
# input_path = 'generated_output_claude-opus-4-20250514_all_detectors.json' # claude opus 4

# input_path = 'generated_output_gpt-4.1-2025-04-14_stealthgpt_all_detectors.json' # gpt 4.1 stealth
# input_path = 'generated_output_claude-opus-4-20250514_stealthgpt_all_detectors.json' # claude opus stealth
# input_path = 'generated_output_claude-sonnet-4-20250514_stealthgpt_all_detectors.json' # claude sonnet stealth
# input_path = 'generated_output_gemini-2.0-flash_stealthgpt_all_detectors.json' # gemini 2.0 flash stealth
with open(input_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

In [4]:
pd.set_option("display.max_colwidth", 300)
df = pd.DataFrame(dataset)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1992 entries, 0 to 1991
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   text           1992 non-null   object
 1   year           1992 non-null   object
 2   genre          1992 non-null   object
 3   source         1992 non-null   object
 4   topic          1992 non-null   object
 5   ai_generated   1992 non-null   object
 6   human_verdict  1992 non-null   object
 7   ai_verdicts    1992 non-null   object
dtypes: object(8)
memory usage: 124.6+ KB


# Compute Threshold

In [131]:
import json
import numpy as np
import pandas as pd  # Import pandas for better table formatting

def compute_threshold(data, detector_name, score_extractor, fpr_target=0.01):
    """
    Computes a score threshold for a given detector based on a target 
    False Positive Rate (FPR) on human-written texts.
    Returns the calculated threshold and the number of scores found.
    """
    human_scores = []
    for item in data:
        human_verdict = item.get("human_verdict", {})
        detector_result = human_verdict.get(detector_name)

        if detector_result:
            score = score_extractor(detector_result)
            if score is not None:
                human_scores.append(score)

    if not human_scores:
        return None, 0 # Return None for threshold and 0 for count

    # np.quantile is the standard and safest way to find a percentile value.
    threshold = np.quantile(human_scores, 1 - fpr_target)
    
    return threshold, len(human_scores)


# --- 1. Configuration ---

# Define the list of FPR targets you want to test
FPR_TARGETS = [0.0001, 0.005, 0.010, 0.050, 0.100]

# Define how to extract the score for each detector
SCORE_EXTRACTORS = {
    "pengram": lambda result: result.get("ai_likelihood"),
    "originality": lambda result: result.get("confidence", {}).get("AI"),
    "gptzero": lambda result: result.get("average_generated_prob"),
    "roberta-base-detector": lambda result: result if isinstance(result, (float, int)) else None
}

# --- 2. Main Execution Logic ---

try:
    # Load the data once
    input_path = ''
    with open(input_path, 'r', encoding='utf-8') as f:
        dataset = json.load(f)
    
    print(f"Computing thresholds for all configured detectors from file: {input_path}\n")
    
    # Use a nested dictionary to store results: {detector: {fpr: threshold}}
    all_thresholds = {}
    
    # Outer loop for detectors
    for name, extractor in SCORE_EXTRACTORS.items():
        print(f"--- Processing Detector: '{name}' ---")
        all_thresholds[name] = {}
        
        # Inner loop for each FPR target
        for fpr in FPR_TARGETS:
            threshold, score_count = compute_threshold(dataset, name, extractor, fpr_target=fpr)
            
            if threshold is not None:
                all_thresholds[name][fpr] = threshold
                print(f"✅ FPR {fpr*100:<5.2f}% | Threshold: {threshold:.8f} (from {score_count} scores)")
            else:
                print(f"⚠️ No valid scores found for '{name}'. Cannot compute threshold.")
                # Break inner loop if no scores are found at all for this detector
                break 
        print("-" * 50)

    # --- 3. Final Summary Table ---
    print("\n--- Summary of All Thresholds ---")
    
    # Convert the nested dictionary to a pandas DataFrame for nice printing
    # This creates a much more readable table
    summary_df = pd.DataFrame(all_thresholds).T # .T transposes the DataFrame
    summary_df.index.name = "Detector"
    summary_df.columns = [f"FPR_{col*100:.3f}%" for col in summary_df.columns]
    
    if not summary_df.empty:
        print(summary_df.to_string(float_format="%.8f"))

except FileNotFoundError:
    print(f"❌ ERROR: The file was not found at the path: {input_path}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Computing thresholds for all configured detectors from file: generated_output_gemini-2.0-flash_stealthgpt_all_detectors.json

--- Processing Detector: 'pengram' ---
✅ FPR 0.01 % | Threshold: 0.37896663 (from 1992 scores)
✅ FPR 0.50 % | Threshold: 0.00201017 (from 1992 scores)
✅ FPR 1.00 % | Threshold: 0.00057473 (from 1992 scores)
✅ FPR 5.00 % | Threshold: 0.00007047 (from 1992 scores)
✅ FPR 10.00% | Threshold: 0.00002279 (from 1992 scores)
--------------------------------------------------
--- Processing Detector: 'originality' ---
✅ FPR 0.01 % | Threshold: 0.99242762 (from 1322 scores)
✅ FPR 0.50 % | Threshold: 0.05767250 (from 1322 scores)
✅ FPR 1.00 % | Threshold: 0.02273200 (from 1322 scores)
✅ FPR 5.00 % | Threshold: 0.00240000 (from 1322 scores)
✅ FPR 10.00% | Threshold: 0.00090000 (from 1322 scores)
--------------------------------------------------
--- Processing Detector: 'gptzero' ---
✅ FPR 0.01 % | Threshold: 1.00000000 (from 1475 scores)
✅ FPR 0.50 % | Threshold: 1.0000000

## Compute Label based on Threshold

In [76]:
import json
from tqdm import tqdm

# --- 1. Configuration ---

# **IMPORTANT**: Populate this dictionary with the results from your previous script.
THRESHOLDS = {
    "pengram": {
        0.0001: 0.37923204,
        0.005:  0.00201159,
        0.010:  0.00057646,
        0.050:  0.00007047,
        0.100:  0.00002258
    },
    "originality": {
        0.0001: 0.99253326,
        0.005:  0.05871750,
        0.010:  0.02301900,
        0.050:  0.00250000,
        0.100:  0.00100000
    },
    "gptzero": {
        0.0001: 1.00000000 ,
        0.005:  1.00000000 ,
        0.010:  0.00000000,
        0.050:  0.00000000,
        0.100:  0.00000000
    },
    "roberta-base-detector": {
        0.0001: 0.99983308 ,
        0.005:  0.99983192 ,
        0.010:  0.99983157,
        0.050:  0.99982989,
        0.100:  0.99982750
    }
}

# THRESHOLDS = {
#     "pengram": {
#         0.99: 0.99,
#         0.97: 0.97,
#         0.95: 0.95,
#         0.90: 0.90,
#         0.80: 0.80
#     },
#     "originality": {
#         0.99: 0.99,
#         0.97: 0.97,
#         0.95: 0.95,
#         0.90: 0.90,
#         0.80: 0.80
#     },
#     "gptzero": {
#         0.99: 0.99,
#         0.97: 0.97,
#         0.95: 0.95,
#         0.90: 0.90,
#         0.80: 0.80
#     },
#     "roberta-base-detector": {
#         0.99: 0.99,
#         0.97: 0.97,
#         0.95: 0.95,
#         0.90: 0.90,
#         0.80: 0.80
#     }
# }


# This tells the script how to find the score for each detector. (Unchanged)
SCORE_EXTRACTORS = {
    "pengram": lambda result: result.get("ai_likelihood"),
    "originality": lambda result: result.get("confidence", {}).get("AI"),
    "gptzero": lambda result: result.get("average_generated_prob"),
    "roberta-base-detector": lambda result: result if isinstance(result, (float, int)) else None
}

# --- 2. File Paths ---
input_file = ''  # ← Your input file
output_file = '' # 

# --- 3. Main Labeling Logic ---

def get_label(detector_result, score_extractor, threshold):
    """Safely extracts a score, compares it to a threshold, and returns a label."""
    if not detector_result:
        return None
    score = score_extractor(detector_result)
    if score is None:
        return None
    return 1 if score > threshold else 0


# Load the JSON data
try:
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError:
    print(f"❌ ERROR: The file was not found at the path: {input_file}")
    exit()

# Loop through each entry in the dataset to add labels
for entry in tqdm(data, desc="Applying multi-FPR labels"):
    
    # Initialize new dictionaries to hold the nested labels for this entry
    entry["human_labels_by_fpr"] = {}
    entry["ai_labels_by_fpr"] = {}

    # --- Process Human Verdict for all detectors and FPRs ---
    human_verdict = entry.get("human_verdict", {})
    for detector_name, fpr_thresholds in THRESHOLDS.items():
        entry["human_labels_by_fpr"][detector_name] = {}
        detector_result = human_verdict.get(detector_name)
        score_extractor = SCORE_EXTRACTORS[detector_name]
        
        for fpr, threshold in fpr_thresholds.items():
            label = get_label(detector_result, score_extractor, threshold)
            fpr_key = f"FPR_{fpr*100:.3f}%"
            entry["human_labels_by_fpr"][detector_name][fpr_key] = label

    # --- Process all AI Verdicts for all detectors and FPRs ---
    ai_verdicts_by_model = entry.get("ai_verdicts", {})
    for model_name, model_verdicts in ai_verdicts_by_model.items():
        entry["ai_labels_by_fpr"][model_name] = {}
        for detector_name, fpr_thresholds in THRESHOLDS.items():
            entry["ai_labels_by_fpr"][model_name][detector_name] = {}
            detector_result = model_verdicts.get(detector_name)
            score_extractor = SCORE_EXTRACTORS[detector_name]
            
            for fpr, threshold in fpr_thresholds.items():
                label = get_label(detector_result, score_extractor, threshold)
                fpr_key = f"FPR_{fpr*100:.3f}%"
                entry["ai_labels_by_fpr"][model_name][detector_name][fpr_key] = label

# Save the results to the new output file
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

print(f"✅ Labeling completed. Output file: {output_file}")

Applying multi-FPR labels: 100%|██████████| 1990/1990 [00:00<00:00, 21736.16it/s]


✅ Labeling completed. Output file: generated_output_claude-sonnet-4-20250514_stealthgpt_all_detectors_with_multi_fpr_labels.json


In [6]:
import json
input_path = 'generated_output_gemini-2.0-flash_stealthgpt_all_detectors_with_multi_fpr_labels.json'
with open(input_path, 'r') as f:
    check = json.load(f)

In [10]:
import pandas as pd
pd.set_option("display.max_colwidth", 200)
test = pd.DataFrame(check)
test.head(2)
# error_rows = test[test['ai_verdicts'].apply(lambda x: 'error' in str(x).lower())]

# error_rows.info()

Unnamed: 0,text,year,genre,source,topic,ai_generated,stealthgpt_rephrased,human_verdict,ai_verdicts,human_labels_by_fpr,ai_labels_by_fpr
0,"The proposed training, which would have been provided by volunteers at no cost to the state, would occur during orientation for legislators at the beginning of each session.The bill was not prompt...",2014,news,https://huggingface.co/datasets/cc_news,"North Dakota Senate rejects cultural competency training bill for legislators, opting for a study instead, following concerns about overreach and strained relationships post-Dakota Access Pipeline...",{'gemini-2.0-flash': 'The North Dakota Senate chamber buzzed with restrained energy as the vote tally flashed across the screen. A bill mandating cultural competency training for state legislators...,{'gemini-2.0-flash': 'The Senate chamber in North Dakota buzzed with contained energy when the vote tally appeared onscreen. A bill requiring cultural competency training for state legislators had...,"{'pengram': {'ai_likelihood': 5.960464477539062e-07, 'prediction': 'Unlikely AI'}, 'originality': {'classification': {'AI': 0, 'Original': 1}, 'confidence': {'AI': 0.0028, 'Original': 0.9971}}, 'g...","{'gemini-2.0-flash': {'pengram': {'ai_likelihood': 1.0, 'prediction': 'Highly Likely AI'}, 'originality': {'classification': {'AI': 0, 'Original': 1}, 'confidence': {'AI': 0.0714, 'Original': 0.92...","{'pengram': {'FPR_0.010%': 0, 'FPR_0.500%': 0, 'FPR_1.000%': 0, 'FPR_5.000%': 0, 'FPR_10.000%': 0}, 'originality': {'FPR_0.010%': 0, 'FPR_0.500%': 0, 'FPR_1.000%': 0, 'FPR_5.000%': 1, 'FPR_10.000%...","{'gemini-2.0-flash': {'pengram': {'FPR_0.010%': 1, 'FPR_0.500%': 1, 'FPR_1.000%': 1, 'FPR_5.000%': 1, 'FPR_10.000%': 1}, 'originality': {'FPR_0.010%': 0, 'FPR_0.500%': 1, 'FPR_1.000%': 1, 'FPR_5.0..."
1,"TPR broke the story that Hulu and its 500 jobs would come to San Antonio pending incentives that city, county and state officials have promised.While the state of Texas has already offered nearly ...",2017,news,https://huggingface.co/datasets/cc_news,"Hulu's potential expansion to San Antonio. City and county incentives, including tax rebates, pending votes. 500 jobs expected with wage and investment requirements.","{'gemini-2.0-flash': 'San Antonio could soon be home to a major new player in the streaming world. Hulu is considering expanding its operations to the Alamo City, a move that could bring a signifi...","{'gemini-2.0-flash': 'A new, major player in the streaming world could soon be calling San Antonio home. Hulu is looking at expanding its operations to the River City, which could provide a shot i...","{'pengram': {'ai_likelihood': 0.0, 'prediction': 'Unlikely AI'}, 'originality': {'classification': {'AI': 0, 'Original': 1}, 'confidence': {'AI': 0.0003, 'Original': 0.9995}}, 'gptzero': {'average...","{'gemini-2.0-flash': {'pengram': {'ai_likelihood': 1.0, 'prediction': 'Highly Likely AI'}, 'originality': {'classification': {'AI': 0, 'Original': 1}, 'confidence': {'AI': 0.0101, 'Original': 0.98...","{'pengram': {'FPR_0.010%': 0, 'FPR_0.500%': 0, 'FPR_1.000%': 0, 'FPR_5.000%': 0, 'FPR_10.000%': 0}, 'originality': {'FPR_0.010%': 0, 'FPR_0.500%': 0, 'FPR_1.000%': 0, 'FPR_5.000%': 0, 'FPR_10.000%...","{'gemini-2.0-flash': {'pengram': {'FPR_0.010%': 1, 'FPR_0.500%': 1, 'FPR_1.000%': 1, 'FPR_5.000%': 1, 'FPR_10.000%': 1}, 'originality': {'FPR_0.010%': 0, 'FPR_0.500%': 0, 'FPR_1.000%': 0, 'FPR_5.0..."


# Compute Precision, Recall, FPR, and FNR by Models and Detectors

In [122]:
import json
import pandas as pd
import numpy as np
import scipy.stats as st

# --- 1. Configuration ---

# Set to True to filter by word count, or False to process the entire file.
APPLY_WORD_COUNT_FILTER = False 

# Define the filtering criteria (only used if the filter is active)
# You can choose to keep texts WITH MORE than (>=) or LESS than (<) the threshold.
WORD_COUNT_THRESHOLD = 50
FILTER_MODE = "<" # Options: ">=" to keep long texts, "<" to keep short texts.

# Define the exact FPR targets you used in the previous labeling script.
FPR_TARGETS = [0.0001, 0.005, 0.010, 0.050, 0.100]
# FPR_TARGETS = [0.99, 0.97, 0.95, 0.90, 0.80]

# --- 2. Load and Optionally Filter Data ---
input_path = ""

try:
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError:
    print(f"❌ ERROR: The file was not found at the path: {input_path}")
    exit()

# By default, we process the whole dataset
data_to_process = data.copy()

# Conditionally apply the filter if the switch is on
if APPLY_WORD_COUNT_FILTER:
    print(f"--- Applying Word Count Filter (Mode: {FILTER_MODE} {WORD_COUNT_THRESHOLD} words) ---")
    original_count = len(data)
    
    if FILTER_MODE == ">=":
        data_to_process = [
            item for item in data if len(item.get("text", "").split()) >= WORD_COUNT_THRESHOLD
        ]
        filter_desc = f">= {WORD_COUNT_THRESHOLD}"
    elif FILTER_MODE == "<":
        data_to_process = [
            item for item in data if len(item.get("text", "").split()) < WORD_COUNT_THRESHOLD
        ]
        filter_desc = f"< {WORD_COUNT_THRESHOLD}"
    else:
        print(f"⚠️ Warning: Invalid FILTER_MODE '{FILTER_MODE}'. No filtering will be applied.")
        filter_desc = "None"

    filtered_count = len(data_to_process)
    print(f"Filtering complete. Kept {filtered_count} of {original_count} entries (texts with {filter_desc} words).")
else:
    print("--- No word count filter applied. Processing all entries. ---")


# --- 3. Build Records from the Nested Label Structure ---
records = []
# Check the data we intend to process
if data_to_process:
    detectors_in_file = list(data_to_process[0].get("human_labels_by_fpr", {}).keys())
else:
    detectors_in_file = []
    print("⚠️ No data left to process after filtering. Exiting.")
    exit()

# *** The main loop now iterates over data_to_process ***
for item in data_to_process:
    for detector in detectors_in_file:
        for fpr in FPR_TARGETS:
            fpr_key = f"FPR_{fpr*100:.3f}%" # Recreate the key used in the previous script

            # Process human-written text labels for each FPR
            human_labels_by_detector = item.get("human_labels_by_fpr", {}).get(detector, {})
            if fpr_key in human_labels_by_detector and human_labels_by_detector[fpr_key] is not None:
                records.append({
                    "detector": detector,
                    "fpr_target": fpr,
                    "is_human": True,
                    "verdict": human_labels_by_detector[fpr_key]
                })

            # Process AI-generated text labels for all models and each FPR
            ai_labels_by_model = item.get("ai_labels_by_fpr", {})
            for model_name, model_labels in ai_labels_by_model.items():
                ai_labels_by_detector = model_labels.get(detector, {})
                if fpr_key in ai_labels_by_detector and ai_labels_by_detector[fpr_key] is not None:
                    records.append({
                        "detector": detector,
                        "fpr_target": fpr,
                        "is_human": False,
                        "verdict": ai_labels_by_detector[fpr_key]
                    })

df = pd.DataFrame(records)

# --- 4. Wilson Score Confidence Interval Function (Unchanged) ---
def wilson_ci(k, n, alpha=0.05):
    if n == 0: return (0, 0)
    z = st.norm.ppf(1 - alpha / 2)
    phat = k / n
    denom = 1 + z**2 / n
    center = phat + z**2 / (2 * n)
    pm = z * ((phat * (1 - phat) / n + z**2 / (4 * n**2)) ** 0.5)
    return ((center - pm) / denom, (center + pm) / denom)

# --- 5. Evaluation (Grouped by Detector and FPR Target) ---
results = []
metrics_results = []

if not df.empty:
    for (detector, fpr), group in df.groupby(["detector", "fpr_target"]):
        human = group[group["is_human"]]
        ai = group[~group["is_human"]]

        total_human = len(human)
        total_ai = len(ai)

        fp = (human["verdict"] == 1).sum()
        fn = (ai["verdict"] == 0).sum()
        tp = (ai["verdict"] == 1).sum()

        type1_rate = fp / total_human if total_human else np.nan
        type2_rate = fn / total_ai if total_ai else np.nan
        precision = tp / (tp + fp) if (tp + fp) > 0 else np.nan
        recall = tp / (tp + fn) if (tp + fn) > 0 else np.nan

        results.append({
            "Detector": detector,
            "FPR Target": f"{fpr:.4f}",
            "Error Type": "Type I (False Positive)",
            "Error Rate": type1_rate, 
            "Count": fp, 
            "Total": total_human
        })
        results.append({
            "Detector": detector,
            "FPR Target": f"{fpr:.4f}",
            "Error Type": "Type II (False Negative)",
            "Error Rate": type2_rate, 
            "Count": fn, 
            "Total": total_ai
        })
        metrics_results.append({
            "Detector": detector,
            "FPR Target": f"{fpr:.4f}",
            "Precision": precision,
            "Recall": recall})

# --- 6. Create and Display Final DataFrames ---
error_rates_df = pd.DataFrame(results).sort_values(by=["Detector", "FPR Target"])
metrics_df = pd.DataFrame(metrics_results).sort_values(by=["Detector", "FPR Target"])

print("\n--- Error Rate Analysis (Type I & II Errors) ---")
if error_rates_df.empty:
    print("No data to display.")
else:
    print(error_rates_df.to_string(index=False))

print("\n\n--- Performance Metrics Analysis (Precision & Recall) ---")
if metrics_df.empty:
    print("No data to display.")
else:
    print(metrics_df.to_string(index=False))

--- No word count filter applied. Processing all entries. ---

--- Error Rate Analysis (Type I & II Errors) ---
             Detector FPR Target               Error Type  Error Rate  Count  Total
              gptzero     0.0001  Type I (False Positive)    0.000000      0   1475
              gptzero     0.0001 Type II (False Negative)    1.000000   1475   1475
              gptzero     0.0050  Type I (False Positive)    0.000000      0   1475
              gptzero     0.0050 Type II (False Negative)    1.000000   1475   1475
              gptzero     0.0100  Type I (False Positive)    0.006102      9   1475
              gptzero     0.0100 Type II (False Negative)    0.444068    655   1475
              gptzero     0.0500  Type I (False Positive)    0.006102      9   1475
              gptzero     0.0500 Type II (False Negative)    0.444068    655   1475
              gptzero     0.1000  Type I (False Positive)    0.006102      9   1475
              gptzero     0.1000 Type II (False 

In [109]:
error_rates_df['model'] = "GPT-4.1"
error_rates_df_gpt_4_1 = error_rates_df.copy()
# error_rates_df_gpt_4_1

In [114]:
error_rates_df['model'] = "Claude Opus 4"
error_rates_df_claude_opus_4 = error_rates_df.copy()
# error_rates_df_claude_opus_4

In [119]:
error_rates_df['model'] = "Claude Sonnet 4"
error_rates_df_claude_sonnet_4 = error_rates_df.copy()
# error_rates_df_claude_sonnet_4

In [124]:
error_rates_df['model'] = "Gemini 2.0 Flash"
error_rates_df_gemini_2_flash = error_rates_df.copy()
# error_rates_df_gemini_2_flash

In [127]:
error_rates_stealth_df_final = pd.concat([error_rates_df_gpt_4_1, 
                              error_rates_df_claude_opus_4,
                              error_rates_df_claude_sonnet_4,
                              error_rates_df_gemini_2_flash], 
                             axis = 0)

error_rates_stealth_df_final.info()
error_rates_stealth_df_final.to_csv("error_rates_stealth_df_final.csv")

<class 'pandas.core.frame.DataFrame'>
Index: 160 entries, 0 to 39
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Detector    160 non-null    object 
 1   FPR Target  160 non-null    object 
 2   Error Type  160 non-null    object 
 3   Error Rate  160 non-null    float64
 4   Count       160 non-null    int64  
 5   Total       160 non-null    int64  
 6   model       160 non-null    object 
dtypes: float64(1), int64(2), object(4)
memory usage: 10.0+ KB


In [111]:
metrics_df['model'] = "GPT-4.1"
metrics_df_gpt_4_1 = metrics_df.copy()
# metrics_df_gpt_4_1

In [116]:
metrics_df['model'] = "Claude Opus 4"
metrics_df_claude_opus_4 = metrics_df.copy()
# metrics_df_claude_opus_4

In [121]:
metrics_df['model'] = "Claude Sonnet 4"
metrics_df_claude_sonnet_4 = metrics_df.copy()
# metrics_df_claude_sonnet_4

In [126]:
metrics_df['model'] = "Gemini 2.0 Flash"
metrics_df_gemini_2_flash = metrics_df.copy()
# metrics_df_gemini_2_flash

In [128]:
metrics_stealth_df_final = pd.concat([metrics_df_gpt_4_1, 
                              metrics_df_claude_opus_4,
                              metrics_df_claude_sonnet_4,
                              metrics_df_gemini_2_flash], 
                             axis = 0)

metrics_stealth_df_final.info()
metrics_stealth_df_final.to_csv("metrics_stealth_df_final.csv")

<class 'pandas.core.frame.DataFrame'>
Index: 80 entries, 0 to 19
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Detector    80 non-null     object 
 1   FPR Target  80 non-null     object 
 2   Precision   72 non-null     float64
 3   Recall      80 non-null     float64
 4   model       80 non-null     object 
dtypes: float64(2), object(3)
memory usage: 3.8+ KB


# Compute AUC ROC, FPR, FNR

In [91]:
# Table 1A – AUROC, Δ-Mean, FPR & FNR
import json
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve # Import roc_curve

# --- Configuration ---

# Set to True to filter by word count, or False to process the entire file.
APPLY_WORD_COUNT_FILTER = False 

# Define the filtering criteria (only used if the filter is active)
# You can choose to keep texts WITH MORE than (>=) or LESS than (<) the threshold.
WORD_COUNT_THRESHOLD = 50
FILTER_MODE = "<" # Options: ">=" to keep long texts, "<" to keep short texts.

# === Load JSON file ===
# Using the file path from your last script for context
input_path = ''
with open(input_path, "r", encoding="utf-8") as f:
    data = json.load(f)
    
# By default, we process the whole dataset
data_to_process = data.copy()

# Conditionally apply the filter if the switch is on
if APPLY_WORD_COUNT_FILTER:
    print(f"--- Applying Word Count Filter (Mode: {FILTER_MODE} {WORD_COUNT_THRESHOLD} words) ---")
    original_count = len(data)
    
    if FILTER_MODE == ">=":
        data_to_process = [
            item for item in data if len(item.get("text", "").split()) >= WORD_COUNT_THRESHOLD
        ]
        filter_desc = f">= {WORD_COUNT_THRESHOLD}"
    elif FILTER_MODE == "<":
        data_to_process = [
            item for item in data if len(item.get("text", "").split()) < WORD_COUNT_THRESHOLD
        ]
        filter_desc = f"< {WORD_COUNT_THRESHOLD}"
    else:
        print(f"⚠️ Warning: Invalid FILTER_MODE '{FILTER_MODE}'. No filtering will be applied.")
        filter_desc = "None"

    filtered_count = len(data_to_process)
    print(f"Filtering complete. Kept {filtered_count} of {original_count} entries (texts with {filter_desc} words).")
else:
    print("--- No word count filter applied. Processing all entries. ---")

# === Supported detectors and their probability fields ===
detectors = {
    "originality": lambda d: d.get("confidence", {}).get("AI", None),
    "pengram": lambda d: d.get("ai_likelihood", None),
    "gptzero": lambda d: d.get("average_generated_prob", None),
    "roberta-base-detector": lambda d: d if isinstance(d, (float, int)) else None
}

# === Collect scores ===
records = []
for entry in data_to_process:
    # Human text
    hv = entry.get("human_verdict", {})
    for det, extract in detectors.items():
        if det in hv:
            prob = extract(hv[det])
            if prob is not None:
                records.append({
                    "detector": det, "source": "human", "score": prob, "label": 0
                })

    # AI-generated text from all models
    for model, verdicts in entry.get("ai_verdicts", {}).items():
        for det, extract in detectors.items():
            if det in verdicts:
                prob = extract(verdicts[det])
                if prob is not None:
                    records.append({
                        "detector": det, "source": f"ai_{model}", "score": prob, "label": 1
                    })

# === Convert to DataFrame ===
df = pd.DataFrame(records)
if df.empty:
    print("⚠️ No records were created. Please check your input JSON structure and detectors.")
    exit()

# === Compute AUROC, Δ-Mean, FPR & FNR for each detector ===
summary = []

for det in df["detector"].unique():
    sub = df[df["detector"] == det]
    
    # Ensure there are both human and AI scores to process
    if not (0 in sub["label"].values and 1 in sub["label"].values):
        print(f"Skipping detector '{det}' as it's missing either human or AI scores.")
        continue

    ai_scores = sub[sub["label"] == 1]["score"]
    human_scores = sub[sub["label"] == 0]["score"]
    all_labels = sub["label"]
    all_scores = sub["score"]

    # --- Calculate AUROC and Δ-Mean (your original code) ---
    try:
        auroc = roc_auc_score(all_labels, all_scores)
    except ValueError:
        auroc = None

    delta_mean = ai_scores.mean() - human_scores.mean()

    # --- NEW: Calculate FPR and FNR at an optimal threshold ---
    fpr_values, tpr_values, thresholds = roc_curve(all_labels, all_scores)

    # Find the optimal threshold using Youden's J statistic
    j_statistic = tpr_values - fpr_values
    optimal_idx = np.argmax(j_statistic)
    
    # Get the values at that optimal point
    optimal_threshold = thresholds[optimal_idx]
    optimal_fpr = fpr_values[optimal_idx] # This is the False Positive Rate
    optimal_tpr = tpr_values[optimal_idx] # This is the True Positive Rate (Sensitivity/Recall)
    optimal_fnr = 1 - optimal_tpr        # False Negative Rate = 1 - TPR

    summary.append({
        "detector": det,
        "AUROC": round(auroc, 4) if auroc is not None else "N/A",
        "Δ-Mean": round(delta_mean, 4),
        "FPR": round(optimal_fpr, 4), # Add FPR to the summary
        "FNR": round(optimal_fnr, 4), # Add FNR to the summary
        "Optimal Threshold": round(optimal_threshold, 4) # Also useful to see the threshold
    })

# === Final Table 1A Output ===
summary_df = pd.DataFrame(summary)
if not summary_df.empty:
    summary_df = summary_df.sort_values(by="AUROC", ascending=False)

# print(summary_df.to_string(index=False))
# summary_df.to_csv("path/to/your/output.csv", index=False)

# Display the final DataFrame
summary_df

--- No word count filter applied. Processing all entries. ---


Unnamed: 0,detector,AUROC,Δ-Mean,FPR,FNR,Optimal Threshold
1,pengram,0.9981,0.9328,0.009,0.0231,0.0008
0,originality,0.9674,0.1862,0.0998,0.0497,0.001
2,gptzero,0.7748,0.5311,0.0061,0.4441,0.1176
3,roberta-base-detector,0.5289,0.0074,0.38,0.5472,0.9998


In [88]:
summary_df['model'] = "GPT-4.1"
summary_df_4_1 = summary_df.copy()
summary_df_4_1

Unnamed: 0,detector,AUROC,Δ-Mean,FPR,FNR,Optimal Threshold,model
1,pengram,0.9984,0.942,0.009,0.0181,0.0007,GPT-4.1
0,originality,0.9631,0.1709,0.1078,0.043,0.0009,GPT-4.1
2,gptzero,0.7322,0.4483,0.0061,0.5291,0.2407,GPT-4.1
3,roberta-base-detector,0.5035,-0.0095,0.3825,0.5823,0.9998,GPT-4.1


In [90]:
summary_df['model'] = "Claude Opus 4"
summary_df_claude_opus_4 = summary_df.copy()
summary_df_claude_opus_4

Unnamed: 0,detector,AUROC,Δ-Mean,FPR,FNR,Optimal Threshold,model
1,pengram,0.9984,0.9253,0.009,0.0221,0.0009,Claude Opus 4
0,originality,0.9621,0.1237,0.1009,0.0409,0.001,Claude Opus 4
2,gptzero,0.6637,0.3091,0.0062,0.6662,0.1628,Claude Opus 4
3,roberta-base-detector,0.5364,0.0077,0.4091,0.5035,0.9998,Claude Opus 4


In [85]:
summary_df['model'] = "Claude Sonnet 4"
summary_df_claude_sonnet_4 = summary_df.copy()
summary_df_claude_sonnet_4

Unnamed: 0,detector,AUROC,Δ-Mean,FPR,FNR,Optimal Threshold,model
1,pengram,0.9993,0.9364,0.009,0.0136,0.0007,Claude Sonnet 4
0,originality,0.9573,0.1241,0.1114,0.051,0.0009,Claude Sonnet 4
2,gptzero,0.7017,0.384,0.0063,0.59,0.0526,Claude Sonnet 4
3,roberta-base-detector,0.5179,0.002,0.407,0.5422,0.9998,Claude Sonnet 4


In [92]:
summary_df['model'] = "Gemini 2.0 Flash"
summary_df_gemini_2_flash = summary_df.copy()
summary_df_gemini_2_flash

Unnamed: 0,detector,AUROC,Δ-Mean,FPR,FNR,Optimal Threshold,model
1,pengram,0.9981,0.9328,0.009,0.0231,0.0008,Gemini 2.0 Flash
0,originality,0.9674,0.1862,0.0998,0.0497,0.001,Gemini 2.0 Flash
2,gptzero,0.7748,0.5311,0.0061,0.4441,0.1176,Gemini 2.0 Flash
3,roberta-base-detector,0.5289,0.0074,0.38,0.5472,0.9998,Gemini 2.0 Flash


In [93]:
summary_stealth_df_final = pd.concat([summary_df_4_1, 
                              summary_df_claude_opus_4,
                              summary_df_claude_sonnet_4,
                              summary_df_gemini_2_flash], 
                             axis = 0)

summary_stealth_df_final.info()
summary_stealth_df_final.to_csv("summary_stealth_df_final.csv")

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 1 to 3
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   detector           16 non-null     object 
 1   AUROC              16 non-null     float64
 2   Δ-Mean             16 non-null     float64
 3   FPR                16 non-null     float64
 4   FNR                16 non-null     float64
 5   Optimal Threshold  16 non-null     float64
 6   model              16 non-null     object 
dtypes: float64(5), object(2)
memory usage: 1.0+ KB


# Compute AUC ROC, FPR, FNR by Genre

In [103]:
# Table 1A – AUROC, Δ-Mean, FPR & FNR by Genre
import json
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

# --- Configuration ---

# Set to True to filter by word count, or False to process the entire file.
APPLY_WORD_COUNT_FILTER = False 

# Define the filtering criteria (only used if the filter is active)
# You can choose to keep texts WITH MORE than (>=) or LESS than (<) the threshold.
WORD_COUNT_THRESHOLD = 50
FILTER_MODE = "<" # Options: ">=" to keep long texts, "<" to keep short texts.

# === Load JSON file ===
input_path = '' 
with open(input_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# By default, we process the whole dataset
data_to_process = data.copy()

# Conditionally apply the filter if the switch is on
if APPLY_WORD_COUNT_FILTER:
    print(f"--- Applying Word Count Filter (Mode: {FILTER_MODE} {WORD_COUNT_THRESHOLD} words) ---")
    original_count = len(data)
    
    if FILTER_MODE == ">=":
        data_to_process = [
            item for item in data if len(item.get("text", "").split()) >= WORD_COUNT_THRESHOLD
        ]
        filter_desc = f">= {WORD_COUNT_THRESHOLD}"
    elif FILTER_MODE == "<":
        data_to_process = [
            item for item in data if len(item.get("text", "").split()) < WORD_COUNT_THRESHOLD
        ]
        filter_desc = f"< {WORD_COUNT_THRESHOLD}"
    else:
        print(f"⚠️ Warning: Invalid FILTER_MODE '{FILTER_MODE}'. No filtering will be applied.")
        filter_desc = "None"

    filtered_count = len(data_to_process)
    print(f"Filtering complete. Kept {filtered_count} of {original_count} entries (texts with {filter_desc} words).")
else:
    print("--- No word count filter applied. Processing all entries. ---")
    
# === Supported detectors and their probability fields ===
detectors = {
    "originality": lambda d: d.get("confidence", {}).get("AI", None),
    "pengram": lambda d: d.get("ai_likelihood", None),
    "gptzero": lambda d: d.get("average_generated_prob", None),
    "roberta-base-detector": lambda d: d if isinstance(d, (float, int)) else None
}

# === Collect scores, now including genre ===
records = []
for entry in data_to_process:
    # --- NEW: Extract genre for each entry ---
    genre = entry.get("genre", "unknown") # Default to 'unknown' if genre key is missing

    # Human text
    hv = entry.get("human_verdict", {})
    for det, extract in detectors.items():
        if det in hv:
            prob = extract(hv[det])
            if prob is not None:
                records.append({
                    "genre": genre, # Add genre
                    "detector": det, 
                    "source": "human", 
                    "score": prob, 
                    "label": 0
                })

    # AI-generated text from all models
    for model, verdicts in entry.get("ai_verdicts", {}).items():
        for det, extract in detectors.items():
            if det in verdicts:
                prob = extract(verdicts[det])
                if prob is not None:
                    records.append({
                        "genre": genre, # Add genre
                        "detector": det, 
                        "source": f"ai_{model}", 
                        "score": prob, 
                        "label": 1
                    })

# === Convert to DataFrame ===
df = pd.DataFrame(records)
if df.empty:
    print("⚠️ No records were created. Please check your input JSON structure and detectors.")
    exit()

# === Compute metrics for each Genre-Detector combination ===
summary = []

# --- NEW: Group by both genre and detector ---
for (genre, detector), group in df.groupby(["genre", "detector"]):
    
    # Ensure there are both human and AI scores to process in the group
    if not (0 in group["label"].values and 1 in group["label"].values):
        print(f"Skipping group ('{genre}', '{detector}') as it's missing either human or AI scores.")
        continue

    ai_scores = group[group["label"] == 1]["score"]
    human_scores = group[group["label"] == 0]["score"]
    all_labels = group["label"]
    all_scores = group["score"]

    # --- Calculate AUROC and Δ-Mean ---
    try:
        auroc = roc_auc_score(all_labels, all_scores)
    except ValueError:
        auroc = None

    delta_mean = ai_scores.mean() - human_scores.mean()

    # --- Calculate FPR and FNR at an optimal threshold ---
    fpr_values, tpr_values, thresholds = roc_curve(all_labels, all_scores)
    j_statistic = tpr_values - fpr_values
    optimal_idx = np.argmax(j_statistic)
    
    optimal_threshold = thresholds[optimal_idx]
    optimal_fpr = fpr_values[optimal_idx]
    optimal_tpr = tpr_values[optimal_idx]
    optimal_fnr = 1 - optimal_tpr

    summary.append({
        "Genre": genre, # Add genre to the final summary
        "Detector": detector,
        "AUROC": round(auroc, 4) if auroc is not None else "N/A",
        "Δ-Mean": round(delta_mean, 4),
        "FPR": round(optimal_fpr, 4),
        "FNR": round(optimal_fnr, 4),
        "Optimal Threshold": round(optimal_threshold, 4)
    })

# === Final Table 1A Output ===
summary_df = pd.DataFrame(summary)
if not summary_df.empty:
    # Sort by Genre first, then by AUROC within each genre
    summary_df = summary_df.sort_values(by=["Genre", "AUROC"], ascending=[True, False])

# Display the final DataFrame
summary_df

--- No word count filter applied. Processing all entries. ---


Unnamed: 0,Genre,Detector,AUROC,Δ-Mean,FPR,FNR,Optimal Threshold
2,amazon review,pengram,0.995,0.6988,0.01,0.05,0.0001
1,amazon review,originality,0.8883,0.2319,0.2932,0.0625,0.0009
0,amazon review,gptzero,0.6697,0.3283,0.02,0.64,0.2
3,amazon review,roberta-base-detector,0.5973,0.0798,0.365,0.43,0.994
6,blog,pengram,0.9988,0.8299,0.0,0.02,0.0001
5,blog,originality,0.9934,0.2436,0.0493,0.0299,0.0007
4,blog,gptzero,0.7349,0.4542,0.005,0.525,0.2941
7,blog,roberta-base-detector,0.5547,0.051,0.075,0.81,0.9998
10,news,pengram,0.9982,0.9767,0.0033,0.0133,0.1987
9,news,originality,0.9539,0.2842,0.1781,0.0441,0.0012


In [96]:
summary_df['model'] = "GPT-4.1"
summary_df_4_1 = summary_df.copy()
# summary_df_4_1

In [99]:
summary_df['model'] = "Claude Opus 4"
summary_df_claude_opus_4 = summary_df.copy()
# summary_df_claude_opus_4

In [102]:
summary_df['model'] = "Claude Sonnet 4"
summary_df_claude_sonnet_4 = summary_df.copy()
# summary_df_claude_sonnet_4

In [105]:
summary_df['model'] = "Gemini 2.0 Flash"
summary_df_gemini_2_flash = summary_df.copy()
# summary_df_gemini_2_flash

In [106]:
summary_stealth_genre__df_final = pd.concat([summary_df_4_1, 
                              summary_df_claude_opus_4,
                              summary_df_claude_sonnet_4,
                              summary_df_gemini_2_flash], 
                             axis = 0)

summary_stealth_genre__df_final.info()
summary_stealth_genre__df_final.to_csv("summary_stealth_genre_df_final.csv")

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 2 to 23
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Genre              96 non-null     object 
 1   Detector           96 non-null     object 
 2   AUROC              96 non-null     float64
 3   Δ-Mean             96 non-null     float64
 4   FPR                96 non-null     float64
 5   FNR                96 non-null     float64
 6   Optimal Threshold  96 non-null     float64
 7   model              96 non-null     object 
dtypes: float64(5), object(3)
memory usage: 6.8+ KB
