# An Example of Detecting LLM-Generated Texts

In [None]:
'''
Both the H0 and H1 scenarios are considered.
**H0**: one sequence consists of scores of texts sampled from the XSum dataset; the other consists of scores of real Olympic news.
**H1**: one sequence consists of scores of texts sampled from the XSum dataset; the other consists of scores of fake news generated by Gemini-1.5-Pro.
'''

In [None]:
%%bash
# Run the betting test with Online Newton Step (ONS) as the online algorithm (OAlg).
# Inputs: file1/type1 = "real" scores from the XSum file; file2/type2 = "samples"
# scores from the Olympic file; file3/type3 = "real" scores from the Olympic file.
# NOTE(review): presumably file2 drives the H1 scenario and file3 the H0 scenario —
# confirm against scripts/bet_ons.py.
# NOTE(review): every method cell in this notebook writes to the same --output_file;
# confirm the script appends to it rather than overwriting it.
echo "$(date), Setting up environment ..."
# The output file lives under Detect_LLM/results/, so that directory must exist;
# ./results alone (all the original created) is never written to by the command below.
mkdir -p results Detect_LLM/results
python scripts/bet_ons.py \
    --file1 "Detect_LLM/data/xsum.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type1 "real" \
    --file2 "Detect_LLM/data/olympic.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type2 "samples" \
    --file3 "Detect_LLM/data/olympic.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type3 "real" \
    --iters 300 --shift_time None \
    --output_file "Detect_LLM/results/pro.neo2.7.fast.json"


In [None]:
%%bash
# Run the betting test with FTRL+Barrier as the online algorithm (OAlg).
# Inputs: file1/type1 = "real" scores from the XSum file; file2/type2 = "samples"
# scores from the Olympic file; file3/type3 = "real" scores from the Olympic file.
# NOTE(review): presumably file2 drives the H1 scenario and file3 the H0 scenario —
# confirm against scripts/bet_ftrl_barrier.py.
# NOTE(review): every method cell in this notebook writes to the same --output_file;
# confirm the script appends to it rather than overwriting it.
echo "$(date), Setting up environment ..."
# The output file lives under Detect_LLM/results/, so that directory must exist;
# ./results alone (all the original created) is never written to by the command below.
mkdir -p results Detect_LLM/results
python scripts/bet_ftrl_barrier.py \
    --file1 "Detect_LLM/data/xsum.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type1 "real" \
    --file2 "Detect_LLM/data/olympic.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type2 "samples" \
    --file3 "Detect_LLM/data/olympic.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type3 "real" \
    --iters 300 --shift_time None \
    --output_file "Detect_LLM/results/pro.neo2.7.fast.json"


In [None]:
%%bash
# Run the betting test with Optimistic-FTRL+Barrier as the online algorithm (OAlg).
# Inputs: file1/type1 = "real" scores from the XSum file; file2/type2 = "samples"
# scores from the Olympic file; file3/type3 = "real" scores from the Olympic file.
# NOTE(review): presumably file2 drives the H1 scenario and file3 the H0 scenario —
# confirm against scripts/bet_optimistic_ftrl_barrier.py.
# NOTE(review): every method cell in this notebook writes to the same --output_file;
# confirm the script appends to it rather than overwriting it.
echo "$(date), Setting up environment ..."
# The output file lives under Detect_LLM/results/, so that directory must exist;
# ./results alone (all the original created) is never written to by the command below.
mkdir -p results Detect_LLM/results
python scripts/bet_optimistic_ftrl_barrier.py \
    --file1 "Detect_LLM/data/xsum.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type1 "real" \
    --file2 "Detect_LLM/data/olympic.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type2 "samples" \
    --file3 "Detect_LLM/data/olympic.gemini_1.5_pro.gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json" --type3 "real" \
    --iters 300 --shift_time None \
    --output_file "Detect_LLM/results/pro.neo2.7.fast.json"


# Get Results to Plot

In [None]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:

# Aggregate the raw per-method results into one averaged record per method and
# store them where the plotting cell can pick them up.
json_files = ['Detect_LLM/results/pro.neo2.7.fast.json']
# Entries of each result file are matched to these names BY POSITION, so the
# order here must follow the order in which the method scripts were run.
# NOTE(review): assumes each script adds one entry to the shared result file — confirm.
item_names = ['ONS','FTRL+Barrier', 'Optimistic-FTRL+Barrier']
metric_names = ('rejection_time', 'power', 'fpr')
output_path = 'Detect_LLM/results_to_plot/pro.neo2.7.fast.json'
data_collect = {name: {metric: [] for metric in metric_names} for name in item_names}


for file in json_files:
    with open(file, 'r') as f:
        data = json.load(f)
    # More entries than names used to surface as a bare IndexError on
    # item_names[i]; report what actually went wrong instead.
    if len(data) > len(item_names):
        raise ValueError(
            f"{file} holds {len(data)} entries but only {len(item_names)} method "
            "names are known; stale entries from an earlier run may be present."
        )
    for name, item in zip(item_names, data):
        for metric in metric_names:
            data_collect[name][metric].append(item[metric])


results = []
for name, metrics in data_collect.items():
    if not metrics['fpr']:
        # np.mean([]) is NaN, which is not valid JSON and breaks the plotting
        # cell (it calls len() on the value), so leave the method out instead.
        print(f"Warning: no results found for '{name}'; skipping it.")
        continue
    entry = {'item_name': name}
    for metric in metric_names:
        # axis=0 averages element-wise across result files.
        entry[metric] = np.mean(metrics[metric], axis=0).tolist()
    results.append(entry)

results_json = json.dumps(results, indent=4)

# Nothing else in the notebook creates this directory.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w') as f:
    f.write(results_json)


# Plot Results

In [None]:

# Plot FPR against rejection time (left) and against the significance level (right).
#
# Tag of the aggregated result file to plot. It defaults to the file written by
# the aggregation cell of this notebook; set it to another tag (for example
# 'flash.neo2.7.eta1_300', which this cell used to hard-code) to plot a
# different, already aggregated experiment.
result_tag = 'pro.neo2.7.fast'
plot_dir = 'Detect_LLM/plot_png'

with open(f'Detect_LLM/results_to_plot/{result_tag}.json', 'r') as file:
    items = json.load(file)

df_list = []
for item in items:
    df = pd.DataFrame({
        'rejection_time': item['rejection_time'],
        'fpr': item['fpr'],
        'name': item['item_name'],
        # NOTE(review): assumes the scripts evaluated an evenly spaced grid of
        # significance levels from 0.005 to 0.1 — confirm against the scripts.
        'alpha': np.linspace(0.005, 0.1, len(item['fpr']))
    })
    df_list.append(df)

fig, ax = plt.subplots(1, 2, figsize=(13, 4.2))
markers = ["*", "s", "^"]
for i, df in enumerate(df_list):
    # Cycle through the markers so more than three series cannot raise IndexError.
    marker = markers[i % len(markers)]
    series_label = df['name'].iloc[0]
    ax[0].plot(df['rejection_time'], df['fpr'], ls='--', lw=3, marker=marker, label=series_label, markersize=9)
    ax[1].plot(df['alpha'], df['fpr'], ls='--', lw=3, marker=marker, label=series_label, markersize=9)

ax[0].tick_params(axis='both', labelsize=20, which='major', length=10,  width=2)
ax[0].set_ylim(-0.005,0.105)
ax[0].set_yticks(np.arange(0,0.105,0.02))
ax[0].set_xlim(-10,160)
ax[0].set_xticks(np.arange(0,160, 30))
ax[0].set_xlabel(r'Rejection Time ($\tau$)', fontsize=20)
ax[0].set_ylabel('False Positive Rate (FPR)', fontsize=20)

# Shade the region below the diagonal, i.e. where FPR <= alpha.
x = np.linspace(0, 0.1, 500)
y = x
ax[1].fill_between(x, 0, y, color='yellow', alpha=0.1, zorder=1)

ax[1].set_ylim(-0.005,0.105)
ax[1].set_yticks(np.arange(0,0.105,0.02))

ax[1].set_xlim(-0.005,0.105)
ax[1].set_xticks(np.arange(0,0.12,0.02))
ax[1].tick_params(axis='both', labelsize=20, which='major', length=10,  width=2)
# Reference line FPR = alpha.
ax[1].plot([0, 0.1], [0, 0.1], color='k', ls='--',  lw=3)
ax[1].set_xlabel(r'Significance Level ($\alpha$)', fontsize=20)
ax[1].set_ylabel('False Positive Rate (FPR)', fontsize=20)


# Both panels show the same series, so one shared legend below the figure is
# built from the first panel's handles.
handles, labels = next(ax.flat).get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.49, -0.07), fancybox=True, shadow=True,
             ncol=5, fontsize=20, labelspacing=0.1, handletextpad=0.5, handlelength=1)
plt.subplots_adjust(wspace=0.4)
for axis in ax:
    for spine in axis.spines.values():
        spine.set_linewidth(2)

# savefig does not create missing directories.
os.makedirs(plot_dir, exist_ok=True)
plt.savefig(f'{plot_dir}/{result_tag}.png', dpi=300, bbox_inches='tight')
plt.show()

# Plot Distributions

In [None]:

import matplotlib.pyplot as plt
import json
import numpy as np

# All score files share one naming pattern and differ only in the dataset
# prefix (xsum / olympic) and the source-LLM tag.
_source_llm_tags = ['gemini_1.5_flash', 'gemini_1.5_pro', 'palm2']
_common_suffix = 'gpt-j-6B.gpt-neo-2.7B.sampling_discrepancy.json'

# XSum score files, one per source LLM.
file_paths1 = [f'Detect_LLM/data/xsum.{tag}.{_common_suffix}' for tag in _source_llm_tags]
# Olympic-news score files, one per source LLM (same order as file_paths1).
file_paths2 = [f'Detect_LLM/data/olympic.{tag}.{_common_suffix}' for tag in _source_llm_tags]

def load_data(file_paths, key):
    """Gather the values stored under ``predictions[key]`` across JSON files.

    Args:
        file_paths: Iterable of paths to JSON files, each holding a top-level
            ``'predictions'`` mapping.
        key: Entry of the ``'predictions'`` mapping to read from every file
            (this notebook uses ``'real'`` and ``'samples'``).

    Returns:
        A NumPy array with the values of all files concatenated in the order
        the files were given.
    """
    collected = []
    for path in file_paths:
        with open(path, 'r') as handle:
            predictions = json.load(handle)['predictions']
        collected += predictions[key]
    return np.array(collected)


# Compare the score distributions of the 'real' XSum entries with the 'samples'
# Olympic-news entries, once per source LLM and once pooled over all of them.
# Requires seaborn (`sns`), imported in the notebook's import cell.
y1_list = [load_data([file], 'real') for file in file_paths1]
y2_list = [load_data([file], 'samples') for file in file_paths2]
# Despite the "_avg" suffix these are the scores POOLED (concatenated) over all
# files, not averages; the names are kept for compatibility.
y1_avg = np.concatenate(y1_list)
y2_avg = np.concatenate(y2_list)

label_lst=['Gemini-1.5-Flash','Gemini-1.5-Pro', 'PaLM 2']
fig, axes = plt.subplots(2, 2, figsize=(8, 6))
axes = axes.flatten()
x_label = "Value"
y_label = "Frequency"


def _plot_score_hist(axis, human_scores, llm_scores, llm_label, y_max, y_ticks=None):
    """Overlay the two score histograms on ``axis`` using the shared panel style.

    ``y_ticks`` is optional; when omitted matplotlib picks the y tick locations.
    """
    sns.histplot(human_scores, bins=30, color='dodgerblue', alpha=0.5, label='Human', kde=False, edgecolor='none', ax=axis)
    sns.histplot(llm_scores, bins=30, color='orange', alpha=0.5, label=llm_label, kde=False, edgecolor='none', ax=axis)
    axis.legend(loc="upper left", fontsize=11, framealpha=0.5)
    axis.set_xlim(-3, 7)
    axis.set_xticks(np.arange(-3, 8, 2))
    axis.set_ylim(0, y_max)
    if y_ticks is not None:
        axis.set_yticks(y_ticks)
    # Set after histplot, which writes its own axis labels.
    axis.set_xlabel(x_label, fontsize=15)
    axis.set_ylabel(y_label, fontsize=15)
    # Final tick/spine style; the earlier per-panel tick_params calls
    # (width=1.2, labelsize=10) were always overridden by these values.
    axis.tick_params(axis='both', which='major', length=10, width=1.5, labelsize=15)
    for spine in axis.spines.values():
        spine.set_linewidth(1.5)


# Individual comparisons: one panel per source LLM.
for axis, human_scores, llm_scores, llm_label in zip(axes, y1_list, y2_list, label_lst):
    _plot_score_hist(axis, human_scores, llm_scores, llm_label, y_max=80, y_ticks=np.arange(0, 81, 20))

# Pooled comparison (all LLMs together) in the fourth panel.
_plot_score_hist(axes[3], y1_avg, y2_avg, 'LLMs', y_max=200)

plt.tight_layout()
# savefig does not create missing directories.
os.makedirs('Detect_LLM/plot_png', exist_ok=True)
plt.savefig('Detect_LLM/plot_png/all_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
