#### By: Peyman Shahidi
#### Created: Jan 30, 2026
#### Last Edit: Jan 30, 2026

<br>

In [1]:
#Python
import getpass
import numpy as np
import pandas as pd
from collections import defaultdict
import itertools
import random 

# Display floats comma-separated with two digits after the decimal point,
# e.g. 1000 is shown as 1,000.00.
# The full option key is used instead of the 'float_format' shorthand: pandas resolves
# shorthands by pattern matching, which can become ambiguous if a new option with a
# similar name is added.
pd.set_option('display.float_format', "{:,.2f}".format)

import matplotlib.pyplot as plt
#%matplotlib inline
#from matplotlib.legend import Legend

import warnings
# Show up to 200 rows when a DataFrame is displayed
pd.set_option("display.max_rows", 200)
# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas/NumPy deprecation warnings; consider narrowing the filter.
warnings.filterwarnings(action="ignore")

In [2]:
# Project-relative locations: inputs are read from <main>/data, and this notebook writes
# to "aiChain_length_count_robustness" sub-folders for computed objects and for plots.
main_folder_path = ".."
input_data_path = main_folder_path + "/data"
output_data_path = input_data_path + "/computed_objects/aiChain_length_count_robustness"
output_plot_path = main_folder_path + "/writeup/plots/aiChain_length_count_robustness"

In [3]:
# Create the output directories (including missing parents) if they don't exist yet
import os

for path in [output_data_path, output_plot_path]:
    # exist_ok=True makes creation idempotent and avoids the check-then-create race
    # of testing os.path.exists() before calling os.makedirs().
    os.makedirs(path, exist_ok=True)

In [4]:
# Determine AI chains
def create_ai_chains_df_def1(df, group_cols):
    """Flag AI tasks and measure runs of consecutive AI tasks ("AI chains") per group.

    A task counts as an AI task when its ``label`` is ``'Augmentation'`` or
    ``'Automation'``. Within each group (rows sharing the same values of
    ``group_cols``) tasks are ordered by ``'Task Position'``, and every maximal run of
    consecutive AI tasks forms one chain. An isolated AI task is a chain of length 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Task-level data containing ``'Task Position'``, ``'label'`` and every column
        listed in ``group_cols``. The input frame is not modified.
    group_cols : list of str
        Columns that identify a group (e.g. an occupation).

    Returns
    -------
    ai_chains_df : pandas.DataFrame
        Copy of ``df`` sorted by ``group_cols + ['Task Position']`` with a fresh index
        and these added columns:

        * ``is_ai``: 1 for AI tasks, else 0.
        * ``next_is_ai``: ``is_ai`` of the following task in the same group (0 for the
          last task of a group).
        * ``ai_chain``: 1 when the task and the following task are both AI tasks. The
          last task of a run is therefore not flagged.
        * ``ai_chain_end``: 1 when the task is AI and the following task is not.
        * ``chain_id``: running chain number within the group, NaN for non-AI tasks.
        * ``chain_length``: length of the chain the task belongs to, 0 for non-AI tasks.
        * ``num_ai_chains``: number of chains in the task's group (0 if none).
    chain_lengths : pandas.DataFrame
        One row per (group, ``chain_id``) with its ``chain_length``.
    mean_chain_length : float
        Mean of ``chain_lengths['chain_length']`` (NaN when there are no chains).
    num_ai_chains : float
        Mean of the ``num_ai_chains`` column of ``ai_chains_df``.
        NOTE(review): this mean is taken over task rows, so groups with more tasks
        weigh more; it is not a plain per-group average. Confirm this is intended.
    """
    group_cols = list(group_cols)

    # Order tasks inside each group so that neighbouring rows are neighbouring tasks
    ai_chains_df = (
        df.copy()
        .sort_values(by=group_cols + ['Task Position'])
        .reset_index(drop=True)
    )
    ai_chains_df['is_ai'] = ai_chains_df['label'].isin(['Augmentation', 'Automation']).astype(int)
    is_ai = ai_chains_df['is_ai'] == 1

    # AI status of the neighbouring tasks within the same group; a missing neighbour
    # (first/last task of a group) counts as non-AI.
    grouped_is_ai = ai_chains_df.groupby(group_cols)['is_ai']
    next_is_ai = grouped_is_ai.shift(-1).fillna(0).astype(int)
    prev_is_ai = grouped_is_ai.shift(1).fillna(0).astype(int)
    ai_chains_df['next_is_ai'] = next_is_ai

    # AI task that is directly followed by another AI task
    ai_chains_df['ai_chain'] = 0
    ai_chains_df.loc[is_ai & (next_is_ai == 1), 'ai_chain'] = 1

    # AI task that is directly followed by a non-AI task (or by the end of the group)
    ai_chains_df['ai_chain_end'] = 0
    ai_chains_df.loc[is_ai & (next_is_ai == 0), 'ai_chain_end'] = 1

    # A chain starts where the current task is AI and the previous one is not; the
    # running count of starts within a group numbers the chains 1, 2, ...
    start_chain = (is_ai & (prev_is_ai == 0)).astype(int)
    chain_id = start_chain.groupby([ai_chains_df[col] for col in group_cols]).cumsum()
    # Non-AI tasks get no chain id. Masking with where() yields NaN without assigning a
    # missing value into an integer column, an implicit upcast that pandas deprecates.
    ai_chains_df['chain_id'] = chain_id.where(is_ai)

    # Length of every chain (AI rows only)
    chain_lengths = (
        ai_chains_df[is_ai]
        .groupby(group_cols + ['chain_id'])
        .size()
        .reset_index(name='chain_length')
    )
    # Attach chain lengths back to the task rows; non-AI rows get length 0
    ai_chains_df = ai_chains_df.merge(chain_lengths, on=group_cols + ['chain_id'], how='left')

    # Number of chains per group; groups without any AI task get 0
    num_chains = (
        chain_lengths.groupby(group_cols)['chain_id']
        .nunique()
        .reset_index(name='num_ai_chains')
    )
    ai_chains_df = ai_chains_df.merge(num_chains, on=group_cols, how='left')
    ai_chains_df['num_ai_chains'] = ai_chains_df['num_ai_chains'].fillna(0).astype(int)
    ai_chains_df['chain_length'] = ai_chains_df['chain_length'].fillna(0).astype(int)

    # Dataset-level summaries
    mean_chain_length = chain_lengths['chain_length'].mean()
    num_ai_chains = ai_chains_df['num_ai_chains'].mean()

    return ai_chains_df, chain_lengths, mean_chain_length, num_ai_chains

In [5]:
master_results = []   # one single-row DataFrame per input file, concatenated once at the end

# Input files: entry 0 is the original data, entries 1-10 are the numbered reruns.
# The "_0" file is skipped because it is a repetition of the original prompts.
gpt_results_dir = f"{input_data_path}/computed_objects/ONET_Eloundou_Anthropic_GPT"
input_file_path_list = [f"{gpt_results_dir}/ONET_Eloundou_Anthropic_GPT.csv"] + [
    f"{gpt_results_dir}/ONET_Eloundou_Anthropic_GPT_{i}.csv" for i in range(1, 11)
]

# Columns kept from every input file
keep_cols = [
    'O*NET-SOC Code', 'Occupation Title', 'Task ID', 'Task Title',
    'Task Position', 'Task Type',
    'Major_Group_Code', 'Major_Group_Title',
    'Minor_Group_Code', 'Minor_Group_Title',
    'Broad_Occupation_Code', 'Broad_Occupation_Title',
    'Detailed_Occupation_Code', 'Detailed_Occupation_Title',
    'gpt4_exposure', 'human_labels',
    'automation', 'augmentation', 'label',
]

# AI Chain Definition 1: treat Augmentation and Automation as AI tasks,
# with chains computed separately for every occupation
group_cols = ['O*NET-SOC Code', 'Occupation Title']

for prompt_idx, input_file_path in enumerate(input_file_path_list):
    merged_data = pd.read_csv(input_file_path)[keep_cols]

    ai_chains_df, chain_lengths, mean_chain_length, num_ai_chains = create_ai_chains_df_def1(
        merged_data, group_cols
    )

    # One summary row per input file; `prompt` is the file's position in the list
    result_df = pd.DataFrame({
        'prompt': [prompt_idx],
        'mean_chain_length': [mean_chain_length],
        'avg_num_ai_chains': [num_ai_chains],
    })
    master_results.append(result_df)

# Combine the per-file summaries and save them
master_df = pd.concat(master_results, ignore_index=True)
master_df.to_csv(f"{output_data_path}/aiChain_length_count.csv", index=False)

In [6]:
# Load the randomized-task-position results; their means serve as reference lines in the plots
placebo_path = f"{input_data_path}/computed_objects/aiChain_length_count/aiChains_taskPositionReshuffle_definition1.csv"
df = pd.read_csv(placebo_path)
# The first row is always dropped.
# NOTE(review): presumably it repeats the original (non-shuffled) run; verify against the file.
df = df.iloc[1:]
randomized_position_mean_chain_length = df['mean_chain_length'].mean()
randomized_position_avg_num_ai_chains = df['num_ai_chains'].mean()

In [7]:
def _plot_prompt_robustness(plot_df, value_col, placebo_mean, color, ylabel, ylim, out_file):
    """Scatter one chain statistic against the prompt index and save the figure.

    Parameters
    ----------
    plot_df : pandas.DataFrame
        Numeric columns ``'prompt'`` and ``value_col``, one row per prompt.
    value_col : str
        Column of ``plot_df`` to plot on the y-axis.
    placebo_mean : float
        Height of the black dashed reference line (mean of the randomized task
        position placebos).
    color : str
        Matplotlib colour for the robustness-prompt markers and their mean line.
    ylabel : str
        Y-axis label.
    ylim : tuple of float
        Fixed ``(bottom, top)`` y-axis limits. Values outside this range, including
        the reference lines, are not visible.
    out_file : str
        Path the PNG is written to (300 dpi).
    """
    is_main = plot_df["prompt"] == 0

    fig, ax = plt.subplots(figsize=(8, 6), constrained_layout=True)

    # Prompt 0 is highlighted as the main prompt
    if is_main.any():
        ax.scatter(plot_df.loc[is_main, "prompt"], plot_df.loc[is_main, value_col],
                   color="red", zorder=3, s=50, label="Main Prompt")

    # Only the non-zero prompts are drawn as "Robustness Prompts", so the main prompt is
    # not plotted a second time underneath its red marker.
    ax.scatter(plot_df.loc[~is_main, "prompt"], plot_df.loc[~is_main, value_col],
               marker='o', color=color, lw=1.8, zorder=2, label="Robustness Prompts")

    # Mean across all prompts (main prompt included)
    mean_val = float(np.nanmean(plot_df[value_col].values))
    ax.axhline(mean_val, color=color, linestyle="--", lw=2.0, alpha=0.9,
               label=f"Mean (across prompts) = {mean_val:.2f}")

    ax.set_xlabel("GPT Prompt", fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.tick_params(axis='both', labelsize=14)

    ax.set_ylim(*ylim)
    ax.axhline(y=placebo_mean, color='black', linestyle='--', lw=1.8, alpha=0.9,
               label='Mean of Randomized Task Position Placebos')

    ax.legend(loc='best', fontsize=14)

    fig.savefig(out_file, dpi=300)
    plt.close(fig)


df = master_df.copy()

# Clean + sort: coerce to numeric, drop rows that failed to parse, order by prompt index
metric_cols = ["prompt", "mean_chain_length", "avg_num_ai_chains"]
for col in metric_cols:
    df[col] = pd.to_numeric(df[col], errors="coerce")
df = df.dropna(subset=metric_cols).sort_values("prompt")

# ----------------------------
# Plot 1: Average AI Chain Length
# ----------------------------
_plot_prompt_robustness(
    df, "mean_chain_length", randomized_position_mean_chain_length,
    color="tab:orange", ylabel="Average AI Chain Length", ylim=(1.28, 1.52),
    out_file=f"{output_plot_path}/aiChain_length_robustness.png",
)

# ----------------------------
# Plot 2: Average Number of AI Chains
# ----------------------------
_plot_prompt_robustness(
    df, "avg_num_ai_chains", randomized_position_avg_num_ai_chains,
    color="tab:blue", ylabel="Average Count of AI Chains", ylim=(1.98, 2.32),
    out_file=f"{output_plot_path}/aiChain_count_robustness.png",
)