#### By: Peyman Shahidi
#### Created: Oct 19, 2025
#### Last Edit: Nov 12, 2025

<br>

In [37]:
#Python
import getpass
import numpy as np
import pandas as pd
from collections import defaultdict
import itertools
import random 

## formatting number to appear comma separated and with two digits after decimal: e.g, 1000 shown as 1,000.00
pd.set_option('float_format', "{:,.2f}".format)

import matplotlib.pyplot as plt
#%matplotlib inline
#from matplotlib.legend import Legend

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 200)

In [38]:
main_folder_path = ".."
input_data_path = f"{main_folder_path}/data"
output_data_path = f'{input_data_path}/computed_objects/execTypeVaryingDWA_anthropicIndex'
output_plot_path = f"{main_folder_path}/writeup/plots/execTypeVaryingDWA"

In [39]:
# Create directories if they don't exist
import os

for path in [output_data_path, output_plot_path]:
    if not os.path.exists(path):
        os.makedirs(path)

## Set variables

In [40]:
# Number of reshuffles
n_shuffles = 1000


dependent_var = 'is_ai'
plot_title_variable = 'Task is AI'
# dependent_var = 'is_automated'
# plot_title_variable = 'Task is Automated'


TARGET_REGS = ['prev2_is_ai', 'prev_is_ai', 'next_is_ai', 'next2_is_ai']
SPECS = ['no_fe', 'fe_MajorGroup', 'fe_MinorGroup']

PLOT_TITLE = ['Task Before Previous Task', 'Previous Task', 'Next Task', 'Task After Next Task']

### Main Code

In [41]:
# Get list of DWAs with tasks in multiple occupations
dwa_list_path = f"{input_data_path}/computed_objects/similar_dwa_tasks/similarTasks"

# Read all CSV files
import glob
dwa_csv_files = glob.glob(os.path.join(dwa_list_path, "*.csv"))
print(f"Found {len(dwa_csv_files)} DWA CSV files.")

# Load them into DataFrames, skipping 1-row files
dwa_dfs = []
skipped_files_count = 0
for f in dwa_csv_files:
    df = pd.read_csv(f)
    if len(df) > 1: # Skip if DWA contains only one task
        dwa_dfs.append(df)
    else:
        skipped_files_count += 1
print(f"Skipped {skipped_files_count} DWA files with only one task.")
    

# Combine into one DataFrame
df_all = pd.concat(dwa_dfs, ignore_index=True)
repetitive_dwa_task_ids = df_all['Task ID'].unique().tolist()
repetitive_dwa_task_titles = df_all['Task Title'].unique().tolist()
print(f"Found {len(repetitive_dwa_task_ids)} tasks related to these DWAs.")

Found 2047 DWA CSV files.
Skipped 47 DWA files with only one task.
Found 13535 tasks related to these DWAs.


In [42]:
# Create a DWA-level dataset with number of tasks and occupations per DWA, as well as fraction of manual, automation, and augmentation tasks per DWA
merged_data = pd.read_csv(f"{input_data_path}/computed_objects/ONET_Eloundou_Anthropic_GPT/ONET_Eloundou_Anthropic_GPT.csv")
merged_data['is_manual'] = merged_data['label'] == 'Manual'
merged_data['is_automation'] = merged_data['label'] == 'Automation'
merged_data['is_augmentation'] = merged_data['label'] == 'Augmentation'


# Merge back DWA ID and DWA Titles to the merged_data
dwa_task_mapping = pd.read_csv(f"{input_data_path}/computed_objects/similar_dwa_tasks/dwa_task_mapping.csv")
print(f'Length of merged_data before merging DWA info: {merged_data.shape[0]}')
merged_data = merged_data.merge(dwa_task_mapping, on=['Task ID', 'Task Title', 'O*NET-SOC Code', 'Occupation Title'], how='left')
print(f'Length of merged_data after merging DWA info: {merged_data.shape[0]}')


# Aggregate to get fractions
dwa_grouped = merged_data.groupby(['DWA ID', 'DWA Title']).agg(
    num_tasks = ('Task ID', 'nunique'),
    num_occupations = ('O*NET-SOC Code', 'nunique'),
    fraction_manual = ('is_manual', 'mean'),
    fraction_automation = ('is_automation', 'mean'),
    fraction_augmentation = ('is_augmentation', 'mean'),
).reset_index()
print(f"Created DWA-level dataset with {dwa_grouped.shape[0]} DWAs.")

# Keep only DWAs with variation in terms of execution type across occupations
dwa_grouped_filtered = dwa_grouped[
     (dwa_grouped['num_occupations'] > 1) & (dwa_grouped['fraction_manual'] > 0) & (dwa_grouped['fraction_manual'] < 1)
].copy()
display(dwa_grouped_filtered)

# Create list of DWAs with varying execution types
dwas_varying_exec_types_ids = dwa_grouped_filtered['DWA ID'].unique().tolist()
dwas_varying_exec_types_titles = dwa_grouped_filtered['DWA Title'].unique().tolist()
print(f"Identified {len(dwas_varying_exec_types_ids)} DWAs with varying execution types across occupations.")

# Save output
dwa_grouped_filtered.to_csv(f"{output_data_path}/dwas_varying_execution_types.csv", index=False)

Length of merged_data before merging DWA info: 17925
Length of merged_data after merging DWA info: 22267
Created DWA-level dataset with 2081 DWAs.


Unnamed: 0,DWA ID,DWA Title,num_tasks,num_occupations,fraction_manual,fraction_automation,fraction_augmentation
0,4.A.1.a.1.I01.D01,Review art or design materials.,6,4,0.83,0.00,0.17
2,4.A.1.a.1.I01.D03,Review production information to determine cos...,6,2,0.83,0.00,0.17
3,4.A.1.a.1.I01.D04,Study scripts to determine project requirements.,9,8,0.33,0.56,0.11
4,4.A.1.a.1.I02.D01,Read materials to determine needed actions.,4,4,0.75,0.25,0.00
5,4.A.1.a.1.I02.D02,Read maps to determine routes.,4,4,0.75,0.00,0.25
...,...,...,...,...,...,...,...
2066,4.A.4.c.3.I05.D03,"Purchase materials, equipment, or other resour...",22,20,0.95,0.00,0.05
2069,4.A.4.c.3.I05.D06,Purchase products or services.,15,10,0.93,0.07,0.00
2072,4.A.4.c.3.I06.D01,Prescribe treatments or therapies.,19,16,0.89,0.00,0.11
2074,4.A.4.c.3.I06.D03,Prescribe medications.,31,26,0.97,0.00,0.03


Identified 831 DWAs with varying execution types across occupations.


In [43]:
# Read the merged data
merged_data = pd.read_csv(f"{input_data_path}/computed_objects/ONET_Eloundou_Anthropic_GPT/ONET_Eloundou_Anthropic_GPT.csv")
merged_data = merged_data[['O*NET-SOC Code', 'Occupation Title', 'Task ID', 'Task Title',
       'Task Position', 'Task Type', 
       'Major_Group_Code', 'Major_Group_Title', 
       'Minor_Group_Code', 'Minor_Group_Title',
       'Broad_Occupation_Code', 'Broad_Occupation_Title',
       'Detailed_Occupation_Code', 'Detailed_Occupation_Title',
       'gpt4_exposure', 'human_labels', 
       'automation', 'augmentation', 'label']]


# Create is_ai and is_automated flags in merged_data
merged_data['is_ai'] = merged_data['label'].isin(['Augmentation','Automation']).astype(int)
merged_data['is_automated'] = merged_data['label'].isin(['Automation']).astype(int)
merged_data['is_exposed'] = merged_data['human_labels'].isin(['E1']).astype(int)


# Step 1: Add occupation's number of tasks info
num_tasks_per_occupation = merged_data.groupby('O*NET-SOC Code')['Task ID'].nunique().reset_index()
num_tasks_per_occupation = num_tasks_per_occupation.rename(columns={'Task ID': 'num_tasks'})
merged_data = merged_data.merge(num_tasks_per_occupation, on='O*NET-SOC Code', how='left')


# Step 2: Create flags for previous/next tasks is AI within occupation groups
# Sort by occupation and position when possible
merged_data['Task Position'] = pd.to_numeric(merged_data['Task Position'], errors='coerce')
merged_data = merged_data.sort_values(['O*NET-SOC Code', 'Task Position']).reset_index(drop=True)
group_col = 'O*NET-SOC Code'

# Compute neighbor flags (prev/next) within occupation groups when possible
merged_data['prev_is_ai'] = 0
merged_data['prev2_is_ai'] = 0
merged_data['next_is_ai'] = 0
merged_data['next2_is_ai'] = 0
pos_col = 'Task Position'

def add_neighbor_flags(df):
    df = df.copy()
    df['Task Position'] = pd.to_numeric(df['Task Position'], errors='coerce')
    df = df.sort_values(['O*NET-SOC Code','Task Position']).reset_index(drop=True)
    def _add_flags(g):
        g = g.sort_values('Task Position')
        g['prev_is_ai'] = g['is_ai'].shift(1).fillna(0).astype(int)
        g['prev2_is_ai'] = g['is_ai'].shift(2).fillna(0).astype(int)
        # g['prev2_is_ai'] = ((g['prev2_is_ai'] == 1) & (g['prev_is_ai'] == 1)).astype(int)
        g['next_is_ai'] = g['is_ai'].shift(-1).fillna(0).astype(int)
        g['next2_is_ai'] = g['is_ai'].shift(-2).fillna(0).astype(int)
        # g['next2_is_ai'] = ((g['next2_is_ai'] == 1) & (g['next_is_ai'] == 1)).astype(int)
        return g
    return df.groupby('O*NET-SOC Code', group_keys=False).apply(_add_flags).reset_index(drop=True)
merged_data = merged_data.groupby(group_col, group_keys=False).apply(add_neighbor_flags).reset_index(drop=True)



# Step 3: Add back DWA info
# Merge back DWA ID and DWA Titles to the merged_data
dwa_task_mapping = pd.read_csv(f"{input_data_path}/computed_objects/similar_dwa_tasks/dwa_task_mapping.csv")
merged_data = merged_data.merge(dwa_task_mapping, on=['Task ID', 'Task Title', 'O*NET-SOC Code', 'Occupation Title'], how='left')
# Note that the merge might map multiple DWAs to the same task


# Step 4: Flag tasks whose DWA ID appears in dwa_execTypeVarying_id_list
merged_data['dwa_execType_varying'] = merged_data.get('DWA ID', pd.Series()).isin(dwas_varying_exec_types_ids).fillna(False).astype(int)

# Remove duplicates in terms of (O*NET-SOC Code, Task ID) if any
print(f'Length of merged_data before dropping duplicates: {merged_data.shape[0]}')
merged_data = merged_data.drop_duplicates(subset=['O*NET-SOC Code', 'Task ID'])
print(f'Length of merged_data after dropping duplicates: {merged_data.shape[0]}')


# Summary for flagged DWA rows
mask = merged_data['dwa_execType_varying'] == 1
n_flagged = int(mask.sum())
print(f'\nNumber of dwa_execType_varying rows: {n_flagged}')
if n_flagged > 0:
    for c in ['prev2_is_ai', 'prev_is_ai', 'next_is_ai', 'next2_is_ai']:
        s = int(merged_data.loc[mask, c].sum())
        frac = merged_data.loc[mask, c].mean()
        print(f'{c}: {s} of {n_flagged} flagged rows (fraction={frac:.3f})')
    try:
        display(merged_data.loc[mask].head())
    except Exception:
        print(merged_data.loc[mask].head().to_string(index=False))
else:
    print('No flagged rows to summarize.')


Length of merged_data before dropping duplicates: 22267
Length of merged_data after dropping duplicates: 17920

Number of dwa_execType_varying rows: 8694
prev2_is_ai: 1582 of 8694 flagged rows (fraction=0.182)
prev_is_ai: 1717 of 8694 flagged rows (fraction=0.197)
next_is_ai: 1681 of 8694 flagged rows (fraction=0.193)
next2_is_ai: 1528 of 8694 flagged rows (fraction=0.176)


Unnamed: 0,O*NET-SOC Code,Occupation Title,Task ID,Task Title,Task Position,Task Type,Major_Group_Code,Major_Group_Title,Minor_Group_Code,Minor_Group_Title,...,is_automated,is_exposed,num_tasks,prev_is_ai,prev2_is_ai,next_is_ai,next2_is_ai,DWA ID,DWA Title,dwa_execType_varying
0,11-1011.00,Chief Executives,20461,"Review and analyze legislation, laws, or publi...",1,Core,11-0000,Management Occupations,11-1000,Top Executives,...,0,0,31,0,0,0,1,4.A.2.a.4.I09.D03,Analyze impact of legal or regulatory changes.,1
3,11-1011.00,Chief Executives,8825,Analyze operations to evaluate performance of ...,3,Core,11-0000,Management Occupations,11-1000,Top Executives,...,0,0,31,0,1,1,0,4.A.2.a.4.I07.D09,Analyze data to assess operational or project ...,1
4,11-1011.00,Chief Executives,8824,"Confer with board members, organization offici...",4,Core,11-0000,Management Occupations,11-1000,Top Executives,...,0,0,31,1,0,0,0,4.A.4.a.2.I03.D14,Confer with organizational members to accompli...,1
6,11-1011.00,Chief Executives,8826,"Direct, plan, or implement policies, objective...",6,Core,11-0000,Management Occupations,11-1000,Top Executives,...,0,0,31,0,1,1,0,4.A.2.b.1.I09.D01,Implement organizational process or policy cha...,1
9,11-1011.00,Chief Executives,8843,"Interpret and explain policies, rules, regulat...",7,Core,11-0000,Management Occupations,11-1000,Top Executives,...,0,1,31,0,0,0,0,4.A.4.a.1.I02.D03,Communicate organizational policies and proced...,1


### Run regression of multiple-execution-type DWA tasks against execution type of neighboring tasks

In [44]:
# Regressions: neighbor flags on is_ai (Logit, no FE + several FE models)
# Assumption: dependent variable = 'is_ai' and regressors are the four neighbor flags
# (prev2_is_ai, prev_is_ai, next_is_ai, next2_is_ai).
# Runs on full `merged_data` and on filtered subset where dwa_execType_varying==1.

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from pathlib import Path
from statsmodels.stats.sandwich_covariance import cov_hc1
from scipy.stats import norm


# ------- helpers -------
# Extract params, robust SEs (HC1) and model sample/df info
def extract_coef_series(res):
    try:
        if hasattr(res, 'get_robustcov_results'):
            r = res.get_robustcov_results(cov_type='HC1')
            nobs = getattr(r, 'nobs', getattr(res, 'nobs', np.nan))
            df_resid = getattr(r, 'df_resid', getattr(res, 'df_resid', np.nan))
            return r.params, r.bse, r.pvalues, nobs, df_resid
        else:
            params = res.params
            try:
                robust_cov = cov_hc1(res)
                bse_arr = np.sqrt(np.diag(robust_cov))
                z_scores = params.values / bse_arr
                pvals = 2 * (1 - norm.cdf(np.abs(z_scores)))
                bse = pd.Series(bse_arr, index=params.index)
                pvalues = pd.Series(pvals, index=params.index)
                nobs = getattr(res, 'nobs', np.nan)
                df_resid = getattr(res, 'df_resid', np.nan)
                return params, bse, pvalues, nobs, df_resid
            except Exception:
                bse = getattr(res, 'bse', pd.Series(np.nan, index=params.index))
                pvalues = getattr(res, 'pvalues', pd.Series(np.nan, index=params.index))
                nobs = getattr(res, 'nobs', np.nan)
                df_resid = getattr(res, 'df_resid', np.nan)
                return params, bse, pvalues, nobs, df_resid
    except Exception:
        params = getattr(res, 'params', pd.Series())
        bse = getattr(res, 'bse', pd.Series())
        pvalues = getattr(res, 'pvalues', pd.Series())
        nobs = getattr(res, 'nobs', np.nan)
        df_resid = getattr(res, 'df_resid', np.nan)
        return params, bse, pvalues, nobs, df_resid

# Build tidy coef dataframe for target regs
def build_tidy_coefs(res, dataset_name, model_name):
    params, bse, pvalues, nobs, df_resid = extract_coef_series(res)
    if len(params) == 0:
        return pd.DataFrame(columns=['dataset','model','term','coef','std_err','p_value','nobs','df_resid'])
    df = pd.DataFrame({
        'term': params.index.astype(str),
        'coef': params.values,
        'std_err': bse.values if hasattr(bse, 'values') else np.array(bse),
        'p_value': pvalues.values if hasattr(pvalues, 'values') else np.array(pvalues)
    })
    df['model'] = model_name
    df['dataset'] = dataset_name
    df['nobs'] = nobs
    df['df_resid'] = df_resid
    df = df[df['term'].isin(TARGET_REGS)].reset_index(drop=True)
    df = df[['dataset','model','nobs','df_resid','term','coef','std_err','p_value']]
    return df

# Drop FE groups with no within-group variation in y or too small size
def keep_var_groups(df, fe_col, y, min_size=2):
    return df.groupby(fe_col).filter(lambda g: g[y].nunique() == 2 and len(g) >= min_size)

# Core function to run requested regressions on a DataFrame
def run_regressions_on(df, dataset_name, dependent_var, regressors):
    df = df.copy()

    # make sure regressors and outcome exist and are numeric
    regs = TARGET_REGS
    for r in regs + [dependent_var]:
        if r not in df.columns:
            raise KeyError(f'Required column {r} not found in dataset {dataset_name}')
    df[regs] = df[regs].apply(pd.to_numeric, errors='coerce').fillna(0)
    df[dependent_var] = pd.to_numeric(df[dependent_var], errors='coerce').fillna(0)

    regressors_str = ' + '.join(regressors)
    base_formula = f'{dependent_var} ~ {regressors_str}'
    models = {}
    tidy_list = []

    # 1) No fixed effects (Logit)
    try:
        formula = base_formula + ' + is_exposed + num_tasks'
        res_no_fe = smf.logit(formula, data=df).fit(disp=False)
        models['no_fe'] = res_no_fe
        tidy_list.append(build_tidy_coefs(res_no_fe, dataset_name, 'no_fe'))
    except Exception as e:
        print('No-FE logit model failed:', e)

    # 2) Fixed effects models (each separately)
    fe_cols = [
        ('Major_Group_Code','MajorGroup'),
        ('Minor_Group_Code','MinorGroup')
    ]

    for col, short in fe_cols:
        formula = base_formula + f' + C({col}) + is_exposed + num_tasks'

        # Drop problematic FE groups for THIS FE
        df_fe = keep_var_groups(df, col, y=dependent_var, min_size=2)
        kept = df_fe[col].nunique()
        dropped = df[col].nunique() - kept

        try:
            res = smf.logit(formula, data=df_fe).fit(disp=False)
            models[f'fe_{short}'] = res
            tidy_list.append(build_tidy_coefs(res, dataset_name, f'fe_{short}'))
        except Exception as e:
            print(f'FE logit model with {col} failed:', e)

    # Combine tidy coeffs for this dataset
    if len(tidy_list):
        tidy_combined = pd.concat(tidy_list, ignore_index=True)
    else:
        tidy_combined = pd.DataFrame(columns=['dataset','model','nobs','df_resid','term','coef','std_err','p_value'])

    # Save per-dataset four-variable summary
    out_dir = Path(f'{output_data_path}/regression_summaries_{dependent_var}')
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / f'regression_{dataset_name}_fourvars_summary.csv'
    tidy_combined.to_csv(out_path, index=False)
    # print(f'Saved per-dataset four-variable summary to {out_path}')

    return models, tidy_combined

In [45]:
# Run regression on original datasets: full and filtered (dwa_execType_varying == 1)
print('Starting regressions on full merged_data')
print(f'Full dataset has {merged_data.shape[0]} rows.')
models_full, coefs_full = run_regressions_on(merged_data, 'full_merged_data', dependent_var=dependent_var, regressors=TARGET_REGS)


print('\nStarting regressions on filtered dataset (dwa_execType_varying == 1)')
filtered = merged_data[merged_data['dwa_execType_varying'] == 1].reset_index(drop=True)
print(f'Filtered dataset has {filtered.shape[0]} rows.')
models_filtered, coefs_filtered = run_regressions_on(filtered, 'filtered_dwaExecTypeVarying', dependent_var=dependent_var, regressors=TARGET_REGS)


combined_all = pd.concat([coefs_full, coefs_filtered], ignore_index=True)
final_out = Path(output_data_path) / f'regression_fourvars_allDatasets_{dependent_var}.csv'
combined_all.to_csv(final_out, index=False)
print(f'Combined four-variable CSV saved to {final_out}')

Starting regressions on full merged_data
Full dataset has 17920 rows.

Starting regressions on filtered dataset (dwa_execType_varying == 1)
Filtered dataset has 8694 rows.
Combined four-variable CSV saved to ../data/computed_objects/execTypeVaryingDWA_anthropicIndex/regression_fourvars_allDatasets_is_ai.csv


In [46]:
filtered[['O*NET-SOC Code', 'Occupation Title', 'Task ID', 'Task Title', 'Task Position', 'label', 'human_labels']]

Unnamed: 0,O*NET-SOC Code,Occupation Title,Task ID,Task Title,Task Position,label,human_labels
0,11-1011.00,Chief Executives,20461,"Review and analyze legislation, laws, or publi...",1,Augmentation,E2
1,11-1011.00,Chief Executives,8825,Analyze operations to evaluate performance of ...,3,Augmentation,E2
2,11-1011.00,Chief Executives,8824,"Confer with board members, organization offici...",4,Augmentation,E0
3,11-1011.00,Chief Executives,8826,"Direct, plan, or implement policies, objective...",6,Manual,E0
4,11-1011.00,Chief Executives,8843,"Interpret and explain policies, rules, regulat...",7,Augmentation,E1
...,...,...,...,...,...,...,...
8689,53-7081.00,Refuse and Recyclable Material Collectors,7180,Tag garbage or recycling containers to inform ...,11,Manual,E1
8690,53-7081.00,Refuse and Recyclable Material Collectors,7184,Provide quotes for refuse collection contracts.,16,Manual,E2
8691,53-7121.00,"Tank Car, Truck, and Ship Loaders",12792,"Verify tank car, barge, or truck load numbers ...",1,Manual,E2
8692,53-7121.00,"Tank Car, Truck, and Ship Loaders",12801,"Test samples for specific gravity, using hydro...",15,Manual,E0


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# --- logistic transformation ---
def logistic(x):
    return 1 / (1 + np.exp(-x)) if not np.isnan(x) else np.nan

# Compute observed (non-reshuffled) coefficients
print('Computing observed coefficients (original ordering)')
obs_models_full, obs_coefs_full = run_regressions_on(merged_data, 'obs_full', dependent_var=dependent_var, regressors=TARGET_REGS)
filtered_obs = merged_data[merged_data['dwa_execType_varying'] == 1].reset_index(drop=True)
obs_models_filt, obs_coefs_filt = run_regressions_on(filtered_obs, 'obs_filtered', dependent_var=dependent_var, regressors=TARGET_REGS)

# --- convert tidy results to dict of propensities instead of raw coefs ---
def tidy_to_dict(tidy_df):
    out = {spec: {term: np.nan for term in TARGET_REGS} for spec in SPECS}
    for _, row in tidy_df.iterrows():
        spec = row['model']
        term = row['term']
        if spec in out and term in out[spec]:
            coef = row['coef']
            out[spec][term] = logistic(coef)  # convert to propensity
    return out

obs_dict_full = tidy_to_dict(obs_coefs_full)
obs_dict_filt = tidy_to_dict(obs_coefs_filt)

# Prepare containers for reshuffled *propensities*
resh_full = {spec: {t: [] for t in TARGET_REGS} for spec in SPECS}
resh_filt = {spec: {t: [] for t in TARGET_REGS} for spec in SPECS}

out_dir = Path(f"{output_data_path}/regression_summaries_{dependent_var}")
out_dir.mkdir(parents=True, exist_ok=True)

print(f'Running {n_shuffles} reshuffles...')
for i in range(n_shuffles):
    seed = 42 + i
    fname_full = out_dir / f'regression_shuf_full_{i}_{dependent_var}_fourvars_summary.csv'
    fname_filt = out_dir / f'regression_shuf_filt_{i}_{dependent_var}_fourvars_summary.csv'

    if fname_full.exists() and fname_filt.exists():
        coefs_shuf_full = pd.read_csv(fname_full)
        coefs_shuf_filt = pd.read_csv(fname_filt)
        print(f'  Seed {i}: loaded existing results')
    else:
        df_shuf = merged_data.copy()
        df_shuf['Task Position'] = df_shuf.groupby('O*NET-SOC Code')['Task Position'].transform(
            lambda x: x.sample(frac=1, random_state=seed).values
        )
        df_shuf = add_neighbor_flags(df_shuf)
        _, coefs_shuf_full = run_regressions_on(df_shuf, f'shuf_full_{i}', dependent_var=dependent_var, regressors=TARGET_REGS)
        
        df_shuf_filt = df_shuf[df_shuf['dwa_execType_varying'] == 1].reset_index(drop=True)
        _, coefs_shuf_filt = run_regressions_on(df_shuf_filt, f'shuf_filt_{i}', dependent_var=dependent_var, regressors=TARGET_REGS)

    # Convert to dict of propensities
    d_full = tidy_to_dict(coefs_shuf_full) if not coefs_shuf_full.empty else tidy_to_dict(pd.DataFrame())
    d_filt = tidy_to_dict(coefs_shuf_filt) if not coefs_shuf_filt.empty else tidy_to_dict(pd.DataFrame())

    for spec in SPECS:
        for t in TARGET_REGS:
            resh_full[spec][t].append(d_full.get(spec, {}).get(t, np.nan))
            resh_filt[spec][t].append(d_filt.get(spec, {}).get(t, np.nan))

    if (i+1) % 50 == 0:
        print(f'  Completed {i+1}/{n_shuffles}')

print('Reshuffles complete; creating comparative plots')

Computing observed coefficients (original ordering)
Running 1000 reshuffles...
  Completed 50/1000
  Completed 100/1000
  Completed 150/1000
  Completed 200/1000
  Completed 250/1000
  Completed 300/1000
  Completed 350/1000
  Completed 400/1000


In [None]:
# --- Plotting: distributions of propensities ---
def plot_comparison_hist(resh_dict, obs_dict, title, out_name, plot_title_variable, bins=30):
    """Create the multi-row comparison histogram and also save each row (spec) as a separate image.

    Args:
        resh_dict: dict of reshuffled propensities per spec and term
        obs_dict: dict of observed propensities per spec and term
        title: title string to include in saved figures
        out_name: filename for the full multi-row figure
        plot_title_variable: human-readable dependent var name for titles
        bins: histogram bins
    """
    colors = [plt.cm.tab10(i % 10) for i in range(len(SPECS))]
    fig, axes = plt.subplots(nrows=len(SPECS), ncols=len(TARGET_REGS), figsize=(6*len(TARGET_REGS), 5*len(SPECS)), sharey='col')

    for r, spec in enumerate(SPECS):
        color_row = colors[r]
        for c, term in enumerate(TARGET_REGS):
            ax = axes[r, c] if len(SPECS) > 1 else axes[c]
            vals = np.array(resh_dict[spec][term], dtype=float)
            vals_clean = vals[~np.isnan(vals)]

            if len(vals_clean):
                ax.hist(vals_clean, bins=bins, color=color_row, alpha=0.7, edgecolor='k')
                lo, hi = np.percentile(vals_clean, [2.5, 97.5])
                ax.axvline(lo, color=color_row, linestyle=':', alpha=0.8)
                ax.axvline(hi, color=color_row, linestyle=':', alpha=0.8)
                ax.axvline(np.mean(vals_clean), color=color_row, linestyle='-', alpha=0.9)
            else:
                ax.text(0.5, 0.5, 'no estimates', ha='center', va='center')

            # observed propensity (red dashed)
            obs_val = obs_dict.get(spec, {}).get(term, np.nan)
            if not np.isnan(obs_val):
                ax.axvline(obs_val, color='red', linestyle='--', linewidth=3, label='observed')

            # baseline: random (0.5 probability)
            ax.axvline(0.5, color='black', linestyle='--', linewidth=2, alpha=0.7, label='0.5 baseline')

            if r == 0:
                ax.set_title(term, fontsize=12)
            if r == len(SPECS) - 1:
                ax.set_xlabel('Regression Coefficient', fontsize=12)
            if c == 0:
                ax.set_ylabel(spec, fontsize=12)
            ax.set_xlim(0.42, 0.78)

            ax.grid(axis='y', linestyle=':', alpha=0.5)

    # fig.suptitle(f'Reshuffled vs. Observed Propensity (P[ {plot_title_variable} | Neighbor AI ])\n\n{title}',
    #              fontsize=16, fontweight='bold')
    fig.tight_layout(rect=[0, 0, 1, 0.97])

    # Ensure output dir exists
    Path(output_plot_path).mkdir(parents=True, exist_ok=True)

    # Save full multi-row figure
    out_dir = f'{output_plot_path}/{dependent_var}'
    os.makedirs(out_dir, exist_ok=True)
    out_path = f'{out_dir}/{out_name}'
    fig.savefig(out_path, dpi=150, bbox_inches='tight')
    print('Saved full multi-row plot to', out_path)

    # Also save each row (each spec) as its own figure while keeping the multi-row output
    base_name = out_name.rsplit('.', 1)[0]
    for r, spec in enumerate(SPECS):
        # Create a single-row figure with one column per target reg
        fig_row, axs_row = plt.subplots(nrows=1, ncols=len(TARGET_REGS), figsize=(24, 5), sharey=False)
        # normalize axs_row to list for consistent indexing
        if len(TARGET_REGS) == 1:
            axs_row = [axs_row]
        color_row = colors[r]
        for c, term in enumerate(TARGET_REGS):
            axr = axs_row[c]
            vals = np.array(resh_dict[spec][term], dtype=float)
            vals_clean = vals[~np.isnan(vals)]

            if len(vals_clean):
                axr.hist(vals_clean, bins=bins, color=color_row, alpha=0.7, edgecolor='k')
                lo, hi = np.percentile(vals_clean, [2.5, 97.5])
                axr.axvline(lo, color=color_row, linestyle=':', alpha=0.8)
                axr.axvline(hi, color=color_row, linestyle=':', alpha=0.8)
                axr.axvline(np.mean(vals_clean), color=color_row, linestyle='-', alpha=0.9)
            else:
                axr.text(0.5, 0.5, 'no estimates', ha='center', va='center')

            obs_val = obs_dict.get(spec, {}).get(term, np.nan)
            if not np.isnan(obs_val):
                axr.axvline(obs_val, color='red', linestyle='--', linewidth=3, label='observed')

            axr.axvline(0.5, color='black', linestyle='--', linewidth=2, alpha=0.7, label='0.5 baseline')
            axr.set_title(term, fontsize=12)
            if c == 0:
                axr.set_ylabel(spec, fontsize=12)
            axr.grid(axis='y', linestyle=':', alpha=0.5)
            axr.set_xlim(0.42, 0.78)
            axr.set_xlabel('Regression Coefficient', fontsize=12)

        # fig_row.suptitle(f'{spec} — Reshuffled vs. Observed Propensity (P[{plot_title_variable} | Neighbor AI])\n\n{title}', fontsize=14)
        fig_row.tight_layout(rect=[0, 0, 1, 0.95])
        # Save
        out_dir = f'{output_plot_path}/{dependent_var}'
        os.makedirs(out_dir, exist_ok=True)
        out_path_row = f'{out_dir}/{base_name}_{spec}.png'
        fig_row.savefig(out_path_row, dpi=150, bbox_inches='tight')
        plt.close(fig_row)
        print('Saved row plot to', out_path_row)

    plt.close()

# --- Run plots ---
Path(output_plot_path).mkdir(parents=True, exist_ok=True)
plot_comparison_hist(resh_full, obs_dict_full, f'FULL Dataset (n={n_shuffles})', f'propensity_full_{dependent_var}.png', plot_title_variable)
plot_comparison_hist(resh_filt, obs_dict_filt, f'FILTERED Dataset (n={n_shuffles})', f'propensity_filtered_{dependent_var}.png', plot_title_variable)

print('All done: comparative propensity histogram figures created.')

Saved full multi-row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_full_is_ai.png
Saved row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_full_is_ai_no_fe.png
Saved row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_full_is_ai_fe_MajorGroup.png
Saved row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_full_is_ai_fe_MinorGroup.png
Saved full multi-row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_filtered_is_ai.png
Saved row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_filtered_is_ai_no_fe.png
Saved row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_filtered_is_ai_fe_MajorGroup.png
Saved row plot to ../writeup/plots/execTypeVaryingDWA/is_ai/propensity_filtered_is_ai_fe_MinorGroup.png
All done: comparative propensity histogram figures created.
