In [49]:
import pandas as pd
import numpy as np
import random
import statsmodels.api as sm
from ppi_py import ppi_ols_ci, classical_ols_ci, ppi_ols_pointestimate

df = pd.read_csv("../Data/5_SurveySampleLLM.csv.gz")

Covs = ['PedPed', 'Barrier', 'CrossingSignal', 'NumberOfCharacters',
        'DiffNumberOFCharacters', 'LeftHand', 'Man', 'Woman', 'Pregnant',
        'Stroller', 'OldMan', 'OldWoman', 'Boy', 'Girl', 'Homeless',
        'LargeWoman', 'LargeMan', 'Criminal', 'MaleExecutive',
        'FemaleExecutive', 'FemaleAthlete', 'MaleAthlete', 'FemaleDoctor',
        'MaleDoctor', 'Dog', 'Cat', 
        'Intervention'
        ]



In [2]:
# Very few missing predicted values for the dependent variable
print("Number of NAs Saved: ",df["Saved"].isna().sum())
print("Number of NAs gpt4turbo_wp_Saved: ",df["gpt4turbo_wp_Saved"].isna().sum())
print("Number of NAs gpt4o_wp_Saved: ",df["gpt4o_wp_Saved"].isna().sum())
print("Number of NAs gpt35turbo0125_wp_Saved: ",df["gpt35turbo0125_wp_Saved"].isna().sum())

Number of NAs Saved:  0
Number of NAs gpt4turbo_wp_Saved:  0
Number of NAs gpt4o_wp_Saved:  6
Number of NAs gpt35turbo0125_wp_Saved:  2


In [47]:
# calculate amce for intervention
def compute_amce_pooled(data, x, y, alpha=0.05):

    # drop rows with missing values for dependent variable
    dd = data.dropna(subset=y)

    if x=="Intervention":
    
        # design matrix
        X = dd["Intervention"]
        X = sm.add_constant(X)
        
        # estimate model with standard errors clustered on UserID
        model = sm.WLS(dd[y], X, weights=dd["weights"])
        fit = model.fit(cov_type = 'cluster', cov_kwds = {'groups': dd["UserID"]})
        coef = fit.params["Intervention"]
        ci = fit.conf_int(alpha=alpha).loc["Intervention"]
        
        # store results
        res = pd.DataFrame({
            'x': [x],
            'y': [y],
            'pointest_pooled': [coef],
            'conf_low_pooled': [ci[0]],
            'conf_high_pooled': [ci[1]]
        })

        return(res)

compute_amce_pooled(df, x="Intervention", y="Saved")
# same as results from R, est=0.06821629, ci.low=0.05246370, ci.high=0.08396889


print("gpt35",compute_amce_pooled(df, x="Intervention", y="gpt35turbo0125_wp_Saved"))
print("gpt4t",compute_amce_pooled(df, x="Intervention", y="gpt4turbo_wp_Saved"))

gpt35               x                        y  pointest_pooled  conf_low_pooled  \
0  Intervention  gpt35turbo0125_wp_Saved         0.047441         0.033755   

   conf_high_pooled  
0          0.061127  
gpt4t               x                   y  pointest_pooled  conf_low_pooled  \
0  Intervention  gpt4turbo_wp_Saved         0.085126         0.071104   

   conf_high_pooled  
0          0.099149  


In [27]:
# calculate PPI CI and CI based on silicon and human subjects
def compute_amce_ppi(n_data, N_data, x, y, alpha=0.05):

    # drop rows with missing values for dependent variable
    df_human = n_data.dropna(subset=y)
    df_silicon = N_data.dropna(subset=y)

    # labeled data
    Xn = np.ones((df_human.shape[0],2))           # intercept
    Xn[:,1] = df_human[x]                         # covariates in the labeled data
    Yn_ppl  = df_human['Saved'].to_numpy()        # observed outcomes
    Yn_gpt  = df_human[y].to_numpy()              # LLM predictions for labeled data
    w_labeled = df_human['weights'].to_numpy()    # define weigths for the labeled data

    # unlabeled data
    XN = np.ones((df_silicon.shape[0],2))
    XN[:,1] = df_silicon[x]
    YN_gpt = df_silicon[y].to_numpy()
    w_unlabeled = df_silicon['weights'].to_numpy()

    # calculate point estimate
    pointest_ppi = ppi_ols_pointestimate(Xn, Yn_ppl, Yn_gpt, XN, YN_gpt, w=w_labeled, w_unlabeled=w_unlabeled)

    # calculate confidence intervals https://ppi-py.readthedocs.io/en/latest/baselines.html#ppi_py.classical_ols_ci
    lower_CI_ppi, upper_CI_ppi = ppi_ols_ci(Xn, Yn_ppl, Yn_gpt, XN, YN_gpt, w=w_labeled, w_unlabeled=w_unlabeled,alpha=alpha)
    lower_CI_ols, upper_CI_ols = classical_ols_ci(Xn, Yn_ppl, w=w_labeled,alpha=alpha)

    # create and return the output DataFrame
    output_df = pd.DataFrame({
        "y": y,                              # Dependent variable (Saved)
        "x": x,                              # Predictor variable (scenario attribute)
        "pointest_ppi": pointest_ppi[1],     # PPI point estimate
        "conf_low_ppi": lower_CI_ppi[1],     # The lower bound of the PPI confidence interval
        "conf_high_ppi": upper_CI_ppi[1],    # The upper bound of the PPI confidence interval
        "conf_low_ols": lower_CI_ols[1],     # The lower bound of the OLS confidence interval
        "conf_high_ols": upper_CI_ols[1]},   # The upper bound of the OLS confidence interval
        index=[0])
    
    return output_df

In [42]:
ns = [50,500]
Ns = [50,100,200,300,#400,500,600,700,800,900,1000,
      1250,1500,1750,2000,2250,2500,2750,3000,
      3250,3500,3750,4000,4250,4500,4750,5000
      ]
Ys = ["gpt4turbo_wp_Saved","gpt4o_wp_Saved","gpt35turbo0125_wp_Saved"]
Xs = ['Intervention']
reps = 100
result = pd.DataFrame()
ids = df["ResponseID"].unique()

for y in Ys: 
  print(f"Iterating over dependent variable: {y}")

  for x in Xs:

    for n in ns:
      print(f"    Predictor: {x} with human sample size {n}")

      for N in Ns:
        print(f"        Iterating over the LLM sample size: {N}")

        for r in range(reps):
          
          # sample ids
          n_ids = random.sample(ids.tolist(), k=n)
          N_ids = random.sample(list(set(ids) - set(n_ids)), k=N)

          # subset data
          df_human = df[ df["ResponseID"].isin(n_ids) ]
          df_silicon = df [ df["ResponseID"].isin(N_ids)]

          df_pooled = pd.concat([df_human,df_silicon], ignore_index=True)
          
          # calculate confidence intervals
          ppi = compute_amce_ppi(n_data=df_human, N_data=df_silicon, x=x, y=y)
          print("ppi",ppi)
          pooled = compute_amce_pooled(data=df_pooled,x=x,y=y)
          print("pooled",pooled)

          to_append = pd.merge(ppi, pooled, on=['x','y'], how='outer')
          to_append["n"] = n
          to_append["N"] = N
          
          result = pd.concat([result, to_append], ignore_index=True)
          del ppi 
          del pooled 
          del to_append
          
          

Iterating over dependent variable: gpt4turbo_wp_Saved
    Predictor: Intervention with human sample size 50
        Iterating over the LLM sample size: 50
ppi                     y             x  pointest_ppi  conf_low_ppi  \
0  gpt4turbo_wp_Saved  Intervention      0.216714       -0.0206   

   conf_high_ppi  conf_low_ols  conf_high_ols  
0       0.430316     -0.074821       0.384429  
pooled               x                   y  pointest_pooled  conf_low_pooled  \
0  Intervention  gpt4turbo_wp_Saved         0.135528        -0.067444   

   conf_high_pooled  
0          0.338501  
ppi                     y             x  pointest_ppi  conf_low_ppi  \
0  gpt4turbo_wp_Saved  Intervention     -0.174972     -0.391363   

   conf_high_ppi  conf_low_ols  conf_high_ols  
0       0.041804     -0.378245       0.056366  
pooled               x                   y  pointest_pooled  conf_low_pooled  \
0  Intervention  gpt4turbo_wp_Saved         0.194147        -0.007722   

   conf_high_pooled  
0

In [43]:
result.to_csv("../Data/6_ResultsPPI.csv.gz", compression="gzip", index=False)
result


Unnamed: 0,y,x,pointest_ppi,conf_low_ppi,conf_high_ppi,conf_low_ols,conf_high_ols,pointest_pooled,conf_low_pooled,conf_high_pooled,n,N
0,gpt4turbo_wp_Saved,Intervention,0.216714,-0.020600,0.430316,-0.074821,0.384429,0.135528,-0.067444,0.338501,50,50
1,gpt4turbo_wp_Saved,Intervention,-0.174972,-0.391363,0.041804,-0.378245,0.056366,0.194147,-0.007722,0.396016,50,50
2,gpt4turbo_wp_Saved,Intervention,0.013768,-0.218922,0.241910,-0.217767,0.240755,-0.066542,-0.276190,0.143106,50,50
3,gpt4turbo_wp_Saved,Intervention,-0.070993,-0.289586,0.145017,-0.333882,0.119804,0.036819,-0.167971,0.241608,50,50
4,gpt4turbo_wp_Saved,Intervention,-0.172053,-0.392348,0.051072,-0.349041,0.101731,0.031544,-0.180461,0.243550,50,50
...,...,...,...,...,...,...,...,...,...,...,...,...
11995,gpt35turbo0125_wp_Saved,Intervention,0.080954,0.009841,0.152078,0.007505,0.151563,0.025131,-0.002895,0.053156,500,5000
11996,gpt35turbo0125_wp_Saved,Intervention,0.107670,0.035858,0.178694,0.021581,0.167031,0.031683,0.004118,0.059247,500,5000
11997,gpt35turbo0125_wp_Saved,Intervention,0.072883,0.000370,0.143599,-0.011231,0.132528,0.045559,0.017633,0.073486,500,5000
11998,gpt35turbo0125_wp_Saved,Intervention,0.133527,0.062779,0.204814,0.065326,0.208133,0.057417,0.028838,0.085996,500,5000
