In [1]:
import pandas as pd
import numpy as np
from ppi_py import ppi_ols_ci, classical_ols_ci, ppi_ols_pointestimate

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [167]:
# Non-null count of every column, for each level of NumberOfCharacters.
# NOTE(review): `df` is only read from disk in a later cell of this file —
# confirm the intended cell order before a fresh "Run All".
df.groupby('NumberOfCharacters').count()

Unnamed: 0_level_0,ResponseID,AttributeLevel,ScenarioTypeStrict,ScenarioType,Intervention,PedPed,Barrier,CrossingSignal,DiffNumberOFCharacters,LeftHand,...,Saved,SavedGPT4,SavedGPT3,SavedLlama2,SavedPalm2,ResponseIDGPT4,ResponseIDGPT3,ResponseIDLlama2,ResponseIDPalm2,weights
NumberOfCharacters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,880,880,880,880,880,880,880,880,880,880,...,880,535,880,879,880,535,880,879,880,880
2,2593,2593,2593,2593,2593,2593,2593,2593,2593,2593,...,2593,956,2569,2505,2593,956,2569,2505,2593,2593
3,3566,3566,3566,3566,3566,3566,3566,3566,3566,3566,...,3566,1023,3457,3269,3566,1023,3457,3269,3566,3566
4,3857,3857,3857,3857,3857,3857,3857,3857,3857,3857,...,3857,962,3725,3239,3856,962,3725,3239,3856,3857
5,4048,4048,4048,4048,4048,4048,4048,4048,4048,4048,...,4048,884,3913,3290,4047,884,3913,3290,4047,4048


In [5]:
# Remote alternative kept for reference (note that it points at Joined.csv,
# not at the long-format file that is read below):
#df1 = pd.read_csv("https://raw.githubusercontent.com/davidbroska/IntegrativeExperimentsGAI/main/Data/JoinedAwadTakemoto/Joined.csv")
df = pd.read_csv("../Data/JoinedLong.csv")

# Variable names that the simulation loops pass to calc_ci as `var`.
Covs = [
    # scenario structure
    'PedPed', 'Barrier', 'CrossingSignal',
    'NumberOfCharacters', 'DiffNumberOFCharacters', 'LeftHand',
    # character types
    'Man', 'Woman', 'Pregnant', 'Stroller',
    'OldMan', 'OldWoman', 'Boy', 'Girl',
    'Homeless', 'LargeWoman', 'LargeMan', 'Criminal',
    'MaleExecutive', 'FemaleExecutive',
    'FemaleAthlete', 'MaleAthlete',
    'FemaleDoctor', 'MaleDoctor',
    'Dog', 'Cat',
    # remaining scenario attribute
    'Intervention',
]

def calc_ci(dataset, var, n, N, LLM, max_resamples=1000):
  """
  Estimate the OLS slope of `Saved` on `var` from one random subsample, once
  with prediction-powered inference (PPI) and once with classical OLS.

  A sample of n+N rows is drawn from `dataset` (pandas default: without
  replacement). Its first n rows serve as the labeled data (human outcome plus
  LLM prediction); the remaining N rows serve as the unlabeled data, for which
  only the LLM prediction is used.

  Args:
      dataset: DataFrame containing the columns `var`, 'Saved', 'Saved'+LLM
        and 'weights'.
      var: The name of the variable (e.g. Barrier, Dog,...).
      n: The sample size of the human responses.
      N: The sample size of the responses predicted by the algorithm.
      LLM: The name of the LLM: GPT4, GPT3, Llama2, or Palm2
      max_resamples: Maximum number of samples drawn while looking for one
        with enough variation; a ValueError is raised once it is exhausted.

  Returns:
      A one-row pandas DataFrame with the following columns:
        * var: The name of the variable.
        * n: The sample size of the human responses.
        * N: The sample size of the LLM-predicted responses.
        * pointest_ppi: The PPI point estimate of the coefficient on `var`.
        * lower_CI_ppi: The lower bound of the PPI confidence interval.
        * upper_CI_ppi: The upper bound of the PPI confidence interval.
        * lower_CI_ols: The lower bound of the OLS confidence interval.
        * upper_CI_ols: The upper bound of the OLS confidence interval.

  Raises:
      ValueError: If none of the `max_resamples` samples contains at least
        four distinct (var, outcome) combinations for both the human outcome
        and the LLM prediction.
  """

  llm_col = 'Saved' + LLM

  def n_cells(frame, outcome):
    # number of distinct (var, outcome) combinations present in the frame
    return frame.groupby([var, outcome]).size().shape[0]

  # Ensure that there is variation in X and Y in the sample, for the human
  # outcome as well as for the LLM prediction. In the rare case when there is
  # none, sample again (4 bc there are two variables with two levels).
  # The number of attempts is bounded so that a dataset without any variation
  # raises instead of looping forever.
  for _ in range(max_resamples):
    sub_df = dataset.sample(n+N, ignore_index=True)
    if n_cells(sub_df, 'Saved') >= 4 and n_cells(sub_df, llm_col) >= 4:
      break
  else:
    raise ValueError(
        f"no sample of {n+N} rows with variation in {var!r}, 'Saved' and "
        f"{llm_col!r} found within {max_resamples} draws")

  df_people = sub_df.iloc[:n]    # labeled part
  df_gpt = sub_df.iloc[n:]       # unlabeled part

  Xn = np.ones((n,2))                                # column 0: intercept
  Xn[:,1] = df_people[var]                           # covariate in the labeled data
  Yn_ppl = df_people['Saved'].to_numpy()             # observed outcomes
  Yn_gpt = df_people[llm_col].to_numpy()             # LLM predictions for labeled data
  w_labeled = df_people['weights'].to_numpy()        # weights for the labeled data

  XN = np.ones((N,2))                                # column 0: intercept
  XN[:,1] = df_gpt[var]                              # covariate in the unlabeled data
  YN_gpt = df_gpt[llm_col].to_numpy()                # LLM predictions for unlabeled data
  w_unlabeled = df_gpt['weights'].to_numpy()         # weights for the unlabeled data

  # calculate point estimate
  pointest_ppi = ppi_ols_pointestimate(Xn, Yn_ppl, Yn_gpt, XN, YN_gpt, w=w_labeled, w_unlabeled=w_unlabeled)

  # calculate confidence intervals
  # https://ppi-py.readthedocs.io/en/latest/baselines.html#ppi_py.classical_ols_ci
  lower_CI_ppi, upper_CI_ppi = ppi_ols_ci(Xn, Yn_ppl, Yn_gpt, XN, YN_gpt, w=w_labeled, w_unlabeled=w_unlabeled,alpha=.05)
  lower_CI_ols, upper_CI_ols = classical_ols_ci(Xn, Yn_ppl, w=w_labeled,alpha=.05)

  # Create and return the output DataFrame; index 1 selects the coefficient
  # on `var` (index 0 belongs to the intercept column).
  output_df = pd.DataFrame({
      "var": var,
      "n": n,
      "N": N,
      "pointest_ppi": pointest_ppi[1],
      "lower_CI_ppi": lower_CI_ppi[1],
      "upper_CI_ppi": upper_CI_ppi[1],
      "lower_CI_ols": lower_CI_ols[1],
      "upper_CI_ols": upper_CI_ols[1]}, index=[0])
  return output_df

## GPT4 Prediction (new sampling)

In [36]:
# Keep only the rows that have both an observed outcome (`Saved`) and a GPT-4
# prediction (`SavedGPT4`).
GPT4 = df.dropna(subset=["SavedGPT4", "Saved"])

# Human sample sizes n, and multipliers k; the simulation loop uses N = n*k as
# the LLM sample size.
ns = range(50,250,50)
ks = range(1,21,1)



In [41]:
# NOTE: this rebinds the notebook-wide `Covs` to a five-variable subset; any
# cell executed afterwards that reads `Covs` sees this shorter list.
Covs = ['NumberOfCharacters','Boy','Girl','Woman','Man']
reps = 100  # replicates per (var, n, N) combination

# Collect the one-row frames in a list and concatenate once at the end:
# calling pd.concat inside the innermost loop copies all accumulated rows on
# every iteration.
result_2_frames = []
for var in Covs:
  print(f"Iterating over the variable: {var}")

  for n in ns:
    print(f"with human sample size: {n}")

    for k in ks:
      N = n*k  # LLM sample size as a multiple of the human sample size
      print(f"Iterating over the LLM sample size: {N}")

      for _ in range(reps):
        result_2_frames.append(calc_ci(GPT4, var, n, N, 'GPT4'))

# pd.concat raises on an empty list, so fall back to an empty frame.
result_2 = pd.concat(result_2_frames) if result_2_frames else pd.DataFrame()

Iterating over the variable: NumberOfCharacters
with human sample size: 50
Iterating over the LLM sample size: 50
Iterating over the LLM sample size: 100
Iterating over the LLM sample size: 150
Iterating over the LLM sample size: 200
Iterating over the LLM sample size: 250
Iterating over the LLM sample size: 300
Iterating over the LLM sample size: 350
Iterating over the LLM sample size: 400
Iterating over the LLM sample size: 450
Iterating over the LLM sample size: 500
Iterating over the LLM sample size: 550
Iterating over the LLM sample size: 600
Iterating over the LLM sample size: 650
Iterating over the LLM sample size: 700
Iterating over the LLM sample size: 750
Iterating over the LLM sample size: 800
Iterating over the LLM sample size: 850
Iterating over the LLM sample size: 900
Iterating over the LLM sample size: 950
Iterating over the LLM sample size: 1000
with human sample size: 100
Iterating over the LLM sample size: 100
Iterating over the LLM sample size: 200
Iterating over th

In [44]:
# Destination of the simulation results, relative to the notebook
filename = '../Data/5_PPI_GPT4_ns.csv'

# Write result_2 as CSV through an explicitly opened handle; newline=''
# switches off newline translation in open()
with open(filename, mode='w', newline='') as csv_handle:
    result_2.to_csv(csv_handle)


## GPT4 Predictions

In [28]:
# Number of rows in the GPT4 frame; the sample-size grids below are derived from it.
GPT4.shape[0] 

4360

In [22]:

# Grid of human (labeled) sample sizes: 50, 100, ..., maxhuman.
maxhuman = 1000
humansamples = list(range(50, maxhuman + 1, 50))

# Largest LLM sample that still leaves maxhuman rows for the human sample,
# rounded down to a multiple of 100; LLM sample sizes run in steps of 50.
maxllm = GPT4.shape[0] - maxhuman
maxllm = (maxllm // 100) * 100
llmsamples = list(range(50, maxllm + 1, 50))
print(humansamples)
print(llmsamples)




[50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000]
[50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1750, 1800, 1850, 1900, 1950, 2000, 2050, 2100, 2150, 2200, 2250, 2300, 2350, 2400, 2450, 2500, 2550, 2600, 2650, 2700, 2750, 2800, 2850, 2900, 2950, 3000, 3050, 3100, 3150, 3200, 3250, 3300]


In [24]:
# Single trial run: 'Intervention' with n=100 human rows and N=1000 LLM rows,
# using the SavedGPT4 prediction column.
calc_ci(GPT4,'Intervention', 100, 1000,'GPT4')

Unnamed: 0,var,n,N,pointest_ppi,lower_CI_ppi,upper_CI_ppi,lower_CI_ols,upper_CI_ols
0,Intervention,100,1000,0.045963,-0.149699,0.237044,-0.201181,0.218372


In [25]:
# NOTE: this rebinds the notebook-wide `Covs` to a five-variable subset; any
# cell executed afterwards that reads `Covs` sees this shorter list.
Covs = ['NumberOfCharacters','Boy','Girl','Woman','Man']
reps = 75  # replicates per (var, n, N) combination

# Collect the one-row frames in a list and concatenate once at the end:
# calling pd.concat inside the innermost loop copies all accumulated rows on
# every iteration.
result_frames = []
for var in Covs:
  print(f"Iterating over the variable: {var}")

  for n in humansamples:
    print(f"with human sample size: {n}")

    for N in llmsamples:
      # use `reps` so the replicate count is set in exactly one place
      for _ in range(reps):
        result_frames.append(calc_ci(GPT4, var, n, N, 'GPT4'))

# pd.concat raises on an empty list, so fall back to an empty frame.
result = pd.concat(result_frames) if result_frames else pd.DataFrame()


Iterating over the variable: NumberOfCharacters
with human sample size: 50
with human sample size: 100
with human sample size: 150
with human sample size: 200
with human sample size: 250
with human sample size: 300
with human sample size: 350
with human sample size: 400
with human sample size: 450
with human sample size: 500
with human sample size: 550
with human sample size: 600
with human sample size: 650
with human sample size: 700
with human sample size: 750
with human sample size: 800
with human sample size: 850
with human sample size: 900
with human sample size: 950
with human sample size: 1000
Iterating over the variable: Boy
with human sample size: 50
with human sample size: 100
with human sample size: 150
with human sample size: 200
with human sample size: 250
with human sample size: 300
with human sample size: 350
with human sample size: 400
with human sample size: 450
with human sample size: 500
with human sample size: 550
with human sample size: 600
with human sample size: 

In [191]:
# Display the accumulated simulation results (one row per calc_ci call).
result

Unnamed: 0,var,n,N,pointest_ppi,lower_CI_ppi,upper_CI_ppi,lower_CI_ols,upper_CI_ols
0,NumberOfCharacters,50,50,0.028141,-0.077703,0.124850,-0.173364,0.051349
0,NumberOfCharacters,50,50,0.026899,-0.115545,0.169344,-0.114113,0.167912
0,NumberOfCharacters,50,50,0.050614,-0.002289,0.179843,0.002747,0.224082
0,NumberOfCharacters,50,50,-0.041688,-0.154505,0.071829,-0.153421,0.076912
0,NumberOfCharacters,50,50,0.133110,0.051499,0.245311,0.047162,0.272702
...,...,...,...,...,...,...,...,...
0,NumberOfCharacters,50,200,0.115511,-0.003154,0.229041,-0.083416,0.133544
0,NumberOfCharacters,50,200,0.062239,-0.051952,0.174004,-0.065572,0.141322
0,NumberOfCharacters,50,200,-0.111285,-0.206448,-0.015487,-0.198308,-0.010547
0,NumberOfCharacters,50,200,0.054194,-0.075967,0.200460,-0.071079,0.197978


In [26]:
# Destination of the simulation results, relative to the notebook
filename = '../Data/4_PPI_GPT4.csv'

# Write result as CSV through an explicitly opened handle; newline=''
# switches off newline translation in open()
with open(filename, mode='w', newline='') as csv_handle:
    result.to_csv(csv_handle)


## GPT3

In [62]:
# Rows that have both an observed outcome and a GPT-3 prediction.
GPT3 = df.dropna(subset=["SavedGPT3", "Saved"])

# Human sample sizes: steps of 50 up to 500, then steps of 100 up to maxhuman.
maxhuman = 2000
humansamples = [*range(50, 501, 50), *range(600, maxhuman+1, 100)]

# Rows left over once maxhuman are set aside, rounded down to a multiple of 100.
maxllm = ((GPT3.shape[0] - maxhuman) // 100) * 100
llmsamples = [*range(50, 501, 50), *range(600, maxllm+1, 100)]

print(humansamples)
print(llmsamples)
maxllm

[50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000]
[50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900, 5000, 5100, 5200, 5300, 5400, 5500, 5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7700, 7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8800, 8900, 9000, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11700, 11800, 11900, 12000, 12100, 12200, 12300, 12400, 12500]


12500

In [63]:
# Single trial run on the GPT-3 subset. calc_ci requires the LLM name as its
# fifth argument; it selects the prediction column 'Saved' + LLM.
calc_ci(GPT3, 'Barrier', 100, 1000, 'GPT3')

Unnamed: 0,var,n,N,lower_CI_ppi,upper_CI_ppi,lower_CI_ols,upper_CI_ols
0,Barrier,100,1000,,,-0.282639,0.095972


In [69]:


# One calc_ci draw per (var, n, N) combination on the GPT-3 subset.
# NOTE(review): `Covs` is whatever list was assigned last in the kernel —
# confirm that the intended variable list is in scope before running.
# The one-row frames are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop copies all accumulated rows each time.
result_frames = []

for var in Covs:
  print(f"Iterating over the variable: {var}")

  for n in humansamples:
    print(f"with human sample size: {n}")

    for N in llmsamples:
      print(f"Iterating over the LLM sample size: {N}")
      result_frames.append(calc_ci(GPT3, var, n, N, 'GPT3'))

# pd.concat raises on an empty list, so fall back to an empty frame.
result = pd.concat(result_frames) if result_frames else pd.DataFrame()

Iterating over the variable: PedPed
with human sample size: 50
Iterating over the LLM sample size: 50
Iterating over the LLM sample size: 100
Iterating over the LLM sample size: 150
Iterating over the LLM sample size: 200
Iterating over the LLM sample size: 250
Iterating over the LLM sample size: 300
Iterating over the LLM sample size: 350
Iterating over the LLM sample size: 400
Iterating over the LLM sample size: 450
Iterating over the LLM sample size: 500
Iterating over the LLM sample size: 600
Iterating over the LLM sample size: 700
Iterating over the LLM sample size: 800
Iterating over the LLM sample size: 900
Iterating over the LLM sample size: 1000
Iterating over the LLM sample size: 1100
Iterating over the LLM sample size: 1200
Iterating over the LLM sample size: 1300
Iterating over the LLM sample size: 1400
Iterating over the LLM sample size: 1500
Iterating over the LLM sample size: 1600
Iterating over the LLM sample size: 1700
Iterating over the LLM sample size: 1800
Iterating

In [70]:
# Destination of the simulation results, relative to the notebook
filename = '../Data/2_PPI_GPT3.csv'

# Write result as CSV through an explicitly opened handle; newline=''
# switches off newline translation in open()
with open(filename, mode='w', newline='') as csv_handle:
    result.to_csv(csv_handle)

In [71]:
# Display the accumulated GPT-3 simulation results (one row per calc_ci call).
result

Unnamed: 0,var,n,N,lower_CI_ppi,upper_CI_ppi,lower_CI_ols,upper_CI_ols
0,PedPed,50,50,-0.285226,0.180063,-0.269762,0.198333
0,PedPed,50,100,-0.290009,0.175929,-0.269762,0.198333
0,PedPed,50,150,-0.283395,0.182548,-0.269762,0.198333
0,PedPed,50,200,-0.274435,0.189797,-0.269762,0.198333
0,PedPed,50,250,-0.276443,0.188011,-0.269762,0.198333
...,...,...,...,...,...,...,...
0,Intervention,2000,12100,-0.057678,0.014969,-0.055026,0.018531
0,Intervention,2000,12200,-0.057915,0.014731,-0.055026,0.018531
0,Intervention,2000,12300,-0.058072,0.014573,-0.055026,0.018531
0,Intervention,2000,12400,-0.058031,0.014613,-0.055026,0.018531
