In [2]:
import pandas as pd
import numpy as np
from ppi_py import ppi_ols_ci, classical_ols_ci, ppi_ols_pointestimate

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
df1 = pd.read_csv("https://raw.githubusercontent.com/davidbroska/IntegrativeExperimentsGAI/main/Data/JoinedAwadTakemoto/Joined_GPT4.csv")
df1.shape
df = df1.dropna()
df.shape

(17013, 32)

In [4]:
def calc_ci(var, n, N):
  """

  Args:
      var: The name of the variable.
      n: The sample size of the human responses.
      N: The sample size of the responses predicted by the algorithm.

  Returns:
      A pandas DataFrame with the following columns:
        * var: The name of the variable.
        * n: The sample size.
        * N: The population size.
        * lower_CI_ppi: The lower bound of the PPI confidence interval.
        * upper_CI_ppi: The upper bound of the PPI confidence interval.
        * lower_CI_ols: The lower bound of the OLS confidence interval.
        * upper_CI_ols: The upper bound of the OLS confidence interval.
  """

  seed = 123

  sub_df = df.sample(n+N, random_state=seed, ignore_index=True)
  df_people = sub_df.iloc[:n]
  df_gpt = sub_df.iloc[n:]

  Xn = np.ones((n,2))
  Xn[:,1] = df_people[var]
  Yn_ppl = df_people['Saved'].to_numpy()
  Yn_gpt = df_people['SavedGPT4'].to_numpy()

  XN = np.ones((N,2))
  XN[:,1] = df_gpt[var]
  YN_gpt = df_gpt['SavedGPT4'].to_numpy()

  lower_CI_ppi, upper_CI_ppi = ppi_ols_ci(Xn, Yn_ppl, Yn_gpt, XN, YN_gpt)
  lower_CI_ols, upper_CI_ols = classical_ols_ci(Xn, Yn_ppl)

  # Create and return the output DataFrame
  output_df = pd.DataFrame({
      "var": var,
      "n": n,
      "N": N,
      "lower_CI_ppi": lower_CI_ppi[1],
      "upper_CI_ppi": upper_CI_ppi[1],
      "lower_CI_ols": lower_CI_ols[1],
      "upper_CI_ols": upper_CI_ols[1]}, index=[0])
  return output_df

In [6]:
Covs = ['PedPed', 'Barrier', 'CrossingSignal', 'NumberOfCharacters',
        'DiffNumberOFCharacters', 'LeftHand', 'Man', 'Woman', 'Pregnant',
        'Stroller', 'OldMan', 'OldWoman', 'Boy', 'Girl', 'Homeless',
        'LargeWoman', 'LargeMan', 'Criminal', 'MaleExecutive',
        'FemaleExecutive', 'FemaleAthlete', 'MaleAthlete', 'FemaleDoctor',
        'MaleDoctor', 'Dog', 'Cat', 'Intervention']
result = pd.DataFrame()

for var in range(1,len(Covs)):
  print(f"Iterating over the variable: {Covs[var]}")

  for n in range(50, 10**3+1, 50):
    #print(f"Iterating over the human sample size: {n}")

    for N in range(500, 10**4+1, 500):

      result = pd.concat([result, calc_ci(Covs[var], n, N)])

Iterating over the variable: Barrier
Iterating over the variable: CrossingSignal
Iterating over the variable: NumberOfCharacters
Iterating over the variable: DiffNumberOFCharacters
Iterating over the variable: LeftHand
Iterating over the variable: Man
Iterating over the variable: Woman
Iterating over the variable: Pregnant
Iterating over the variable: Stroller
Iterating over the variable: OldMan
Iterating over the variable: OldWoman
Iterating over the variable: Boy
Iterating over the variable: Girl
Iterating over the variable: Homeless
Iterating over the variable: LargeWoman
Iterating over the variable: LargeMan
Iterating over the variable: Criminal
Iterating over the variable: MaleExecutive
Iterating over the variable: FemaleExecutive
Iterating over the variable: FemaleAthlete
Iterating over the variable: MaleAthlete
Iterating over the variable: FemaleDoctor
Iterating over the variable: MaleDoctor
Iterating over the variable: Dog
Iterating over the variable: Cat
Iterating over the var

In [8]:
# Define the filename and path
filename = '3_result.csv'

# Export the DataFrame to a CSV file
with open(filename, 'w', newline='') as f:
    result.to_csv(f)