# Compute effective sample sizes

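In this notebook, we compute the effective sample size ($n_0$) for the nine attributes of the Moral Machine experiment. For each attribute, $n_0 = \frac{n(n+N)}{n + N(1-\tilde{\rho}^2)}$, where $\tilde{\rho}$ is the attribute's PPI correlation; the table below evaluates it at $n = 10^4$ and $N = 10^5$.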

In [1]:
import ppi_py
import pandas as pd
import numpy as np
import PythonFunctions as pf
from scipy import stats

# Load the per-attribute estimates (one row per attribute).
# The first CSV column is a saved row index with an empty header; without
# `index_col=0` pandas surfaces it as a meaningless "Unnamed: 0" data column.
rhos = pd.read_csv("../Data/5_rho.csv", index_col=0)
rhos

Unnamed: 0,y,x,beta_ppi,beta_hum,beta_sil,beta_sil_sm,se_ppi,se_hum,se_sil,lower_ppi,upper_ppi,lower_hum,upper_hum,lower_sil,upper_sil,ppi_corr,n,N
0,gpt4turbo_wp_Saved,Intervention,0.081381,0.081581,0.081985,0.081985,0.001163,0.001173,0.002817,0.079101,0.08366,0.079282,0.083881,0.076464,0.087507,0.352677,1000000,163962
1,gpt4turbo_wp_Saved,Barrier,0.10581,0.10593,0.484486,0.484486,0.001757,0.001768,0.003702,0.102366,0.109254,0.102464,0.109396,0.477231,0.491741,0.314138,1000000,163962
2,gpt4turbo_wp_Saved,Gender,0.13401,0.134204,0.197077,0.197077,0.002694,0.002707,0.006378,0.128728,0.139289,0.128897,0.13951,0.184577,0.209576,0.26276,1000000,163962
3,gpt4turbo_wp_Saved,Fitness,0.174661,0.174951,0.016098,0.016098,0.002823,0.002839,0.006787,0.16913,0.180196,0.169386,0.180516,0.002797,0.029399,0.28312,1000000,163962
4,gpt4turbo_wp_Saved,Social Status,0.248259,0.248827,0.026267,0.026267,0.007595,0.007623,0.018429,0.233386,0.263158,0.233887,0.263767,-0.009853,0.062387,0.229833,1000000,163962
5,gpt4turbo_wp_Saved,CrossingSignal,0.378954,0.378903,0.657856,0.657856,0.001954,0.001967,0.003958,0.375125,0.382783,0.375048,0.382759,0.6501,0.665613,0.308987,1000000,163962
6,gpt4turbo_wp_Saved,Age,0.506392,0.506359,0.182285,0.182285,0.00244,0.002446,0.006541,0.501611,0.511174,0.501564,0.511154,0.169465,0.195105,0.198339,1000000,163962
7,gpt4turbo_wp_Saved,Utilitarian,0.572248,0.572312,0.552874,0.552874,0.002256,0.002262,0.005516,0.567826,0.57667,0.567878,0.576746,0.542063,0.563686,0.191972,1000000,163962
8,gpt4turbo_wp_Saved,Species,0.684679,0.684791,0.83383,0.83383,0.002011,0.002011,0.003904,0.680738,0.68862,0.680849,0.688733,0.826178,0.841482,0.049155,1000000,163962


In [2]:
def n0(n, N, rho):
    """Return the effective sample size, rounded to the nearest integer.

    Evaluates ``n * (n + N) / (n + N * (1 - rho**2))`` and converts the
    rounded result to ``int``.

    Parameters
    ----------
    n, N : number
        The two sample sizes entering the formula.
        (Presumably the human and the silicon sample sizes — TODO confirm.)
    rho : number
        Correlation term; only its square enters the formula, so the sign
        is irrelevant.

    Returns
    -------
    int
        The rounded effective sample size. The denominator
        ``n + N * (1 - rho**2)`` must be non-zero.
    """
    numerator = n * (n + N)
    # Share of N that is *not* explained by the correlation is added to n.
    denominator = n + (N * (1 - rho**2))

    return int(round(numerator / denominator, 0))


In [4]:
# Human-readable label for each attribute, kept as (attribute, label) pairs
# so every label sits right next to the key it belongs to.
attribute_labels = [
    ("Species", "Sparing humans vs animals"),
    ("Social Status", "Sparing high status vs low status"),
    ("Utilitarian", "Sparing more characters vs fewer"),
    ("Age", "Sparing the young vs old"),
    ("Gender", "Sparing women vs men"),
    ("Fitness", "Sparing the fit vs the large"),
    ("CrossingSignal", "Sparing the lawful vs unlawful"),
    ("Barrier", "Sparing pedestrians vs passengers"),
    ("Intervention", "Preference for inaction vs intervention"),
]
labels = pd.DataFrame(attribute_labels, columns=["Attribute", "Label"])

# Effective sample size per attribute, evaluated at a fixed n and N.
# NOTE(review): n and N are hard-coded here instead of being taken from the
# `n` / `N` columns of `rhos` — presumably a deliberate illustrative design;
# confirm.
def _n0_at_fixed_design(rho):
    """Effective sample size for one correlation at n = 10**4, N = 10**5."""
    return n0(n=10**4, N=10**5, rho=rho)


ppi_corr = rhos["ppi_corr"]
df = rhos[["x"]].assign(
    ppi_corr=ppi_corr.round(3),  # rounded for display only
    n0=ppi_corr.apply(_n0_at_fixed_design),  # computed from the unrounded value
)

# Merge labels and effective sample size.
# An inner join silently drops every attribute whose name does not appear in
# both tables, so check the keys up front and fail loudly on a mismatch.
unmatched = set(labels["Attribute"]) ^ set(df["x"])
assert not unmatched, f"Attributes present in only one table: {sorted(unmatched)}"

joined_df = (
    pd.merge(
        labels,
        df,
        left_on="Attribute",
        right_on="x",
        how="inner",
        validate="one_to_one",  # a duplicated attribute name would duplicate rows
    )
    .drop(columns=["x", "Attribute"])  # keep only Label, ppi_corr, n0
    .sort_values(by="ppi_corr", ascending=False)
)
print(joined_df)

# Export the table as LaTeX. The options are gathered in one place so the
# formatting choices are easy to scan and tweak.
latex_options = {
    "index": False,  # the row index carries no meaning for the reader
    "float_format": "{:.3f}".format,
    "header": ["Label", "PPI correlation", "Effective sample size"],
    "caption": "Effective sample size for each attribute in the Moral Machine experiment",
    "label": "tab:n0MME",
    "column_format": "lcc",
}
joined_df.to_latex("../Figures/7_n0MME.tex", **latex_options)

                                     Label  ppi_corr     n0
8  Preference for inaction vs intervention     0.353  11275
7        Sparing pedestrians vs passengers     0.314  10986
6           Sparing the lawful vs unlawful     0.309  10950
5             Sparing the fit vs the large     0.283  10786
4                     Sparing women vs men     0.263  10670
1        Sparing high status vs low status     0.230  10504
3                 Sparing the young vs old     0.198  10371
2         Sparing more characters vs fewer     0.192  10347
0                Sparing humans vs animals     0.049  10022
