# Loading packages

In [2]:
import numpy as np
import pandas as pd
import glob

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
from pandas.api.types import CategoricalDtype

# Loading data

In [5]:
# Participant-level responses after the modified exclusions.
# NOTE(review): unpickling can execute arbitrary code; only load this file from a trusted source.
data = pd.read_pickle("../data/modified_exclusions/pt_replication_modified_exclusions_data.pkl")

In [6]:
# Preview the first rows: item columns "1", "2", ... followed by participant metadata.
data.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,Loss_Intuition,Country,Duration_in_seconds,Language,Sample,Duration_Minutes,YoB,Awareness_of_Loss_Aversion,Financial_Stress,Presentation_Order
0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,Bulgaria,356,Bulgarian,Direct,5.933333,1992,3,,4|6|2|3|17|18|14|8|7|15|1|12|11|5|13|16|9|10
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,,Bulgaria,931,Bulgarian,Direct,15.516667,1979,1,,7|17|16|11|15|18|4|9|1|13|2|10|6|14|5|12|8|3
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,Bulgaria,462,Bulgarian,Direct,7.7,1975,3,,11|8|4|7|12|18|3|14|6|16|17|1|2|13|5|15|9|10
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,,Bulgaria,412,Bulgarian,Direct,6.866667,1977,1,,16|7|6|9|12|18|10|8|5|15|2|3|4|14|11|1|17|13
4,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,,Bulgaria,490,Bulgarian,Direct,8.166667,1970,1,,13|4|3|7|9|18|16|8|14|1|6|12|17|10|5|2|11|15


# Testing for the key theoretical contrasts in the PT paper

In [7]:
# Theoretical effect -> list of [item 1, item 2] pairs to contrast.
# Items are kept as strings because they are used as column labels of `data`.
PT_effects = {
    "Certainty Effect": [
        ["1", "2"],
        ["3", "4"],
        ["7", "8"],
    ],
    "Reflection Effect": [
        ["3", "7"],
        ["4", "8"],
        ["5", "9"],
        ["6", "10"],
        ["16", "17"],
    ],
    "Isolation Effect": [
        ["4", "11"],
    ],
    "Overweighting of small probabilities": [
        ["5", "6"],
        ["9", "10"],
    ],
    "Framing Effect": [
        ["12", "13"],
    ],
    "Range Adaptation": [
        ["14", "15"],
    ],
}

In [8]:
from scipy.stats import fisher_exact

### Odds ratios for the original data

In [9]:
# Values from the original study; the first spreadsheet column becomes the index,
# which later cells look up by integer item number.
opt_table = pd.read_excel("../data/original_study_values.xlsx", index_col=0)

In [10]:
# Preview the original-study table (estimated cell counts, reported N and proportions, gamble parameters).
opt_table.head()

Unnamed: 0,Option A (estimated),Option B (estimated),Reported N,Reported Proportion A,Reported Proportion B,Value A,Value B,Probability A,Probability B,Expected Value A,Expected Value B
1,12.96,59.04,72,0.18,0.82,"2500, 2400",2400,".33, .66",1.0,2409,2400.0
2,59.76,12.24,72,0.83,0.17,2500,2400,0.33,0.34,825,816.0
3,19.0,76.0,95,0.2,0.8,4000,3000,0.8,1.0,3200,3000.0
4,61.75,33.25,95,0.65,0.35,4000,3000,0.2,0.25,800,750.0
5,9.24,56.76,66,0.14,0.86,6000,3000,0.45,0.9,2700,2700.0


### "Option A (estimated)" and "Option B (estimated)" are the numbers of observations in each cell, computed from the total N and the proportions reported in the paper. Because counts of observations must be integers, we round these estimates.

In [11]:
# Round both estimated cell counts to whole observations in a single assignment.
estimate_columns = ["Option A (estimated)", "Option B (estimated)"]
opt_table[estimate_columns] = opt_table[estimate_columns].round()

### Run Fisher's exact test on the original data, and add the results to the dataframe

In [12]:
%%time
# Fisher's exact test for every contrast, using the estimated cell counts of the original study.
# One record per contrast is collected and the frame is built once at the end:
# growing a DataFrame with `.loc[len(df)] = ...` re-allocates it on every insert.
original_contrast_columns = ["Effect", "Item 1", "Item 2", "A1", "B1", "A2", "B2", "OR", "p-value"]
original_contrast_records = []
for effect_name, item_pairs in PT_effects.items():
    for q1, q2 in item_pairs:
        # Items are stored as strings in PT_effects but opt_table is indexed by integers.
        a1 = opt_table.loc[int(q1), "Option A (estimated)"]
        b1 = opt_table.loc[int(q1), "Option B (estimated)"]
        a2 = opt_table.loc[int(q2), "Option A (estimated)"]
        b2 = opt_table.loc[int(q2), "Option B (estimated)"]
        # 2x2 table: rows are the two items, columns are the counts choosing option A / option B.
        odds_ratio, p_value = fisher_exact([[a1, b1], [a2, b2]])
        original_contrast_records.append([effect_name, q1, q2, a1, b1, a2, b2, odds_ratio, p_value])

PT_contrasts_original_df = pd.DataFrame(original_contrast_records, columns=original_contrast_columns)
        

Wall time: 68.8 ms


In [13]:
# OR2 is the odds ratio folded onto the >= 1 side: ratios below 1 are replaced by their reciprocal.
PT_contrasts_original_df["OR2"] = PT_contrasts_original_df["OR"].mask(
    PT_contrasts_original_df["OR"] < 1,
    1 / PT_contrasts_original_df["OR"],
)
# Flag contrasts whose Fisher p-value is below the 5% level.
PT_contrasts_original_df["Significance"] = PT_contrasts_original_df["p-value"].lt(0.05)

## Odds ratio by country

### Run Fisher's exact test and add the results to the dataframe

In [14]:
%%time
# Fisher's exact test for every contrast within every country sample.
# Records are collected in a list and the frame is built once at the end:
# growing a DataFrame with `.loc[len(df)] = ...` re-allocates it on every insert.
country_contrast_columns = ["Country", "Effect", "Item 1", "Item 2", "A1", "B1", "A2", "B2", "OR", "p-value"]
country_contrast_records = []
for country in data["Country"].unique():
    country_rows = data.loc[data["Country"] == country, :]
    for effect_name, item_pairs in PT_effects.items():
        for q1, q2 in item_pairs:
            # Item columns are summed as 0/1 choices (see the data preview): the sum is one
            # cell, and the non-missing answers minus that sum is the other cell.
            a1 = country_rows[q1].sum()
            b1 = country_rows[q1].count() - a1
            a2 = country_rows[q2].sum()
            b2 = country_rows[q2].count() - a2
            # 2x2 table: rows are the two items, columns are the two cell counts.
            odds_ratio, p_value = fisher_exact([[a1, b1], [a2, b2]])
            country_contrast_records.append(
                [country, effect_name, q1, q2, a1, b1, a2, b2, odds_ratio, p_value]
            )

PT_contrasts_df2 = pd.DataFrame(country_contrast_records, columns=country_contrast_columns)
            
        

Wall time: 1.81 s


In [17]:
%%time
# OR2 is the odds ratio folded onto the >= 1 side: ratios below 1 are replaced by their reciprocal.
PT_contrasts_df2["OR2"] = PT_contrasts_df2["OR"].mask(
    PT_contrasts_df2["OR"] < 1,
    1 / PT_contrasts_df2["OR"],
)
# Flag contrasts whose Fisher p-value is below the 5% level.
PT_contrasts_df2["Significance"] = PT_contrasts_df2["p-value"].lt(0.05)
# Human-readable label for the item pair, e.g. "1 vs 2".
PT_contrasts_df2["Contrast"] = PT_contrasts_df2["Item 1"] + " vs " + PT_contrasts_df2["Item 2"]

Wall time: 3.99 ms


### Add the total sample size for each country to the dataframe

In [18]:
# Number of rows in `data` per country (value_counts leaves out missing countries by default).
sample_sizes = data["Country"].value_counts()

In [19]:
# Look up each row's country in the per-country counts; countries without a count stay NaN.
# The cast keeps the column as float, as it was when initialised with NaN.
PT_contrasts_df2["Sample Size"] = PT_contrasts_df2["Country"].map(sample_sizes).astype(float)


In [20]:
# Check the per-country contrasts now that OR2, Significance, Contrast and Sample Size are attached.
PT_contrasts_df2.head()

Unnamed: 0,Country,Effect,Item 1,Item 2,A1,B1,A2,B2,OR,p-value,OR2,Significance,Contrast,Sample Size
0,Bulgaria,Certainty Effect,1,2,23.0,104.0,72.0,55.0,0.168937,2.425416e-10,5.919368,True,1 vs 2,127.0
1,Bulgaria,Certainty Effect,3,4,15.0,112.0,65.0,62.0,0.127747,1.080756e-11,7.827957,True,3 vs 4,127.0
2,Bulgaria,Certainty Effect,7,8,109.0,18.0,76.0,51.0,4.063596,4.697618e-06,4.063596,True,7 vs 8,127.0
3,Bulgaria,Reflection Effect,3,7,15.0,112.0,109.0,18.0,0.022117,5.761813e-35,45.214815,True,3 vs 7,127.0
4,Bulgaria,Reflection Effect,4,8,65.0,62.0,76.0,51.0,0.703523,0.206642,1.421418,False,4 vs 8,127.0


In [21]:
# Original-study contrasts, for comparison with the per-country rows shown in the previous cell.
PT_contrasts_original_df.head()

Unnamed: 0,Effect,Item 1,Item 2,A1,B1,A2,B2,OR,p-value,OR2,Significance
0,Certainty Effect,1,2,13.0,59.0,60.0,12.0,0.044068,1.558248e-15,22.692308,True
1,Certainty Effect,3,4,19.0,76.0,62.0,33.0,0.133065,3.37366e-10,7.515152,True
2,Certainty Effect,7,8,87.0,8.0,40.0,55.0,14.953125,1.59834e-13,14.953125,True
3,Reflection Effect,3,7,19.0,76.0,87.0,8.0,0.022989,4.395039e-25,43.5,True
4,Reflection Effect,4,8,62.0,33.0,40.0,55.0,2.583333,0.002163003,2.583333,True


### Transform significance from boolean to string

In [22]:
# Build the "Yes"/"No" labels in one step. Initialising the column with float NaN and then
# assigning strings relies on a silent dtype upcast that recent pandas versions deprecate.
PT_contrasts_df2["Significant"] = PT_contrasts_df2["Significance"].map({True: "Yes", False: "No"})

### Adding log odds

In [23]:
# Natural log of the odds ratio: negative when OR < 1, positive when OR > 1.
# NOTE(review): an odds ratio of exactly 0 would give -inf here; confirm none occur.
PT_contrasts_df2["LogOR"] = np.log(PT_contrasts_df2["OR"])

In [24]:
# Same log transform for the original-study odds ratios, so signs can be compared across the two frames.
PT_contrasts_original_df["LogOR"] = np.log(PT_contrasts_original_df["OR"])

### Checking the extent to which the contrast effects replicate (given sign and significance)

In [26]:
# Start from the significance flag as 1.0 / 0.0 ...
PT_contrasts_df2["Significance2"] = PT_contrasts_df2["Significance"].astype(float)
# ... then zero it wherever the country effect does not point in the same direction as the original.
country_log_or_sign = np.sign(PT_contrasts_df2["LogOR"])
for itm1, itm2, original_log_or in zip(
    PT_contrasts_original_df["Item 1"],
    PT_contrasts_original_df["Item 2"],
    PT_contrasts_original_df["LogOR"],
):
    same_contrast = (PT_contrasts_df2["Item 1"] == itm1) & (PT_contrasts_df2["Item 2"] == itm2)
    opposite_direction = country_log_or_sign != np.sign(original_log_or)
    PT_contrasts_df2.loc[same_contrast & opposite_direction, "Significance2"] = 0
    

In [27]:
# Mean of the 0/1 replication flag per contrast, i.e. the share of country samples in which it replicates.
PT_contrasts_df2.groupby("Contrast")["Significance2"].mean()

Contrast
1 vs 2      1.000000
12 vs 13    1.000000
14 vs 15    1.000000
16 vs 17    0.631579
3 vs 4      1.000000
3 vs 7      1.000000
4 vs 11     1.000000
4 vs 8      0.157895
5 vs 6      1.000000
5 vs 9      1.000000
6 vs 10     0.842105
7 vs 8      1.000000
9 vs 10     1.000000
Name: Significance2, dtype: float64

In [28]:
# Replication rate per theoretical effect, pooling all of its contrasts and all countries.
PT_contrasts_df2.groupby("Effect")["Significance2"].mean()

Effect
Certainty Effect                        1.000000
Framing Effect                          1.000000
Isolation Effect                        1.000000
Overweighting of small probabilities    1.000000
Range Adaptation                        1.000000
Reflection Effect                       0.726316
Name: Significance2, dtype: float64

In [29]:
# Overall replication rate across every country-by-contrast row.
PT_contrasts_df2["Significance2"].mean()

0.8947368421052632

In [30]:
# Replication rate per country, highest first.
PT_contrasts_df2.groupby("Country")["Significance2"].mean().sort_values(ascending=False)

Country
Australia         1.000000
Mainland China    1.000000
Hong Kong         1.000000
Denmark           0.923077
Sweden            0.923077
Spain             0.923077
Serbia            0.923077
Norway            0.923077
Italy             0.923077
UK                0.923077
Hungary           0.923077
Austria           0.846154
Belgium           0.846154
USA               0.846154
Germany           0.846154
Slovenia          0.846154
Ireland           0.846154
Chile             0.769231
Bulgaria          0.769231
Name: Significance2, dtype: float64

# Quantifying differences in contrast effects between the original data and the country samples

In [31]:
# Per-country contrasts, including the LogOR and Significance2 columns added in earlier cells.
PT_contrasts_df2.head()

Unnamed: 0,Country,Effect,Item 1,Item 2,A1,B1,A2,B2,OR,p-value,OR2,Significance,Contrast,Sample Size,Significant,LogOR,Significance2
0,Bulgaria,Certainty Effect,1,2,23.0,104.0,72.0,55.0,0.168937,2.425416e-10,5.919368,True,1 vs 2,127.0,Yes,-1.77823,1.0
1,Bulgaria,Certainty Effect,3,4,15.0,112.0,65.0,62.0,0.127747,1.080756e-11,7.827957,True,3 vs 4,127.0,Yes,-2.057702,1.0
2,Bulgaria,Certainty Effect,7,8,109.0,18.0,76.0,51.0,4.063596,4.697618e-06,4.063596,True,7 vs 8,127.0,Yes,1.402068,1.0
3,Bulgaria,Reflection Effect,3,7,15.0,112.0,109.0,18.0,0.022117,5.761813e-35,45.214815,True,3 vs 7,127.0,Yes,-3.811425,1.0
4,Bulgaria,Reflection Effect,4,8,65.0,62.0,76.0,51.0,0.703523,0.206642,1.421418,False,4 vs 8,127.0,No,-0.351655,0.0


In [32]:
# Original-study contrasts, including the LogOR column used as the reference below.
PT_contrasts_original_df.head()

Unnamed: 0,Effect,Item 1,Item 2,A1,B1,A2,B2,OR,p-value,OR2,Significance,LogOR
0,Certainty Effect,1,2,13.0,59.0,60.0,12.0,0.044068,1.558248e-15,22.692308,True,-3.122026
1,Certainty Effect,3,4,19.0,76.0,62.0,33.0,0.133065,3.37366e-10,7.515152,True,-2.016921
2,Certainty Effect,7,8,87.0,8.0,40.0,55.0,14.953125,1.59834e-13,14.953125,True,2.70492
3,Reflection Effect,3,7,19.0,76.0,87.0,8.0,0.022989,4.395039e-25,43.5,True,-3.772761
4,Reflection Effect,4,8,62.0,33.0,40.0,55.0,2.583333,0.002163003,2.583333,True,0.949081


### Compute the difference in log-odds between the country-specific results and the original results, signed so that effects larger than the original are positive and effects smaller than the original are negative, irrespective of the original sign.

In [33]:
PT_contrasts_df2["LogOR_difference"] = np.nan
for item_1, item_2 in zip(PT_contrasts_original_df["Item 1"], PT_contrasts_original_df["Item 2"]):  # Cycle through the contrasts
    # Reference value: log-odds of the first original-study row for this item pair.
    original_rows = (PT_contrasts_original_df["Item 1"] == item_1) & (PT_contrasts_original_df["Item 2"] == item_2)
    original_log_or = PT_contrasts_original_df.loc[original_rows, "LogOR"].values[0]

    # Country rows belonging to the same contrast.
    country_rows = (PT_contrasts_df2["Item 1"] == item_1) & (PT_contrasts_df2["Item 2"] == item_2)

    # Keep the raw difference when the original log-odds are positive; otherwise flip its sign,
    # so that a stronger effect in the original direction is always positive.
    direction = 1 if np.sign(original_log_or) == 1 else -1
    PT_contrasts_df2.loc[country_rows, "LogOR_difference"] = (
        PT_contrasts_df2.loc[country_rows, "LogOR"] - original_log_or
    ) * direction

In [34]:
# Inspect the new LogOR_difference column next to the country-level LogOR values.
PT_contrasts_df2.head()

Unnamed: 0,Country,Effect,Item 1,Item 2,A1,B1,A2,B2,OR,p-value,OR2,Significance,Contrast,Sample Size,Significant,LogOR,Significance2,LogOR_difference
0,Bulgaria,Certainty Effect,1,2,23.0,104.0,72.0,55.0,0.168937,2.425416e-10,5.919368,True,1 vs 2,127.0,Yes,-1.77823,1.0,-1.343796
1,Bulgaria,Certainty Effect,3,4,15.0,112.0,65.0,62.0,0.127747,1.080756e-11,7.827957,True,3 vs 4,127.0,Yes,-2.057702,1.0,0.04078
2,Bulgaria,Certainty Effect,7,8,109.0,18.0,76.0,51.0,4.063596,4.697618e-06,4.063596,True,7 vs 8,127.0,Yes,1.402068,1.0,-1.302852
3,Bulgaria,Reflection Effect,3,7,15.0,112.0,109.0,18.0,0.022117,5.761813e-35,45.214815,True,3 vs 7,127.0,Yes,-3.811425,1.0,0.038664
4,Bulgaria,Reflection Effect,4,8,65.0,62.0,76.0,51.0,0.703523,0.206642,1.421418,False,4 vs 8,127.0,No,-0.351655,0.0,-1.300735


In [35]:
# Median signed log-odds difference per effect; negative values mean weaker effects than in the original.
PT_contrasts_df2.groupby("Effect")["LogOR_difference"].median()

Effect
Certainty Effect                       -1.170302
Framing Effect                         -0.844377
Isolation Effect                       -0.105691
Overweighting of small probabilities   -0.889924
Range Adaptation                       -0.429669
Reflection Effect                      -0.863325
Name: LogOR_difference, dtype: float64

In [36]:
# Build the "Yes"/"No" labels in one step. Initialising the column with float NaN and then
# assigning strings relies on a silent dtype upcast that recent pandas versions deprecate.
# Any value other than 1 or 0 stays missing.
PT_contrasts_df2["Replicates"] = PT_contrasts_df2["Significance2"].map({1.0: "Yes", 0.0: "No"})

In [37]:
# Inspect the new Replicates label next to the Significance2 flag it is derived from.
PT_contrasts_df2.head()

Unnamed: 0,Country,Effect,Item 1,Item 2,A1,B1,A2,B2,OR,p-value,OR2,Significance,Contrast,Sample Size,Significant,LogOR,Significance2,LogOR_difference,Replicates
0,Bulgaria,Certainty Effect,1,2,23.0,104.0,72.0,55.0,0.168937,2.425416e-10,5.919368,True,1 vs 2,127.0,Yes,-1.77823,1.0,-1.343796,Yes
1,Bulgaria,Certainty Effect,3,4,15.0,112.0,65.0,62.0,0.127747,1.080756e-11,7.827957,True,3 vs 4,127.0,Yes,-2.057702,1.0,0.04078,Yes
2,Bulgaria,Certainty Effect,7,8,109.0,18.0,76.0,51.0,4.063596,4.697618e-06,4.063596,True,7 vs 8,127.0,Yes,1.402068,1.0,-1.302852,Yes
3,Bulgaria,Reflection Effect,3,7,15.0,112.0,109.0,18.0,0.022117,5.761813e-35,45.214815,True,3 vs 7,127.0,Yes,-3.811425,1.0,0.038664,Yes
4,Bulgaria,Reflection Effect,4,8,65.0,62.0,76.0,51.0,0.703523,0.206642,1.421418,False,4 vs 8,127.0,No,-0.351655,0.0,-1.300735,No


In [38]:
# Mean and standard deviation of the signed log-odds difference over all country-by-contrast rows.
display(PT_contrasts_df2["LogOR_difference"].mean())
display(PT_contrasts_df2["LogOR_difference"].std())

-0.8296722500434579

0.7594954908084598

In [39]:
# The same two summaries, split by theoretical effect.
display(PT_contrasts_df2.groupby("Effect")["LogOR_difference"].mean())
display(PT_contrasts_df2.groupby("Effect")["LogOR_difference"].std())

Effect
Certainty Effect                       -0.933805
Framing Effect                         -0.885090
Isolation Effect                       -0.155327
Overweighting of small probabilities   -0.821114
Range Adaptation                       -0.367529
Reflection Effect                      -0.986830
Name: LogOR_difference, dtype: float64

Effect
Certainty Effect                        0.724230
Framing Effect                          0.472654
Isolation Effect                        0.186622
Overweighting of small probabilities    0.986486
Range Adaptation                        0.417731
Reflection Effect                       0.750950
Name: LogOR_difference, dtype: float64

### Saving country contrasts

In [41]:
# Write the per-country contrasts as both Excel and CSV, without the row index.
# NOTE(review): assumes the ../output directory already exists — confirm.
PT_contrasts_df2.to_excel("../output/contrasts_by_country.xlsx", index=False)
PT_contrasts_df2.to_csv("../output/contrasts_by_country.csv", index=False)

### Saving original contrasts

In [42]:
# Write the original-study contrasts as both Excel and CSV, without the row index.
# NOTE(review): assumes the ../output directory already exists — confirm.
PT_contrasts_original_df.to_excel("../output/original_contrasts.xlsx", index=False)
PT_contrasts_original_df.to_csv("../output/original_contrasts.csv", index=False)