#### Download packages

In [19]:
import numpy as np
import pandas as pd
import os
import openpyxl
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
#print("Current working directory:", os.getcwd())


In [20]:
import sys
#print(f"Python version: {sys.version}")
#print(f"Interpreter: {sys.executable}")
#print(f"Virtual environment: {sys.prefix}")

#### Import data files

In [3]:
# Importing files
path_sample_01 = "./data/sample_italy.xlsx"
path_sample_02 = "./data/sample_prolific.csv"

# Import df_llama and df_annotations
df_italy = pd.read_excel(path_sample_01)
df_prolific = pd.read_csv(path_sample_02)

print(f"Length of df_italy: {len(df_italy)}")
print(f"Length of df_prolific: {len(df_prolific)}")

print(f"Columns of df_italy: {df_italy.columns}")
print(f"Columns of df_prolific: {df_prolific.columns}")


Length of df_italy: 8760
Length of df_prolific: 8240
Columns of df_italy: Index(['ID', 't', 'd0_s1', 'impr', 'warm', 'comp', 'valence', 'statval',
       'statnum', 'statval.1', 'statnum.1', 'statchar', 'Itemtype', 'itemnum',
       'Meanval1', 'Meanval2'],
      dtype='object')
Columns of df_prolific: Index(['Unnamed: 0', 'ID', 't', 'd0_s1', 'impr', 'warm', 'comp', 'statval',
       'statnum', 'itemtype', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8',
       'itemnum', 'Meanval1', 'Meanval2', 'int_lag', 'int_fwd'],
      dtype='object')


In [4]:
print(len(df_italy['ID'].unique()))
print(len(df_prolific['ID'].unique()))

219
206


#### Print statistics

In [5]:
#Mean values for sample 1 (Italy)
df_italy.groupby(['d0_s1','statval.1'])[['impr', 'warm', 'comp']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,impr,impr,warm,warm,comp,comp
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std
d0_s1,statval.1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,-1,-1.36103,1.492817,-1.047264,1.534472,-1.220698,1.470032
0,0,0.304155,1.196073,0.317354,1.129061,0.119899,1.119317
0,1,1.652393,1.352871,1.576115,1.301131,1.154431,1.373178
1,-1,-1.323501,1.574137,-1.191969,1.521383,-0.982884,1.640785
1,0,0.188333,1.318938,0.040185,1.218919,0.20537,1.269503
1,1,1.435041,1.512872,1.037772,1.535056,1.418432,1.458167


In [6]:
#Mean values for sample 2 (Prolific Academic)
df_prolific.groupby(['d0_s1','statval'])[['impr', 'warm', 'comp']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,impr,impr,warm,warm,comp,comp
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std
d0_s1,statval,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0.0,-1,-1.673853,1.395042,-1.439035,1.450029,-1.582864,1.414718
0.0,0,-0.019721,1.274463,-0.07415,1.218875,-0.107864,1.225906
0.0,1,1.292945,1.741057,1.104499,1.714035,0.947444,1.752581
1.0,-1,-1.177682,1.633904,-0.868519,1.656103,-0.97875,1.721837
1.0,0,0.331214,1.352537,0.364745,1.282799,0.304879,1.402173
1.0,1,1.382967,1.506898,1.278216,1.4537,1.099472,1.577869


In [7]:
#Select columns from df_italy
selection_italy = df_italy.loc[:, ['ID', 't', 'd0_s1', 'impr', 'warm', 'comp', 'statval.1']].copy()
selection_italy = selection_italy.rename(columns = {'statval.1': 'statval'})

#Create column with 'statval_quad' where negative =1, positive = 1, and neutral = -2.
selection_italy['statval_quad'] = np.where(selection_italy['statval'] == -1, 1, np.where(selection_italy['statval'] == 0, -2, 1))

#Include 'sample_italy_' with the ID values
selection_italy['ID'] = 'sample_italy_' + selection_italy['ID'].astype(str)

#Selection columns from df_prolific
selection_prolific = df_prolific.loc[:, ['ID', 't', 'd0_s1', 'impr', 'warm', 'comp', 'statval']].copy()

#Create column with 'statval_quad' where negative =1, positive = 1, and neutral = -2.
selection_prolific['statval_quad'] = np.where(selection_prolific['statval'] == -1, 1, np.where(selection_prolific['statval'] == 0, -2, 1))

#Include 'sample_prolific_' with the ID values
selection_prolific['ID'] = 'sample_prolific_' + selection_prolific['ID'].astype(str)


In [8]:
#Print statements
print(f"Length of selection_italy dataframe:{len(selection_italy)}")
print(f"Length of selection_prolific dataframe: {len(selection_prolific)}")

#Check if columns are the same
print(selection_italy.columns)
print(selection_prolific.columns)

#Merge
combined_data = pd.concat([selection_italy, selection_prolific])
print(f"Total length of combined_data: {len(combined_data)}")

Length of selection_italy dataframe:8760
Length of selection_prolific dataframe: 8240
Index(['ID', 't', 'd0_s1', 'impr', 'warm', 'comp', 'statval', 'statval_quad'], dtype='object')
Index(['ID', 't', 'd0_s1', 'impr', 'warm', 'comp', 'statval', 'statval_quad'], dtype='object')
Total length of combined_data: 17000


In [9]:
#Print mean values of impression, warmth, competence per leader gender and statval
combined_data.groupby(['d0_s1','statval'])[['impr', 'warm', 'comp']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,impr,impr,warm,warm,comp,comp
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std
d0_s1,statval,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0.0,-1,-1.511595,1.454795,-1.235827,1.506978,-1.395012,1.454765
0.0,0,0.148271,1.244633,0.128919,1.189014,0.010275,1.177003
0.0,1,1.479442,1.561949,1.349194,1.531936,1.054838,1.570414
1.0,-1,-1.252319,1.605009,-1.03403,1.596568,-0.980865,1.680602
1.0,0,0.258081,1.336959,0.19862,1.260627,0.253945,1.336442
1.0,1,1.409621,1.509959,1.155145,1.500496,1.262731,1.525909


#### Hypothesis 1

In [10]:
# Filter data for baseline measurement
baseline_data = combined_data[combined_data['t'] == 1].copy()
print(f"Length of baseline_data: {len(baseline_data)}")

Length of baseline_data: 425


In [11]:
# Define the outcome variables
outcomes = ['impr', 'warm', 'comp']

# Loop through each outcome variable
for outcome in outcomes:
    # Define the model formula
    formula = f"{outcome} ~ d0_s1 * statval + d0_s1 * statval_quad"
    
    # Fit the model
    model = smf.ols(formula, data=baseline_data).fit()
    
    # Print the summary
    print(f"Model summary for {outcome}:")
    print(model.summary())
    print("\n")

Model summary for impr:
                            OLS Regression Results                            
Dep. Variable:                   impr   R-squared:                       0.692
Model:                            OLS   Adj. R-squared:                  0.688
Method:                 Least Squares   F-statistic:                     188.0
Date:                Sun, 04 May 2025   Prob (F-statistic):          1.19e-104
Time:                        17:59:49   Log-Likelihood:                -651.50
No. Observations:                 425   AIC:                             1315.
Df Residuals:                     419   BIC:                             1339.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept   

### Hypothesis 3

In [12]:
#Selection statval of initial behavior (t=1)
initial_behavior = combined_data[combined_data['t'] == 1][['ID', 'd0_s1', 'statval']]

#Selection impression, warmth and competence ratings from final evaluation rating (t=40)
last_effectiveness_rating = combined_data[combined_data['t'] == 40][['ID', 'impr', 'warm', 'comp']]

#Merge the two
merged_data = pd.merge(initial_behavior, last_effectiveness_rating, on = 'ID')
print(len(merged_data))

#Separate italy_selection and prolific_selection
italy_selection = merged_data[merged_data['ID'].str.contains('sample_italy_')]
prolific_selection = merged_data[merged_data['ID'].str.contains('sample_prolific_')]

425


#### Merged data files

In [13]:
#data=merged_data
formula = "impr ~ statval * d0_s1"
model1 = smf.ols(formula, data=merged_data).fit()
print(model1.summary())


                            OLS Regression Results                            
Dep. Variable:                   impr   R-squared:                       0.006
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.8032
Date:                Sun, 04 May 2025   Prob (F-statistic):              0.493
Time:                        17:59:49   Log-Likelihood:                -882.15
No. Observations:                 425   AIC:                             1772.
Df Residuals:                     421   BIC:                             1789.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        -0.0597      0.133     -0.450

#### Sample data 1 (Italy)

In [14]:
#data=italy_selection
model_italy = smf.ols(formula, data=italy_selection).fit()
print(model_italy.summary())


                            OLS Regression Results                            
Dep. Variable:                   impr   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                 -0.009
Method:                 Least Squares   F-statistic:                    0.3294
Date:                Sun, 04 May 2025   Prob (F-statistic):              0.804
Time:                        17:59:49   Log-Likelihood:                -447.53
No. Observations:                 219   AIC:                             903.1
Df Residuals:                     215   BIC:                             916.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept         0.0893      0.184      0.486

#### Sample data 2 (Prolific Academic)

In [15]:
#data=prolific_selection
model_prolific = smf.ols(formula, data=prolific_selection).fit()
print(model_prolific.summary())


                            OLS Regression Results                            
Dep. Variable:                   impr   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                  0.010
Method:                 Least Squares   F-statistic:                     1.683
Date:                Sun, 04 May 2025   Prob (F-statistic):              0.172
Time:                        17:59:50   Log-Likelihood:                -431.33
No. Observations:                 206   AIC:                             870.7
Df Residuals:                     202   BIC:                             884.0
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        -0.1249      0.197     -0.633

#### Simple slope testing

In [16]:
model_prolific = smf.ols('impr ~ C(statval) * d0_s1', data=prolific_selection).fit()

# Get the levels of statval
statval_levels = prolific_selection['statval'].unique()

for level in statval_levels:
    level_data = prolific_selection[prolific_selection['statval'] == level]
    level_model = smf.ols('impr ~ d0_s1', data=level_data).fit()
    b = level_model.params['d0_s1']
    se = level_model.bse['d0_s1']
    t = level_model.tvalues['d0_s1']
    p = level_model.pvalues['d0_s1']
    df = level_model.df_resid
    print(f"Effect of d0_s1 at statval = {level}: b = {b:.2f}, SE = {se:.2f}, t({df}) = {t:.2f}, p = {p:.3f}")


Effect of d0_s1 at statval = 0: b = 0.55, SE = 0.70, t(31.0) = 0.79, p = 0.437
Effect of d0_s1 at statval = 1: b = -0.86, SE = 0.46, t(74.0) = -1.84, p = 0.069
Effect of d0_s1 at statval = -1: b = 0.32, SE = 0.39, t(95.0) = 0.81, p = 0.422


In [17]:
# Get the unique levels of d0_s1
d0_s1_levels = prolific_selection['d0_s1'].unique()

for level in d0_s1_levels:
    level_data = prolific_selection[prolific_selection['d0_s1'] == level]
    level_model = smf.ols('impr ~ statval', data=level_data).fit()
    b = level_model.params['statval']
    se = level_model.bse['statval']
    t = level_model.tvalues['statval']
    p = level_model.pvalues['statval']
    df = level_model.df_resid
    print(f"Effect of statval at d0_s1 = {level}: b = {b:.2f}, SE = {se:.2f}, t({df}) = {t:.2f}, p = {p:.3f}")


Effect of statval at d0_s1 = 0.0: b = 0.46, SE = 0.20, t(101.0) = 2.26, p = 0.026
Effect of statval at d0_s1 = 1.0: b = -0.11, SE = 0.22, t(101.0) = -0.48, p = 0.632


In [18]:
# Get the unique levels of d0_s1
d0_s1_levels = prolific_selection['d0_s1'].unique()
statval_levels = [1, 0, -1]  # Values for statval that we are interested in

for level in d0_s1_levels:
    for statval_level in statval_levels:
        # Filter the data for the current d0_s1 level and statval level
        level_data = prolific_selection[(prolific_selection['d0_s1'] == level) & 
                                        (prolific_selection['statval'] == statval_level)]
        
        if len(level_data) > 0:  # Check if there is data for this combination
            level_model = smf.ols('impr ~ statval', data=level_data).fit()
            b = level_model.params['statval']
            se = level_model.bse['statval']
            t = level_model.tvalues['statval']
            p = level_model.pvalues['statval']
            df = level_model.df_resid
            
            # Print the results for each combination of d0_s1 and statval
            print(f"Effect of statval = {statval_level} at d0_s1 = {level}: b = {b:.2f}, SE = {se:.2f}, t({df}) = {t:.2f}, p = {p:.3f}")
        else:
            print(f"No data for statval = {statval_level} at d0_s1 = {level}")


Effect of statval = 1 at d0_s1 = 0.0: b = 0.15, SE = 0.15, t(37.0) = 0.99, p = 0.330
Effect of statval = 0 at d0_s1 = 0.0: b = 0.00, SE = 0.00, t(13.0) = nan, p = nan
Effect of statval = -1 at d0_s1 = 0.0: b = 0.31, SE = 0.13, t(50.0) = 2.35, p = 0.023
Effect of statval = 1 at d0_s1 = 1.0: b = -0.28, SE = 0.18, t(37.0) = -1.58, p = 0.123
Effect of statval = 0 at d0_s1 = 1.0: b = 0.00, SE = 0.00, t(18.0) = nan, p = nan
Effect of statval = -1 at d0_s1 = 1.0: b = 0.15, SE = 0.14, t(45.0) = 1.06, p = 0.295
