In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [3]:
# Load the behavioral data set used by every model below.
prolific_df = pd.read_csv("../data/s2/behavioral_df.csv")

# Within each "id" group, shift enjoyment down by one row so that every row
# also carries the enjoyment value of the preceding row of the same group
# (the first row of each group gets NaN).
prolific_df["enjoyment_before"] = prolific_df.groupby("id")["enjoyment"].shift(1)

In [4]:
# Display the (rows, columns) dimensions of the loaded frame as a sanity check.
prolific_df.shape

(3675, 22)

# Results of the mixed-effects models

In [6]:
def fit_mxied_effect_regression_model(independent=None, dependent=None, 
                                      random_intercept=True, random_slope=True, scaled=True):
    """Fit and print a linear mixed-effects model on the module-level ``prolific_df``.

    The predictors and the response are z-scored into ``<name>_scaled`` columns,
    rows with any missing value are dropped, and a ``smf.mixedlm`` model grouped
    by the ``"id"`` column is fitted. The formula and the fit summary are printed.

    Parameters
    ----------
    independent : list of str, or str
        Column name(s) used as fixed-effect predictors. The predictors always
        enter the model in their z-scored form. A single name may be passed as
        a plain string.
    dependent : str
        Column name of the response.
    random_intercept : bool, default True
        Include a random intercept per ``"id"`` group.
    random_slope : bool, default True
        Include a random slope per ``"id"`` group for every predictor.
    scaled : bool, default True
        If true the response is the z-scored column ``<dependent>_scaled``;
        otherwise the raw ``dependent`` column is used. Predictors are scaled
        either way.

    Returns
    -------
    The fitted results object returned by ``md.fit()``.

    Raises
    ------
    ValueError
        If ``independent`` or ``dependent`` is missing, or if both
        ``random_intercept`` and ``random_slope`` are false (a mixed model
        needs at least one random effect).
    """
    if isinstance(independent, str):
        independent = [independent]
    if not independent or dependent is None:
        raise ValueError("both `independent` (non-empty) and `dependent` must be given")

    predictors = list(independent)
    columns = predictors + [dependent, "id"]
    test_df = prolific_df[columns].copy()

    # z-score every model variable. The mean/std are taken over all available
    # values of each column, i.e. BEFORE incomplete rows are dropped below, so
    # the scaled columns are not necessarily centred on the rows actually fitted.
    for column in columns:
        if column != "id":
            centered = test_df[column] - test_df[column].mean()
            test_df[column + "_scaled"] = centered / test_df[column].std()

    # Drop incomplete rows once and reuse the same frame for data and groups so
    # the two can never get out of sync.
    model_df = test_df.dropna()

    fixed_terms = " + ".join(name + "_scaled" for name in predictors)
    response = f"{dependent}_scaled" if scaled else dependent
    formula = f"{response} ~  {fixed_terms}"
    print(formula)

    if random_intercept and random_slope:
        re_formula = f"~ {fixed_terms}"
    elif random_slope:
        # "0 +" removes the intercept from the random-effects design, leaving
        # only the random slopes.
        re_formula = f"~ 0 + {fixed_terms}"
    elif random_intercept:
        # With no re_formula, mixedlm fits a random intercept per group.
        re_formula = None
    else:
        raise ValueError(
            "a mixed model needs a random intercept and/or random slope; "
            "use smf.ols for a model without random effects"
        )

    md = smf.mixedlm(formula, model_df, groups=model_df["id"], re_formula=re_formula)
    mdf = md.fit()
    print(mdf.summary())
    return mdf


# Correctly spelled alias; the original (misspelled) name is kept for existing callers.
fit_mixed_effect_regression_model = fit_mxied_effect_regression_model

In [7]:
# enjoyment ~ choice_index, with a random intercept and random slope per "id".
fit_mxied_effect_regression_model(
    independent=["choice_index"],
    dependent="enjoyment",
    random_intercept=True,
    random_slope=True,
    scaled=True,
)

enjoyment_scaled ~  choice_index_scaled
                 Mixed Linear Model Regression Results
Model:                MixedLM    Dependent Variable:    enjoyment_scaled
No. Observations:     3675       Method:                REML            
No. Groups:           245        Scale:                 0.7081          
Min. group size:      15         Log-Likelihood:        -4852.2878      
Max. group size:      15         Converged:             Yes             
Mean group size:      15.0                                              
------------------------------------------------------------------------
                                Coef. Std.Err.   z   P>|z| [0.025 0.975]
------------------------------------------------------------------------
Intercept                       0.000    0.035 0.000 1.000 -0.069  0.069
choice_index_scaled             0.107    0.017 6.274 0.000  0.073  0.140
Group Var                       0.258    0.034                          
Group x choice_index_scaled C

In [8]:
# step_size_after ~ choice_index, with a random intercept and random slope per "id".
fit_mxied_effect_regression_model(
    independent=["choice_index"],
    dependent="step_size_after",
    random_intercept=True,
    random_slope=True,
    scaled=True,
)

step_size_after_scaled ~  choice_index_scaled
                  Mixed Linear Model Regression Results
Model:              MixedLM   Dependent Variable:   step_size_after_scaled
No. Observations:   3430      Method:               REML                  
No. Groups:         245       Scale:                0.7218                
Min. group size:    14        Log-Likelihood:       -4592.8306            
Max. group size:    14        Converged:            Yes                   
Mean group size:    14.0                                                  
--------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z| [0.025 0.975]
--------------------------------------------------------------------------
Intercept                       -0.023    0.031 -0.733 0.464 -0.083  0.038
choice_index_scaled             -0.195    0.023 -8.419 0.000 -0.240 -0.149
Group Var                        0.179    0.026                          

In [9]:
# step_size_after ~ enjoyment, with a random intercept and random slope per "id".
fit_mxied_effect_regression_model(
    independent=["enjoyment"],
    dependent="step_size_after",
    random_intercept=True,
    random_slope=True,
    scaled=True,
)

step_size_after_scaled ~  enjoyment_scaled
                 Mixed Linear Model Regression Results
Model:              MixedLM  Dependent Variable:  step_size_after_scaled
No. Observations:   3430     Method:              REML                  
No. Groups:         245      Scale:               0.7242                
Min. group size:    14       Log-Likelihood:      -4512.5258            
Max. group size:    14       Converged:           Yes                   
Mean group size:    14.0                                                
------------------------------------------------------------------------
                             Coef.  Std.Err.    z    P>|z| [0.025 0.975]
------------------------------------------------------------------------
Intercept                    -0.004    0.029  -0.123 0.902 -0.061  0.054
enjoyment_scaled             -0.335    0.019 -17.584 0.000 -0.372 -0.298
Group Var                     0.155    0.023                            
Group x enjoyment_scaled C

In [10]:
# Columns needed for the joint model; "id" is the grouping key.
# NOTE: `vars` shadows the builtin of the same name; the name is kept because
# other cells may read it.
vars = [
    "id",
    "choice_index",
    "step_size_after",
    "enjoyment",
    "curiosity_je",
    "curiosity_ds",
    "curiosity_st",
    "curiosity_sc",
    "curiosity_ts",
]

test_df = prolific_df[vars].copy()

# z-score every model variable into a "<name>_scaled" column. The mean and std
# are taken before incomplete rows are dropped.
to_scale = [
    "step_size_after",
    "choice_index",
    "enjoyment",
    "curiosity_je",
    "curiosity_ds",
    "curiosity_st",
    "curiosity_sc",
    "curiosity_ts",
]
for col in to_scale:
    centered = test_df[col] - test_df[col].mean()
    test_df[col + "_scaled"] = centered / test_df[col].std()

# Fit on complete rows only; the same frame supplies both the data and the groups.
complete_rows = test_df.dropna()

md = smf.mixedlm(
    "step_size_after_scaled ~  choice_index_scaled + enjoyment_scaled + curiosity_je_scaled + curiosity_ds_scaled"
    " + curiosity_st_scaled + curiosity_sc_scaled + curiosity_ts_scaled",
    complete_rows,
    groups=complete_rows["id"],
    re_formula="~ choice_index_scaled + enjoyment_scaled",
)
mdf = md.fit()
print(mdf.summary())

                        Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       step_size_after_scaled
No. Observations:       3430          Method:                   REML                  
No. Groups:             245           Scale:                    0.6459                
Min. group size:        14            Log-Likelihood:           -4426.8713            
Max. group size:        14            Converged:                Yes                   
Mean group size:        14.0                                                          
--------------------------------------------------------------------------------------
                                           Coef.  Std.Err.    z    P>|z| [0.025 0.975]
--------------------------------------------------------------------------------------
Intercept                                  -0.021    0.029  -0.745 0.456 -0.077  0.035
choice_index_scaled                        -0.154    0.022  -6.997 0

In [12]:
# Columns pulled for this model; "id" is the grouping key. All of them take
# part in the dropna() below, including the curiosity columns that are not
# used in the formula.
# NOTE: `vars` shadows the builtin of the same name; the name is kept because
# other cells may read it.
vars = [
    "id",
    "choice_index",
    "step_size_before",
    "enjoyment",
    "curiosity_je",
    "curiosity_ds",
    "curiosity_st",
    "curiosity_sc",
    "curiosity_ts",
]

test_df = prolific_df[vars].copy()

# z-score the variables that enter the formula. The mean and std are taken
# before incomplete rows are dropped.
to_scale = ["step_size_before", "choice_index", "enjoyment", "curiosity_je"]
for col in to_scale:
    centered = test_df[col] - test_df[col].mean()
    test_df[col + "_scaled"] = centered / test_df[col].std()

# Fit on complete rows only; the same frame supplies both the data and the groups.
complete_rows = test_df.dropna()

md = smf.mixedlm(
    "enjoyment_scaled ~  choice_index_scaled + step_size_before_scaled  + curiosity_je_scaled "
    " + step_size_before_scaled:curiosity_je_scaled",
    complete_rows,
    groups=complete_rows["id"],
    re_formula="~ choice_index_scaled ",
)
mdf = md.fit()
print(mdf.summary())

# Hand-computed BIC-style criterion: -2 * log-likelihood + log(n) * df_modelwc.
# NOTE(review): the printed summary reports Method: REML — confirm this value
# is comparable across the models it is used to compare.
criterion = -2 * mdf.llf + np.log(mdf.nobs) * (mdf.df_modelwc)
print(criterion)

                        Mixed Linear Model Regression Results
Model:                    MixedLM         Dependent Variable:         enjoyment_scaled
No. Observations:         3430            Method:                     REML            
No. Groups:               245             Scale:                      0.6942          
Min. group size:          14              Log-Likelihood:             -4499.6649      
Max. group size:          14              Converged:                  Yes             
Mean group size:          14.0                                                        
--------------------------------------------------------------------------------------
                                            Coef.  Std.Err.   z    P>|z| [0.025 0.975]
--------------------------------------------------------------------------------------
Intercept                                    0.008    0.034  0.238 0.812 -0.059  0.076
choice_index_scaled                          0.083    0.018  4.625 0