In [1]:
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import pyarrow
from datetime import datetime
import esm_simulation, utilities
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import matplotlib.cm as cm
import itertools
import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from patsy.builtins import *
# Report the interpreter and key library versions so results can be reproduced.
# The version is read from ``sys.version`` instead of shelling out with
# ``!{sys.executable} --version``: the shell form interpolates the path
# unquoted (it breaks when the interpreter path contains spaces) and only
# works under IPython.
# ``pv`` stays a one-element sequence so ``pv[0]`` keeps yielding "Python X.Y.Z".
pv = ['Python ' + sys.version.split()[0]]
print(pv[0],'numpy ==', np.__version__, 'pyarrow ==',pyarrow.__version__, 'seaborn == ', sns.__version__, 'statsmodels == ', sm.__version__)



Python 3.9.7 numpy == 1.23.1 pyarrow == 6.0.1 seaborn ==  0.11.2 statsmodels ==  0.12.2


In [2]:
# Notebook-wide display and plotting options.
# pandas: show up to 100 columns and render floats with three decimals.
pd.set_option('display.max_columns', 100)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# matplotlib / seaborn: figure resolution, "ticks" style, cubehelix palette.
mpl.rcParams['figure.dpi'] = 300
sns.set_style("ticks")
sns.set_palette("cubehelix")
# Scaling factor applied to the font sizes of the "paper" context below.
font_scale = 3
# NOTE(review): 'aspect' does not look like a matplotlib rcParam or a seaborn
# context key; presumably seaborn drops it silently and only
# "lines.linewidth" takes effect — confirm against the installed seaborn version.
sns.set_context("paper", font_scale=font_scale, rc={"lines.linewidth": 3, 'aspect':1})

In [3]:
# Load the three aggregated simulation result sets from parquet.
data_main = pd.read_parquet('esm_simulation.Simulation_main_agg.parquet')
data_ext = pd.read_parquet('esm_simulation.Simulation_ext_agg.parquet')
data_robust = pd.read_parquet('esm_simulation.Simulation_robust_agg.parquet')

# Report the (rows, columns) shape of each frame, in load order.
for label, frame in (("main", data_main), ("ext", data_ext), ("robust", data_robust)):
    print(label, frame.shape)

# Column names of the main frame.
print(data_main.columns)

main (9720, 20)
ext (506250, 20)
robust (38880, 20)
Index(['Configuration', 'Run', 'Adoption', 'Esm Interactivity',
       'Initial Esm Adoption', 'Initial Metaknowledge Accuracy',
       'Interruptions', 'Leakiness', 'Metaknowledge Accuracy',
       'Metaknowledge Decay Rate', 'Organization Size', 'Performance',
       'Realized Metaknowledge', 'Seed', 'Simulation Time', 'Time Step',
       'Worker Aspiration', 'Worker Dependency',
       'Worker Interruption Tolerance', 'Worker Transparency Preference'],
      dtype='object')


In [4]:
def regression_analysis(dv, data, extra_variables=None, interaction_terms=False):
    """Fit an OLS model of ``dv`` on the simulation parameters.

    The right-hand side always contains six baseline regressors: Esm
    Interactivity, Worker Interruption Tolerance, Worker Transparency
    Preference, Initial Metaknowledge Accuracy, Worker Dependency and Initial
    Esm Adoption. Because these column names contain spaces, every regressor
    is wrapped in patsy's ``Q("...")`` quoting helper.

    Parameters
    ----------
    dv : str
        Dependent variable. If it names a column of ``data`` that is not a
        valid Python identifier (for example it contains a space), it is
        wrapped in ``Q("...")`` as well; otherwise it is inserted into the
        formula unchanged, so identifier-like names and patsy expressions
        keep working as before.
    data : DataFrame-like
        Data handed to ``smf.ols``.
    extra_variables : str or iterable of str, optional
        Additional column names appended as ``Q("...")`` regressors. A single
        string is treated as one column name.
    interaction_terms : bool, default False
        When true, add the three pairwise interactions between Esm
        Interactivity, Worker Interruption Tolerance and Worker Transparency
        Preference.

    Returns
    -------
    The fitted results object from ``smf.ols(...).fit(cov_type='HC1')``,
    i.e. with HC1 heteroscedasticity-robust standard errors.
    """
    # A bare string would otherwise be iterated character by character.
    if extra_variables is None:
        extra_variables = []
    elif isinstance(extra_variables, str):
        extra_variables = [extra_variables]

    # Quote the dependent variable only when it is a real column whose name
    # cannot be written as a bare identifier; anything else is left alone so
    # existing calls build exactly the same left-hand side.
    lhs = str(dv)
    if not lhs.isidentifier():
        try:
            is_column = lhs in getattr(data, "columns", data)
        except TypeError:
            is_column = False
        if is_column:
            lhs = f'Q("{lhs}")'

    terms = [
        'Q("Esm Interactivity")',
        'Q("Worker Interruption Tolerance")',
        'Q("Worker Transparency Preference")',
        'Q("Initial Metaknowledge Accuracy")',
        'Q("Worker Dependency")',
        'Q("Initial Esm Adoption")',
    ]
    if interaction_terms:
        terms += [
            'Q("Esm Interactivity"):Q("Worker Interruption Tolerance")',
            'Q("Esm Interactivity"):Q("Worker Transparency Preference")',
            'Q("Worker Interruption Tolerance"):Q("Worker Transparency Preference")',
        ]
    # Extra regressors come last, matching the original term order.
    terms += [f'Q("{v}")' for v in extra_variables]

    formula = f'{lhs} ~ ' + ' + '.join(terms)
    mod = smf.ols(formula=formula, data=data)
    res = mod.fit(cov_type='HC1')
    return res

In [5]:
# Fit the baseline specification on the main and extended samples, each with
# and without the pairwise interaction terms. The extended sample also gets
# the two extra regressors listed in ``ext_controls``.
ext_controls = ["Organization Size", "Simulation Time"]
mod_main = regression_analysis("Performance", data_main)
mod_main_int = regression_analysis("Performance", data_main, interaction_terms=True)
mod_ext = regression_analysis("Performance", data_ext, extra_variables=ext_controls)
mod_ext_int = regression_analysis("Performance", data_ext, extra_variables=ext_controls, interaction_terms=True)

# Row order for the table: main effects, interactions, then the remaining regressors.
order = [
    'Q("Esm Interactivity")',
    'Q("Worker Interruption Tolerance")',
    'Q("Worker Transparency Preference")',
    'Q("Esm Interactivity"):Q("Worker Interruption Tolerance")',
    'Q("Esm Interactivity"):Q("Worker Transparency Preference")',
    'Q("Worker Interruption Tolerance"):Q("Worker Transparency Preference")',
    'Q("Initial Metaknowledge Accuracy")',
    'Q("Worker Dependency")',
    'Q("Initial Esm Adoption")',
    'Q("Organization Size")',
    'Q("Simulation Time")',
]

# One column per model; the extra "N" row reports each model's observation count.
table = summary_col(
    [mod_main, mod_ext, mod_main_int, mod_ext_int],
    stars=True,
    float_format='%0.4f',
    model_names=['Main', 'Ext', 'Main_int', 'Ext_int'],
    info_dict={'N': lambda fitted: "{0:d}".format(int(fitted.nobs))},
    regressor_order=order,
    drop_omitted=False,
)
table

0,1,2,3,4
,Main,Ext,Main_int,Ext_int
"Q(""Esm Interactivity"")",-0.1110***,-0.0701***,0.0742***,-0.0852***
,(0.0045),(0.0009),(0.0092),(0.0018)
"Q(""Worker Interruption Tolerance"")",0.0021***,0.0025***,0.0007***,-0.0002***
,(0.0000),(0.0000),(0.0001),(0.0000)
"Q(""Worker Transparency Preference"")",0.0106***,0.0086***,0.0053***,0.0006***
,(0.0003),(0.0000),(0.0005),(0.0001)
"Q(""Esm Interactivity""):Q(""Worker Interruption Tolerance"")",,,-0.0010***,0.0014***
,,,(0.0002),(0.0000)
"Q(""Esm Interactivity""):Q(""Worker Transparency Preference"")",,,-0.0249***,-0.0067***


In [6]:
# Save the plain-text rendering of the table. The context manager closes the
# file deterministically, so the text is flushed to disk instead of depending
# on garbage collection of an anonymous handle.
with open("regression_table.txt", "w") as table_file:
    print(table, file=table_file)

In [7]:
# Render the current table as HTML and write it to regression_table.html.
table_html = table.as_html()
with open('regression_table.html', 'w') as html_file:
    html_file.write(table_html)

# Alternative assumptions (robustness models)

In [8]:
# Fit the robustness sample with its two extra regressors, with and without
# the pairwise interaction terms.
robust_controls = ['Worker Aspiration', 'Metaknowledge Decay Rate']
mod_robust = regression_analysis("Performance", data_robust, extra_variables=robust_controls)
mod_robust_int = regression_analysis("Performance", data_robust, extra_variables=robust_controls, interaction_terms=True)

In [9]:
# Row order for the comparison table: main effects, interactions, then the
# remaining regressors, including those specific to the robustness models.
order = [
    'Q("Esm Interactivity")',
    'Q("Worker Interruption Tolerance")',
    'Q("Worker Transparency Preference")',
    'Q("Esm Interactivity"):Q("Worker Interruption Tolerance")',
    'Q("Esm Interactivity"):Q("Worker Transparency Preference")',
    'Q("Worker Interruption Tolerance"):Q("Worker Transparency Preference")',
    'Q("Initial Metaknowledge Accuracy")',
    'Q("Worker Dependency")',
    'Q("Initial Esm Adoption")',
    'Q("Organization Size")',
    'Q("Simulation Time")',
    'Q("Metaknowledge Decay Rate")',
    'Q("Worker Aspiration")',
]

# Main model next to the robustness model; the "N" row reports observation counts.
# NOTE(review): mod_robust_int is not included in this table — confirm that is intended.
table = summary_col(
    [mod_main, mod_robust],
    stars=True,
    float_format='%0.4f',
    model_names=['Main', 'Robust'],
    info_dict={'N': lambda fitted: "{0:d}".format(int(fitted.nobs))},
    regressor_order=order,
    drop_omitted=False,
)
table

0,1,2
,Main,Robust
"Q(""Esm Interactivity"")",-0.1110***,0.0029***
,(0.0045),(0.0008)
"Q(""Worker Interruption Tolerance"")",0.0021***,0.0007***
,(0.0000),(0.0000)
"Q(""Worker Transparency Preference"")",0.0106***,0.0030***
,(0.0003),(0.0001)
"Q(""Initial Metaknowledge Accuracy"")",0.3526***,0.0822***
,(0.0144),(0.0032)
"Q(""Worker Dependency"")",-0.1920***,-0.0044


In [10]:
# Save the plain-text rendering of the table. The context manager closes the
# file deterministically, so the text is flushed to disk instead of depending
# on garbage collection of an anonymous handle.
with open("regression_table_alternative_assumptions.txt", "w") as table_file:
    print(table, file=table_file)

In [11]:
# Render the current table as HTML and write it to
# regression_table_alternative_assumptions.html.
table_html = table.as_html()
with open('regression_table_alternative_assumptions.html', 'w') as html_file:
    html_file.write(table_html)