# core

> Python package that helps collect outputs of statistical analyses into tables and export them to LaTeX and PDF (similar to the ``estout`` and ``esttab`` commands in Stata).

In [None]:
#| default_exp core

In [None]:
#| hide 
from nbdev.showdoc import *
from nbdev.test import *

In [None]:
#| export
from typing import List, Dict, Tuple
import importlib
import numpy as np
import pandas as pd

import statsmodels.api as sm
from linearmodels import PanelOLS

from estout.utils import *

In [None]:
# Toy panel used by the examples below: 3 firms x 3 periods, random 'y' and 'x',
# plus a column of ones to serve as the regression intercept.
np.random.seed(0)
df = (
    pd.DataFrame(np.random.rand(9, 2), columns=['y', 'x'])
    .assign(
        firmid=[1]*3 + [2]*3 + [3]*3,
        time=[1, 2, 3]*3,
        constant=1,
    )
    .set_index(['firmid', 'time'])
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,y,x,constant
firmid,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,0.548814,0.715189,1
1,2,0.602763,0.544883,1
1,3,0.423655,0.645894,1
2,1,0.437587,0.891773,1
2,2,0.963663,0.383442,1
2,3,0.791725,0.528895,1
3,1,0.568045,0.925597,1
3,2,0.071036,0.087129,1
3,3,0.020218,0.83262,1


In [None]:
# Three example fits of 'y' on a constant and 'x':
#   sm1   - statsmodels OLS with its default covariance
#   sm2   - the same OLS, re-estimated with a HAC covariance (maxlags=2)
#   lmres - linearmodels PanelOLS with entity effects and entity-clustered covariance
sm1 = sm.OLS(endog=df['y'], exog=df[['constant', 'x']]).fit()
sm2 = (
    sm.OLS(endog=df['y'], exog=df[['constant', 'x']])
    .fit()
    .get_robustcov_results(cov_type='HAC', maxlags=2)
)
lmres = PanelOLS(
    dependent=df['y'],
    exog=df[['constant', 'x']],
    entity_effects=True,
).fit(cov_type='clustered', cluster_entity=True)

In [None]:
#| export
def collect_stats(res, # results object to extract stats from
                  package: str, # name of package that generated 'res' object
                  get_default_stats = True, # if True, returns all stats implemented by the f'{package}_results' module
                  add_stats: dict=None, # keys are stats to extract in addition to the default ones, values are attributes of 'res'
                  add_literals: dict=None, # additional info to be added to output dict as literal strings
                  ) -> dict:
    """Gather statistics from 'res' into a plain dict.

    The dict always starts with a 'package' entry. Entries are then written in
    this order, so later ones overwrite earlier ones that share a key:

    1. default stats: for every name returned by `default_stats()` of the
       `estout.{package}_results` module, the module attribute with that name
       is called on 'res';
    2. 'add_stats': each value is looked up on 'res' with `rgetattr`
       (presumably a nested-attribute getattr from `estout.utils` -- confirm there);
    3. 'add_literals': copied in as-is.
    """

    # The per-package extractor module is imported even when defaults are skipped.
    extractors = importlib.import_module(f"estout.{package}_results")
    collected = {'package': package}

    if get_default_stats:
        collected.update({name: rgetattr(extractors, name)(res)
                          for name in extractors.default_stats()})

    if add_stats is not None:
        collected.update({name: rgetattr(res, attr)
                          for name, attr in add_stats.items()})

    if add_literals is not None:
        collected.update(add_literals)

    return collected

In [None]:
# One stats dict per fitted model: defaults only, defaults plus a literal label,
# and defaults plus two extra attributes read off the PanelOLS results.
stats1 = collect_stats(sm1, package='statsmodels')
stats2 = collect_stats(
    sm2,
    package='statsmodels',
    add_literals={'Cov Type': 'Newey West'},
)
stats3 = collect_stats(
    lmres,
    package='linearmodels',
    add_stats={'r2b': 'rsquared_between', 'FE': 'included_effects'},
)

In [None]:
# Entries passed via 'add_literals' must appear unchanged in the collected dict.
assert stats2['Cov Type'] == 'Newey West'

In [None]:
# Full dict collected for the PanelOLS fit, including the extra 'r2b' and 'FE' entries.
stats3

{'package': 'linearmodels',
 'ynames': ['y'],
 'xnames': ['constant', 'x'],
 'params': constant    0.495446
 x          -0.005671
 Name: parameter, dtype: float64,
 'tstats': constant    2.014245
 x          -0.014231
 Name: tstat, dtype: float64,
 'pvalues': constant    0.100103
 x           0.989196
 Name: pvalue, dtype: float64,
 'covmat':           constant         x
 constant  0.060502 -0.098015
 x        -0.098015  0.158788,
 'se': constant    0.245971
 x           0.398482
 dtype: float64,
 'r2': 5.3676782504452625e-05,
 'nobs': 9,
 'r2b': 0.00022411078437101217,
 'FE': ['Entity']}

In [None]:
# Value read from the 'rsquared_between' attribute requested through 'add_stats'.
stats3['r2b']

0.00022411078437101217

In [None]:
# Value read from the 'included_effects' attribute requested through 'add_stats'.
stats3['FE']

['Entity']

In [None]:
#| export
def to_df(res_list: List[dict], # list of outputs from `collect_stats()`
          which_xvars: list=None, # if None, report all xvars
          stats_body: list=['params', 'tstats'], # each element of 'res_list' needs to have these stats as keys; values must be pd.Series
          stats_bottom: list=['r2', 'nobs'], # each element of 'res_list' needs to have these stats as keys; values must be scalars
          labels: dict=None # NOTE(review): not used yet; intended meaning is not defined anywhere in this notebook
          ) -> pd.DataFrame: 
    """Combines results from multiple `collect_stats()` outputs into a single pd.DataFrame

    The output has one column per element of 'res_list' (labelled 0, 1, ... in
    input order) and the following rows, top to bottom:

    - for each stat in 'stats_body', one row per regressor, labelled
      f'{stat}_{xvar}' (e.g. 'params_x'); regressors missing from a model are NaN;
    - for each stat in 'stats_bottom', a single row labelled with the stat name.

    If 'which_xvars' is given, the body rows are restricted to those regressors,
    in the order given; a requested regressor that no model contains yields an
    all-NaN row.

    Returns an empty DataFrame if 'res_list' is empty.
    Raises KeyError if an element of 'res_list' lacks one of the requested stats.
    """  
    
    ncols = len(res_list)
    if ncols == 0:
        # pd.concat refuses an empty list; an empty table is the natural answer here.
        return pd.DataFrame()

    pieces = []
    for stat in stats_body:
        block = pd.concat([res[stat] for res in res_list], axis=1)
        # Series names differ across packages and stats (e.g. unnamed vs 'parameter'
        # vs 'tstat'), so relabel columns positionally to keep all blocks aligned.
        block.columns = range(ncols)
        if which_xvars is not None:
            block = block.reindex(which_xvars)
        block.index = [f"{stat}_{xvar}" for xvar in block.index]
        pieces.append(block)

    if stats_bottom:
        bottom = pd.DataFrame([[res[stat] for res in res_list] for stat in stats_bottom],
                              index=list(stats_bottom), columns=range(ncols))
        pieces.append(bottom)

    if not pieces:
        # Nothing requested: keep one (empty) column per model.
        return pd.DataFrame(columns=range(ncols))

    return pd.concat(pieces, axis=0)

In [None]:
# Combine the three collected-stats dicts using the default body and bottom stats.
d = to_df([stats1, stats2, stats3])

In [None]:
# Show what `to_df` returned.
d

{'params':                         0         1  parameter
 params_constant  0.503847  0.503847   0.495446
 params_x        -0.019281 -0.019281  -0.005671,
 'tstats':                         0         1     tstat
 tstats_constant  1.762032  1.543043  2.014245
 tstats_x        -0.044981 -0.049299 -0.014231}

In [None]:
#| export
def to_tex(get_pdf=True, open_pdf=False):
    """Placeholder, not implemented yet: both arguments are ignored and None is returned."""
    return None

In [None]:
#| hide
# Write the `#| export` cells of this notebook out to the package modules.
import nbdev
nbdev.nbdev_export()