# core

> Python package that helps collect outputs of statistical analyses into tables and export them to LaTeX and PDF (similar to the ``estout`` and ``esttab`` commands in Stata).

In [None]:
#| default_exp core

In [None]:
#| hide 
from nbdev.showdoc import *
from nbdev.test import *

In [None]:
#| export
from typing import List, Dict, Tuple
import importlib
import numpy as np
import pandas as pd

import statsmodels.api as sm
from linearmodels import PanelOLS

from estout.utils import *

In [None]:
# Toy panel used by the examples below: 3 firms x 3 periods, seeded so the draws are reproducible.
np.random.seed(123)
df = (
    pd.DataFrame(np.random.rand(9, 2), columns=['y', 'x'])
    .assign(
        firmid=[firm for firm in (1, 2, 3) for _ in range(3)],
        time=[period for _ in range(3) for period in (1, 2, 3)],
        constant=1,
    )
    .set_index(['firmid', 'time'])
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,y,x,constant
firmid,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,0.696469,0.286139,1
1,2,0.226851,0.551315,1
1,3,0.719469,0.423106,1
2,1,0.980764,0.68483,1
2,2,0.480932,0.392118,1
2,3,0.343178,0.72905,1
3,1,0.438572,0.059678,1
3,2,0.398044,0.737995,1
3,3,0.182492,0.175452,1


In [None]:
# All three example models share the same dependent variable and regressors.
endog, exog = df['y'], df[['constant', 'x']]

# OLS fitted without any covariance options.
sm1 = sm.OLS(endog, exog).fit()
# Same OLS specification, re-expressed with a HAC covariance (maxlags=2).
sm2 = sm.OLS(endog, exog).fit().get_robustcov_results(cov_type='HAC', maxlags=2)
# Panel regression with entity effects and entity-clustered covariance.
lmres = PanelOLS(endog, exog, entity_effects=True).fit(
    cov_type='clustered', cluster_entity=True
)

In [None]:
#| export
def collect_stats(res, # results object to extract stats from
                  package: str, # name of package that generated 'res' object
                  get_default_stats = True, # if True, returns all stats implemented by the f'{package}_results' module
                  add_stats: dict=None, # keys are stats to extract in addition to the default ones, values are attributes of 'res'
                  add_literals: dict=None, # additional info to be added to output dict as literal strings
                  ) -> dict:
    """Gather statistics from `res` into a dict; `add_stats` entries take precedence over the defaults.

    The returned dict always starts with a 'package' entry. Later sources overwrite
    earlier ones on key collisions: defaults, then `add_stats`, then `add_literals`.
    """
    # Extractor functions live in a per-package module, e.g. 'statsmodels' -> estout.statsmodels_results.
    extractors = importlib.import_module(f"estout.{package}_results")
    collected = {'package': package}

    if get_default_stats:
        # Each default stat name is also the name of a function in the extractor module.
        collected.update({name: rgetattr(extractors, name)(res)
                          for name in extractors.default_stats()})

    if add_stats is not None:
        # Extra stats are read straight off the results object by attribute path.
        collected.update({name: rgetattr(res, attr_path)
                          for name, attr_path in add_stats.items()})

    if add_literals is not None:
        collected.update(add_literals)

    return collected

In [None]:
# Default stats only.
stats1 = collect_stats(res=sm1, package='statsmodels')
# Default stats plus a literal label attached to the output.
stats2 = collect_stats(
    res=sm2,
    package='statsmodels',
    add_literals={'Cov Type': 'Newey West'},
)
# Default stats plus two extra attributes read off the linearmodels result object.
stats3 = collect_stats(
    res=lmres,
    package='linearmodels',
    add_stats={'r2b': 'rsquared_between', 'FE': 'included_effects'},
)

In [None]:
# Entries passed through `add_literals` must come back unchanged in the output dict.
assert stats2['Cov Type'] == 'Newey West'

In [None]:
# Show everything collected for the panel model, including the extras requested via `add_stats`.
stats3

{'package': 'linearmodels',
 'ynames': ['y'],
 'xnames': ['constant', 'x'],
 'params': constant    0.516249
 x          -0.044426
 Name: parameter, dtype: float64,
 'tstats': constant    5.041040
 x          -0.194717
 Name: tstat, dtype: float64,
 'pvalues': constant    0.003963
 x           0.853279
 Name: pvalue, dtype: float64,
 'covmat':           constant         x
 constant  0.010488 -0.023365
 x        -0.023365  0.052056,
 'se': constant    0.102409
 x           0.228157
 dtype: float64,
 'r2': 0.0017277475995215452,
 'nobs': 9,
 'r2b': -0.08139183531083982,
 'FE': ['Entity']}

In [None]:
# Extra stat read from the result's 'rsquared_between' attribute.
stats3['r2b']

-0.08139183531083982

In [None]:
# Extra stat read from the result's 'included_effects' attribute.
stats3['FE']

['Entity']

In [None]:
#| export
def to_df(res_list: List[dict], # list of outputs from `collect_stats()`
          which_xvars: list=None, # if None, report all xvars
          stats_body: list=['params', 'tstats'], # each element of 'res_list' needs to have these stats as keys; values must be pd.Series
          stats_bottom: list=['r2', 'nobs'], # each element of 'res_list' needs to have these stats as keys; values must be scalars
          labels: dict=None, # accepted for API compatibility; not used by this function yet
          add_formats: dict=None # format strings keyed by stat name; added to / overriding `default_formats()`
          ) -> pd.DataFrame:
    """Combines results from multiple `collect_stats()` outputs into a single pd.DataFrame

    One column, named '(1)', '(2)', ..., is produced per element of `res_list`.
    For every reported variable there is one row per stat in `stats_body`
    ('params' gets significance stars from `get_stars(res['pvalues'])`, every other
    body stat is wrapped in parentheses), followed by one row per stat in `stats_bottom`.

    If `which_xvars` is None, all variables present in the body stats are reported;
    otherwise only the listed variables are kept, in the order given.

    Raises KeyError if a result dict lacks a requested stat (or 'pvalues' when
    'params' is in `stats_body`), if a body stat has no entry in the formats,
    or if a name in `which_xvars` is not among the variables of a result.
    """
    # Work on a copy so the dict returned by `default_formats()` is never mutated.
    formats = dict(default_formats())
    if add_formats is not None:
        formats.update(add_formats)

    columns = []
    for i, res in enumerate(res_list):
        col_name = f'({i+1})'

        # Variables as rows, one column per body stat.
        newcol = pd.concat([res[x] for x in stats_body], axis=1, ignore_index=True).set_axis(stats_body, axis=1)
        for x in stats_body:
            newcol[x] = newcol[x].map(formats[x].format)
            if x == 'params':
                newcol[x] += get_stars(res['pvalues'])
            else:
                newcol[x] = '(' + newcol[x] + ')'

        # Stack the body stats under each variable: the index becomes var, var, ... (one entry per body stat).
        newcol = newcol.transpose().melt(var_name='coeff_names', value_name=col_name).set_index('coeff_names')
        if which_xvars is not None:
            # Only filter when a selection was given; `.loc[None]` would raise instead of "report all".
            newcol = newcol.loc[which_xvars]
        newcol = newcol.copy()

        # Bottom stats are appended as extra rows; unformatted if no format string is known for them.
        for x in stats_bottom:
            newcol.loc[x, col_name] = formats[x].format(res[x]) if x in formats else res[x]

        columns.append(newcol)

    return pd.concat(columns, axis=1)

In [None]:
# Combine the three models into one table, reporting both regressors and showing r2 with two decimals.
d = to_df(
    res_list=[stats1, stats2, stats3],
    which_xvars=['constant', 'x'],
    add_formats={'r2': '{:.2f}'},
)

In [None]:
# Display the combined table: one column per model, coefficient rows followed by the bottom stats.
d

Unnamed: 0_level_0,(1),(2),(3)
coeff_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
constant,0.42*,0.42***,0.52***
constant,(2.11),(3.65),(5.04)
x,0.18,0.18,-0.04
x,(0.46),(0.67),(-0.19)
r2,0.03,0.03,0.00
nobs,9,9,9


In [None]:
#| export
def to_tex(get_pdf=True, open_pdf=False):
    """Placeholder for the LaTeX/PDF export; not implemented yet.

    Both arguments are accepted and ignored; the function does nothing and returns None.
    """
    return None

In [None]:
#| hide
# Run nbdev's export step for this project.
import nbdev
nbdev.nbdev_export()