# core

> Main functions of `estout` package

Main functions are:

- `collect_stats`: extracts a given set of attributes from a results object generated by a stats package
- `to_df`: takes a list of `collect_stats` outputs and merges them as separate columns in a pd.DataFrame
- `to_tex`: takes one or more outputs from `to_df` and exports them to a LaTeX file

In [None]:
#|default_exp core

In [None]:
#|hide 
from nbdev.showdoc import *
from nbdev.test import *

In [None]:
#|exports
from typing import List
import importlib
import numpy as np
import pandas as pd

import statsmodels.api as sm
from linearmodels import PanelOLS

from estout.utils import *

Set up an example dataset and run a few regressions to showcase the functions in this module.

In [None]:
# Fix the seed so the random example data below is reproducible
np.random.seed(123)
# 9 rows of random 'y', 'x', 'z' values indexed by (firmid, time) for 3 firms x 3 periods, plus a constant column 'cons'
df = pd.DataFrame(np.random.rand(9,3), 
                  columns=['y','x','z'],
                  index = pd.MultiIndex.from_product([[1,2,3],[1,2,3]], names=['firmid','time'])
                  ).assign(cons = 1)
# statsmodels OLS of 'y' on the constant and 'x'
sm1 = sm.OLS(df['y'], df[['cons','x']]).fit()
# statsmodels OLS adding 'z', re-estimated with an HAC covariance (maxlags=2)
sm2 = sm.OLS(df['y'], df[['cons','x','z']]).fit().get_robustcov_results(cov_type='HAC', maxlags=2)
# linearmodels PanelOLS with entity effects and a covariance clustered by entity
lmres = PanelOLS(df['y'],  df[['cons','x','z']], entity_effects=True
                 ).fit(cov_type='clustered', cluster_entity=True)

In [None]:
#|export
def collect_stats(res, # results object to extract stats from
                  package: str=None, # name of package that generated 'res' object; overridden when 'res' is recognised as a statsmodels or linearmodels object
                  get_default_stats = True, # if True, returns all stats implemented by the f'{package}_results' module
                  add_stats: dict=None, # keys are stats to extract in addition to the default ones, values are attributes of 'res'
                  add_literals: dict=None, # additional info to be added to output dict as literal strings
                  ) -> dict:
    """Collects stats from 'res' object into a dict.

    The output always contains a 'package' key. Entries are written in this order, so later
    ones override earlier ones with the same key: default stats, then 'add_stats', then 'add_literals'.

    Raises `ValueError` if 'package' is not given and cannot be inferred from 'res'.
    """

    # Not every object exposes `__module__` (e.g. instances of some builtin types), so read it defensively
    origin = getattr(res, '__module__', None) or ''
    # A recognised origin takes precedence over the 'package' argument
    for known in ('linearmodels', 'statsmodels'):
        if origin.startswith(known): package = known

    # Fail early with a clear message instead of trying to import 'estout.None_results'
    if package is None:
        raise ValueError("Could not infer the package that generated 'res' "
                         f"(module: {origin!r}); pass it explicitly via the 'package' argument")

    out = {}
    out['package'] = package
    results_module = importlib.import_module(f"estout.{package}_results")

    if get_default_stats:
        # Each default stat name is also the name of an extractor in the results module, which is called on 'res'
        for stat in results_module.default_stats():
            out[stat] = rgetattr(results_module, stat)(res)

    if add_stats is not None:
        # Values are attribute names looked up on 'res' via `rgetattr`
        for stat, attr in add_stats.items():
            out[stat] = rgetattr(res, attr)

    if add_literals is not None:
        out.update(add_literals)

    return out

In [None]:
# Default stats only
stats1 = collect_stats(sm1, package='statsmodels')
# Default stats plus a literal entry that is copied into the output as is
stats2 = collect_stats(sm2, package='statsmodels', add_literals={'Cov Type': 'Newey West'})
# Default stats plus two extra attributes read from the results object and stored under the keys 'r2b' and 'FE'
stats3 = collect_stats(lmres, package='linearmodels', add_stats={'r2b': 'rsquared_between', 'FE':'included_effects'})

In [None]:
# Display the dict of default stats collected for the first regression
stats1

{'package': 'statsmodels',
 'ynames': ['y'],
 'xnames': ['cons', 'x'],
 'params': cons    0.507852
 x       0.345003
 dtype: float64,
 'tstats': cons    3.905440
 x       1.292246
 dtype: float64,
 'pvalues': cons    0.005858
 x       0.237293
 dtype: float64,
 'covmat':           cons         x
 cons  0.016910 -0.030531
 x    -0.030531  0.071278,
 'se': cons    0.130037
 x       0.266979
 dtype: float64,
 'r2': 0.19260886185799486,
 'nobs': 9}

In [None]:
# Entries passed via 'add_literals' end up in the output unchanged
assert stats2['Cov Type'] == 'Newey West'

In [None]:
# Extra stat requested via add_stats={'r2b': 'rsquared_between'}
stats3['r2b']

0.7954933715233714

In [None]:
# Extra stat requested via add_stats={'FE': 'included_effects'}
stats3['FE']

['Entity']

In [None]:
#|export
def to_df(res_list: List[dict], # list of outputs from `collect_stats()`
          which_xvars: list=None, # if None, report all xvars
          stats_body: list=['params', 'tstats'], # each element of 'res_list' needs to have these stats as keys; values must be pd.Series
          stats_bottom: list=['r2', 'nobs'], # each element of 'res_list' needs to have these stats as keys; values must be scalars
          labels: dict=None, # maps row names (first index level) to the labels to show instead; names not in the dict are kept
          add_formats: dict=None # format strings, keyed by stat name, that extend or override those from `default_formats()`
          ) -> pd.DataFrame: 
    """Combines results from multiple `collect_stats()` outputs into a single pd.DataFrame

    Each element of 'res_list' becomes one column (labelled by its position in the list).
    Body rows are indexed by (variable, stat); 'params' get significance stars appended
    (based on the 'pvalues' entry of each result), every other body stat is wrapped in parentheses.
    Bottom rows ('stats_bottom') are appended after the 'which_xvars' filter, so they are always kept.
    All cells are returned as strings, with missing entries shown as ''.
    """  
    
    # Build a new dict so the object returned by `default_formats()` is never mutated
    formats = {**default_formats(), **(add_formats or {})}
    
    columns = []
    for res in res_list:
        # One column per body stat, rows are the regressors of this result
        newcol = pd.concat([res[x] for x in stats_body], axis=1, ignore_index=True).set_axis(stats_body, axis=1)
        for x in stats_body:
            newcol[x] = newcol[x].map(formats[x].format)
            if x == 'params':
                newcol[x] += get_stars(res['pvalues'])
            else:
                newcol[x] = '(' + newcol[x] + ')'
        # Move the stat names into a second index level: rows become (variable, stat)
        columns.append(newcol.stack(level=0))

    out = pd.concat(columns, axis = 1)
    # Keep only the requested regressors (the filter is skipped when 'which_xvars' is None)
    if which_xvars is not None: out = out.loc[which_xvars].copy()
    
    for i,res in enumerate(res_list):
        for x in stats_bottom:
            # Bottom stats without a registered format are inserted as is
            out.loc[x,i] = formats[x].format(res[x]) if x in formats else res[x]

    if labels is not None:
        # Single pass, so a label that equals another variable's name is not renamed a second time
        out = out.rename(index=labels, level=0)

    return out.astype('string').fillna('')

In [None]:
# Combine the three sets of stats into one table, formatting 'r2' with two decimals
# and showing the 'cons' rows under the label 'Intercept'
d = to_df(res_list=[stats1, stats2, stats3], 
          which_xvars=['cons','x','z'], 
          add_formats={'r2':'{:.2f}'},
          labels={'cons':'Intercept'})
d

Unnamed: 0,Unnamed: 1,0,1,2
Intercept,params,0.51***,0.70***,0.73***
Intercept,tstats,(3.91),(21.48),(167.36)
x,params,0.35,0.57**,0.64*
x,tstats,(1.29),(2.85),(2.26)
z,params,,-0.64**,-0.77**
z,tstats,,(-3.55),(-2.91)
r2,,0.19,0.49,0.35
nobs,,9,9,9


In [None]:
#|export
def to_tex(get_pdf=True, # currently unused
           open_pdf=False, # currently unused
           ):
    """Placeholder for the LaTeX export: not implemented yet, does nothing and returns None."""
    return None

In [None]:
#|hide
# Run nbdev's export step (presumably writes the exported cells of this notebook to the library module - see nbdev docs)
import nbdev; nbdev.nbdev_export()