# core

> Main functions of `estout` package

Main functions are:

- `collect_stats`: extracts a given set of attributes from a results object generated by a stats package
- `to_df`: takes a list of `collect_stats` outputs and merges them as separate columns in a pd.DataFrame
- `to_tex`: takes one or more outputs from `to_df` and exports them to a LaTeX file

In [None]:
#|default_exp core

In [None]:
#|hide 
from nbdev.showdoc import *
from nbdev.test import *

In [None]:
#|exports
from __future__ import annotations
from pathlib import Path 
from typing import List, Dict, Literal, Union
import importlib

import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels import PanelOLS

from estout.utils import *

Set up an example dataset and run a few regressions to showcase the functions in this module.

In [None]:
# Fix the seed so the random example data (and every output shown below) is reproducible
np.random.seed(123)
# 9 rows of uniform random draws for y, x, z, indexed by a 3x3 (firmid, time) MultiIndex,
# plus a column of ones ('cons') used as the regression intercept
df = pd.DataFrame(np.random.rand(9,3), 
                  columns=['y','x','z'],
                  index = pd.MultiIndex.from_product([[1,2,3],[1,2,3]], names=['firmid','time'])
                  ).assign(cons = 1)
# statsmodels OLS of y on a constant and x, with the default covariance
sm1 = sm.OLS(df['y'], df[['cons','x']]).fit()
# statsmodels OLS of y on a constant, x and z, re-wrapped with a HAC covariance (maxlags=2)
sm2 = sm.OLS(df['y'], df[['cons','x','z']]).fit().get_robustcov_results(cov_type='HAC', maxlags=2)
# linearmodels PanelOLS with entity effects, covariance clustered by entity
lmres = PanelOLS(df['y'],  df[['cons','x','z']], entity_effects=True
                 ).fit(cov_type='clustered', cluster_entity=True)

In [None]:
#|export
def collect_stats(res, # Results object to extract stats from
                  get_default_stats = True, # If True, returns all stats implemented by the f'{package}_results' module
                  add_stats: dict=None, # Keys are stats to extract in addition to the default ones; values are attributes of 'res'
                  add_literals: dict=None, # Additional info to be added to output dict; values must be scalars
                  ) -> dict:
    """Collects stats from 'res' object. Stats in 'add_stats' can override default stats.

    The returned dict always starts with a 'package' key holding the top-level package name
    taken from `res.__module__`. Entries are then written in this order, later ones replacing
    earlier ones that share a key: default stats, 'add_stats', 'add_literals'.

    The `estout.{package}_results` module is imported only when 'get_default_stats' is True,
    so results from a package without such a module can still be processed by passing
    `get_default_stats=False`. With `get_default_stats=True`, a missing module surfaces as
    the import error raised by `importlib.import_module`.
    """

    # Top-level package that produced 'res'; it selects which results module to use
    out = {'package': res.__module__.split('.')[0]}

    if get_default_stats:
        # Imported lazily: only the default stats need the package-specific module
        results_module = importlib.import_module(f"estout.{out['package']}_results")
        # Every name listed in the module's __all__ is called with 'res' to produce one stat
        for stat in results_module.__all__:
            out[stat] = rgetattr(results_module, stat)(res)

    if add_stats is not None:
        # Values are attribute names looked up on 'res' through rgetattr
        for stat, attr in add_stats.items():
            out[stat] = rgetattr(res, attr)

    if add_literals is not None:
        # Copied in as-is, without looking anything up on 'res'
        out.update(add_literals)

    return out

In [None]:
# Default stats only
stats1 = collect_stats(sm1)
# Default stats plus a literal entry stored under the key 'Cov Type'
stats2 = collect_stats(sm2, add_literals={'Cov Type': 'Newey West'})
# Default stats plus two attributes of the results object, stored under the keys 'r2b' and 'FE'
stats3 = collect_stats(lmres, add_stats={'r2b': 'rsquared_between', 'FE':'included_effects'})

In [None]:
stats1

{'package': 'statsmodels',
 'ynames': ['y'],
 'xnames': ['cons', 'x'],
 'params': cons    0.507852
 x       0.345003
 dtype: float64,
 'tstats': cons    3.905440
 x       1.292246
 dtype: float64,
 'pvalues': cons    0.005858
 x       0.237293
 dtype: float64,
 'covmat':           cons         x
 cons  0.016910 -0.030531
 x    -0.030531  0.071278,
 'se': cons    0.130037
 x       0.266979
 dtype: float64,
 'nobs': 9,
 'r2': 0.19260886185799475}

In [None]:
# Entries passed through 'add_literals' are stored in the output dict unchanged
assert stats2['Cov Type'] == 'Newey West'

In [None]:
stats3['r2b']

0.7954933715233719

In [None]:
stats3['FE']

['Entity']

In [None]:
#|export
def to_df(res_list: List[dict], # List of outputs from `collect_stats()`
          which_xvars: list=None, # If None, report all xvars
          stats_body: list=['params', 'tstats'], # Each element of 'res_list' needs to have these stats as keys; values must be pd.Series
          stats_bottom: list=['r2', 'nobs'], # Each element of 'res_list' needs to have these stats as keys; values must be scalars
          labels: dict=None, # Keys are first-level row names; values are the labels that replace them
          add_formats: dict=None # Format strings, keyed by stat name, that update `default_formats()`
          ) -> pd.DataFrame:
    """Builds one pd.DataFrame from several `collect_stats()` outputs, one column per element of 'res_list'"""

    fmt = default_formats()
    if add_formats is not None:
        fmt.update(add_formats)

    def _format_model(model):
        # Side-by-side body stats (one column per stat), formatted as strings
        block = pd.concat([model[s] for s in stats_body], axis=1, ignore_index=True).set_axis(stats_body, axis=1)
        for s in stats_body:
            block[s] = block[s].map(fmt[s].format)
            if s == 'params':
                # Coefficients get whatever get_stars() returns for the p-values appended
                block[s] += get_stars(model['pvalues'])
            else:
                # Every other body stat is wrapped in parentheses
                block[s] = '(' + block[s] + ')'
        # Stack the stat columns into a second index level: (row name, stat)
        return block.stack(level=0)

    out = pd.concat([_format_model(model) for model in res_list], axis = 1)
    if which_xvars is not None:
        out = out.loc[which_xvars].copy()

    # Append the scalar stats as extra rows; column labels are the positions in 'res_list'
    for col, model in enumerate(res_list):
        for s in stats_bottom:
            out.loc[s,col] = fmt[s].format(model[s]) if s in fmt else model[s]

    if labels is not None:
        # Row names are collected once, before any renaming takes place
        for var in set(out.droplevel(1).index):
            if var not in labels:
                continue
            out = out.rename(index={var:labels[var]}, level=0)

    # Clean up row names: keep the first-level name only on rows whose second level is 'params' or ''
    out = out.reset_index()
    keep_name = out['level_1'].isin(['params',''])
    out.loc[~keep_name, 'level_0'] = ''
    out = out.set_index(['level_0','level_1'])
    out.index.names = (None,None)

    as_strings = out.astype('string')
    return as_strings.fillna('')

In [None]:
# Four result columns (stats1 is used twice), restricted to the rows for cons, x and z.
# 'add_formats' replaces the format string used for r2; 'labels' renames the two bottom rows.
d = to_df(res_list=[stats1, stats2, stats1, stats3], 
          which_xvars=['cons','x','z'], 
          add_formats={'r2':'{:.2f}'},
          labels={'nobs':'Observations', 'r2':'$R^2$'})
d

Unnamed: 0,Unnamed: 1,0,1,2,3
cons,params,0.51***,0.70***,0.51***,0.73***
,tstats,(3.91),(21.48),(3.91),(167.36)
x,params,0.35,0.57**,0.35,0.64*
,tstats,(1.29),(2.85),(1.29),(2.26)
z,params,,-0.64**,,-0.77**
,tstats,,(-3.55),,(-2.91)
$R^2$,,0.19,0.49,0.19,0.35
Observations,,9,9,9,9


Note how we used a LaTeX-friendly label for the $R^2$ coefficient so we don't have to re-label this row before we send it to LaTeX.

In [None]:
#|export
def to_tex(dfs: pd.DataFrame|List[pd.DataFrame], # DataFrame(s) to be converted to tex table; if multiple, they will be panels in a larger table
            outfile: Path|str=None, # Where to save resulting tex output
            title: str='Table title', # Table title
            notes: str='Table description', # Some call this the table caption
            notes_on_top: bool=True, # Set to False if you want table description (caption) to be at the bottom
            label: str='', # Table label (for referencing within LaTeX document)
            table_type: Literal['table','sidewaystable']='table',
            font_size: str='\\footnotesize', # Gets applied to the table contents as well as its caption
            addtocounter: int=0, # Set to -1 for tables that are just a continuation of a table on a new page
            
            panel_title: List[str]=None, # One element in the list for each dataframe in 'dfs'
            palign: Literal['l','r','c']='l', # Alignment of panel title 
            col_groups: List[dict]=None, # Keys are group names; values are lists of consecutive indices of columns in the group
            col_names: List[Union[list,bool]]=True, # If False, none; if True, use df column names; if list, gives custom column names
            hlines: List[List[int]]=None, # Row indices under which to place hline
            tabular_env: str='tabular*' # LaTeX tabular environment
            ) -> str:
    """Create tex code to generate table from one or more dataframes

    Each dataframe is rendered by `df_to_tex` and the resulting panels are joined with a
    `\\smallskip`. The panels are wrapped in a 'table_type' environment with a bold caption
    built from 'title'. 'notes' are emitted only when 'addtocounter' is 0. The tex code is
    returned, and also written to 'outfile' when one is given.
    """

    if isinstance(dfs, pd.DataFrame): dfs = [dfs]
    n_panels = len(dfs)

    # Expand the per-panel options that were left at their defaults to one entry per panel
    if panel_title is None: panel_title = [''] * n_panels
    if col_groups is None: col_groups = [None] * n_panels
    if col_names in [True, False]: col_names = [col_names] * n_panels
    if hlines is None: hlines = [[] for _ in range(n_panels)]

    panels = [df_to_tex(df, panel_title=panel_title[i], palign=palign, tabular_env=tabular_env,
                        col_groups=col_groups[i], col_names=col_names[i], hlines=hlines[i])
              for i, df in enumerate(dfs)]
    # Backslash is escaped explicitly: '\s' is an invalid escape sequence in a normal string literal
    body = '\n \\smallskip \n'.join(panels)

    pre = "\\newpage \n \\clearpage \n "
    pre += f"\\begin{{{table_type}}}[!h] {font_size} \n"
    pre += f"\\addtocounter{{table}}{{{addtocounter}}} \n"
    pre += f"\\caption{{\\textbf{{{title}}}}} \n"

    post = f"\\label{{{label}}} \n \\end{{{table_type}}} \n"

    # Notes are left out whenever the table counter is adjusted (see 'addtocounter')
    notes_tex = f"\\par {{{notes}}}" if addtocounter == 0 else ''

    spacer = " \n\n \\vspace{2mm} \n\n "
    if notes_on_top:
        mid = notes_tex + spacer + body + " \n"
    else:
        mid = body + spacer + notes_tex + " \n"

    content = pre + mid + post

    if outfile:
        with open(outfile, "w") as f:
            f.write(content)
    return content

In [None]:
# Two-panel table built from the same DataFrame twice; every per-panel option is a list with one entry per panel.
# No 'outfile' is given, so the tex code is only returned (and kept in 'tbl'), not written to disk.
tbl = to_tex([d,d], panel_title=['Panel A: Some title', 'Panel B: Some title'], 
               col_groups=[{'Group1':[1,2]}]*2,
               col_names=[['Model 1', 'Model 2', 'Model 3', 'Model 4']]*2,
               hlines=[[0,1,4,12], [1,4,12]] )

In [None]:
#|export
def to_pdf(outfile: str, # Path to .tex file where combined tables are saved (must contain .tex extension)
            table_tex_code: str|Path|List[Union[str,Path]]=None, # String(s) or Paths to files containing table tex code (e.g. like outputs of to_tex())
            article_spec=r'\documentclass[11pt]{article}',
            captionsetup="format=plain, labelsep=newline, labelfont = bf, justification=centering",
            make_pdf: bool=True, 
            open_pdf: bool=False):
    r"""Creates PDF with one or more tables given their tex code (from \begin{table} to \end{table})

    Writes a complete LaTeX document to 'outfile': 'article_spec', a fixed preamble, and the
    tables inside a document environment. `Path` entries in 'table_tex_code' are read from
    disk; strings are used as tex code directly. A single string or a single `Path` is
    accepted as well as a list, and None produces a document with no tables.

    If 'make_pdf' is True, 'outfile' is passed to `make_pdf_from_tex`; if 'open_pdf' is also
    True, the result of `open_pdf_file` is returned. Otherwise returns None.
    """

    # Normalize the input to a list; a lone Path must be wrapped too, since it cannot be iterated
    if table_tex_code is None:
        table_tex_code = []
    elif isinstance(table_tex_code, (str, Path)):
        table_tex_code = [table_tex_code]

    tex_strings = []
    for t in table_tex_code:
        if isinstance(t, Path):
            with open(t, 'r') as f:
                tex_strings.append(f.read())
        else:
            tex_strings.append(t)
    tables = ' \n '.join(tex_strings)

    preamble = r"\usepackage{booktabs,setspace,graphicx,epstopdf,tabularx, bigstrut,textcomp, outlines}" + "\n"
    preamble += r"\usepackage{amsmath,amsfonts,amssymb,amsthm,caption}" + "\n"
    preamble += r"\usepackage[margin=1in]{geometry}" + "\n"
    # Backslash is escaped explicitly: '\c' is an invalid escape sequence in a normal string literal
    preamble += f"\\captionsetup{{{captionsetup}}}" + "\n"

    content = "\n".join([article_spec, preamble, r'\begin{document}', tables, r'\end{document}'])

    with open(outfile, "w") as f:
        f.write(content)
    if make_pdf:
        pdf_path = make_pdf_from_tex(outfile)
        if open_pdf: return open_pdf_file(pdf_path)

For the test below to work, you need to have TeX Live installed (and change the path below to a valid path on your system).

In [None]:
#|eval: false
# Writes a document containing the same table twice to the given .tex file; 'make_pdf' is left at its default of True
to_pdf('../_outputs/paper.tex', table_tex_code=[tbl, tbl])

PDF creation successful!


In [None]:
#|hide
import nbdev; nbdev.nbdev_export()