In [None]:
#| hide
#from estout.core import *

# estout

> Collect outputs from stats packages and export them to `pd.DataFrame` and LaTeX

This notebook is rendered as the package README and as the index page of the documentation.

## Install

```sh
pip install estout
```

## How to use

Set up an example dataset and run a few regressions to showcase the functions in this module.

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels import PanelOLS
import estout
from IPython.display import display

In [None]:
# Seed NumPy's global RNG so the random example data is the same on every run.
np.random.seed(123)

# 9 rows = 3 firms x 3 periods, indexed by (firmid, time); y, x, z are uniform draws.
df = pd.DataFrame(
    data=np.random.rand(9, 3),
    index=pd.MultiIndex.from_product([[1, 2, 3], [1, 2, 3]], names=['firmid', 'time']),
    columns=['y', 'x', 'z'],
)
# Explicit column of ones, passed to the models below as the intercept regressor.
df['cons'] = 1
# Model 1: statsmodels OLS of y on the constant and x.
sm1 = sm.OLS(df['y'], df[['cons', 'x']]).fit()

# Model 2: adds z as a regressor and requests a HAC covariance with maxlags=2.
sm2 = (
    sm.OLS(df['y'], df[['cons', 'x', 'z']])
    .fit()
    .get_robustcov_results(cov_type='HAC', maxlags=2)
)

# Model 3: same regressors via linearmodels' PanelOLS, with entity effects
# and a covariance clustered by entity.
lmres = PanelOLS(
    df['y'],
    df[['cons', 'x', 'z']],
    entity_effects=True,
).fit(cov_type='clustered', cluster_entity=True)

### Extracting statistics after fitting a model

Below, we collect just the default set of statistics from the `sm1` object. These are given by the functions implemented in the `statsmodels_results` module (since `sm1` was generated by the `statsmodels` package).

In [None]:
# No optional arguments: returns the default statistics as a dict (shown below).
estout.collect_stats(sm1)

{'package': 'statsmodels',
 'ynames': ['y'],
 'xnames': ['cons', 'x'],
 'params': cons    0.507852
 x       0.345003
 dtype: float64,
 'tstats': cons    3.905440
 x       1.292246
 dtype: float64,
 'pvalues': cons    0.005858
 x       0.237293
 dtype: float64,
 'covmat':           cons         x
 cons  0.016910 -0.030531
 x    -0.030531  0.071278,
 'se': cons    0.130037
 x       0.266979
 dtype: float64,
 'nobs': 9,
 'r2': 0.19260886185799486}

Collect statistics by specifying the name of their attribute in the results object (using the `add_stats` parameter):

In [None]:
# add_stats maps an output key to the attribute path read from the results object;
# the default statistics are switched off so only these entries are collected.
estout.collect_stats(
    sm1,
    get_default_stats=False,
    add_stats={
        'xnames': 'model.exog_names',
        'Adj. R2': 'rsquared_adj',
    },
)

{'package': 'statsmodels',
 'xnames': ['cons', 'x'],
 'Adj. R2': 0.07726727069485129}

Add scalar statistics not available as attributes of the results object (using the `add_literals` parameter):

In [None]:
# add_literals entries are copied into the output as given (key -> literal value).
estout.collect_stats(
    sm1,
    get_default_stats=False,
    add_literals={
        'Fixed Effects': 'No',
        'Nr observations': 123,
    },
)

{'package': 'statsmodels', 'Fixed Effects': 'No', 'Nr observations': 123}

### Combining model results into a DataFrame

Start by collecting stats from each model and combining them in a list.

In [None]:
# One stats dict per fitted model, in the order the models should appear as table columns.
allmodels = [estout.collect_stats(fitted) for fitted in (sm1, sm2, lmres)]

In [None]:
# Display the combined table directly, like the other examples in this notebook;
# the result is not reused, so no throwaway variable is kept around.
estout.to_df(allmodels)

Unnamed: 0,Unnamed: 1,0,1,2
cons,params,0.51***,0.70***,0.73***
cons,tstats,(3.91),(21.48),(167.36)
x,params,0.35,0.57**,0.64*
x,tstats,(1.29),(2.85),(2.26)
z,params,,-0.64**,-0.77**
z,tstats,,(-3.55),(-2.91)
r2,,0.193,0.487,0.352
nobs,,9,9,9


We can choose to report only a subset of the regressors.

In [None]:
# which_xvars keeps only the listed regressors; the bottom statistics are still reported.
estout.to_df(allmodels, which_xvars=['x','z'])

Unnamed: 0,Unnamed: 1,0,1,2
x,params,0.35,0.57**,0.64*
x,tstats,(1.29),(2.85),(2.26)
z,params,,-0.64**,-0.77**
z,tstats,,(-3.55),(-2.91)
r2,,0.193,0.487,0.352
nobs,,9,9,9


Report other statistics under the parameter values.

In [None]:
# stats_body lists the statistics stacked under each regressor, in the order given.
estout.to_df(allmodels, stats_body=['params','se','pvalues'], which_xvars=['x'])

Unnamed: 0,Unnamed: 1,0,1,2
x,params,0.35,0.57**,0.64*
x,se,(0.27),(0.20),(0.28)
x,pvalues,(0.237),(0.029),(0.086)
r2,,0.193,0.487,0.352
nobs,,9,9,9


Change the statistics reported at the bottom of the table.

In [None]:
# stats_bottom lists the rows shown beneath the regressors; here only r2, so nobs is dropped.
estout.to_df(allmodels, stats_bottom=['r2'],  which_xvars=['x'])

Unnamed: 0,Unnamed: 1,0,1,2
x,params,0.35,0.57**,0.64*
x,tstats,(1.29),(2.85),(2.26)
r2,,0.193,0.487,0.352


Change the formatting for any of the statistics reported.

In [None]:
# add_formats maps a statistic name to a str.format template:
# '{:.3}' -> three significant digits for params, '{:.2f}' -> two decimals for r2.
estout.to_df(
    allmodels,
    add_formats={'params': '{:.3}', 'r2': '{:.2f}'},
    which_xvars=['x'],
)

Unnamed: 0,Unnamed: 1,0,1,2
x,params,0.345,0.571**,0.643*
x,tstats,(1.29),(2.85),(2.26)
r2,,0.19,0.49,0.35
nobs,,9,9,9


Replace regressor (or bottom stats) names with labels.

In [None]:
# labels maps a row name (regressor or bottom statistic) to the text displayed instead.
estout.to_df(
    allmodels,
    labels={'cons': 'Intercept', 'nobs': 'Observations'},
    which_xvars=['cons'],
)

Unnamed: 0,Unnamed: 1,0,1,2
Intercept,params,0.51***,0.70***,0.73***
Intercept,tstats,(3.91),(21.48),(167.36)
r2,,0.193,0.487,0.352
Observations,,9,9,9


Since the output of `to_df` is a `pd.DataFrame`, it is easy to add more information at the bottom of the table without having to re-run `collect_stats`.

In [None]:
# Give the table its own name: `df` already holds the example dataset, and rebinding it
# here would silently replace that dataset for any cell run afterwards.
results_table = estout.to_df(allmodels)
# One value per model column, in the order the models were collected (sm1, sm2, lmres);
# only the PanelOLS model was fitted with entity effects.
results_table.loc['Fixed effects', :] = ['No', 'No', 'Entity']
results_table

Unnamed: 0,Unnamed: 1,0,1,2
cons,params,0.51***,0.70***,0.73***
cons,tstats,(3.91),(21.48),(167.36)
x,params,0.35,0.57**,0.64*
x,tstats,(1.29),(2.85),(2.26)
z,params,,-0.64**,-0.77**
z,tstats,,(-3.55),(-2.91)
r2,,0.193,0.487,0.352
nobs,,9,9,9
Fixed effects,,No,No,Entity
