# statsmodels

> Functions that extract and standardize results from `statsmodels.api.OLS.fit()` objects and `statsmodels.api.OLS.fit().get_robustcov_results()` objects.

In [None]:
#| default_exp statsmodels_results

In [None]:
#| hide 
from nbdev.showdoc import *
from nbdev.test import * 

In [None]:
#| export
import numpy as np
import pandas as pd
import statsmodels.api as sm

Start with an example panel dataset (three firms over three periods of time).

In [None]:
# Fixed seed so the random draws (and therefore the displayed values) are reproducible.
np.random.seed(0)
# Nine rows: firms 1-3, each observed at times 1-3, plus a column of ones for the intercept.
df = (
    pd.DataFrame(np.random.rand(9, 2), columns=['y', 'x'])
    .assign(
        firmid=[1]*3 + [2]*3 + [3]*3,
        time=[1, 2, 3]*3,
        constant=1,
    )
    .set_index(['firmid', 'time'])
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,y,x,constant
firmid,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,0.548814,0.715189,1
1,2,0.602763,0.544883,1
1,3,0.423655,0.645894,1
2,1,0.437587,0.891773,1
2,2,0.963663,0.383442,1
2,3,0.791725,0.528895,1
3,1,0.568045,0.925597,1
3,2,0.071036,0.087129,1
3,3,0.020218,0.83262,1


Next, regress `y` on `x` and a constant.

In [None]:
# sm1: OLS fit of y on a constant and x, with the default covariance.
sm1 = sm.OLS(endog=df['y'], exog=df[['constant', 'x']]).fit()
# sm2: the same regression, with the covariance replaced by a HAC estimate (maxlags=2).
sm2 = (
    sm.OLS(endog=df['y'], exog=df[['constant', 'x']])
    .fit()
    .get_robustcov_results(cov_type='HAC', maxlags=2)
)

The following functions will extract some key information from the `sm1` and `sm2` objects. 

Note that for results produced by `.get_robustcov_results()` (the `sm2` example), many attributes (e.g. `params`, `tvalues`, and `pvalues`) are returned as `np.ndarray` rather than `pd.Series`, so the functions below convert them to `pd.Series` where needed.

These function names will be consistent across all the different packages supported by `estout`.

In [None]:
#| export 
def ynames(res):
    """Return the dependent-variable name(s) of a fitted model as a list.

    Parameters
    ----------
    res : results object exposing ``res.model.endog_names``.

    Returns
    -------
    list
        One entry per dependent variable.
    """
    names = res.model.endog_names
    # A lone name may arrive as a plain string; calling list() on it would
    # split it into characters ('ret' -> ['r', 'e', 't']), so wrap it instead.
    if isinstance(names, str):
        return [names]
    return list(names)

In [None]:
# Name(s) of the dependent variable for the plain OLS fit, returned as a list.
ynames(sm1)

['y']

In [None]:
#| export 
def xnames(res):
    """Return the regressor names stored on the model behind `res`."""
    fitted_model = res.model
    return fitted_model.exog_names

In [None]:
# Regressor names of the plain OLS fit (the columns passed as exog).
xnames(sm1)

['constant', 'x']

In [None]:
#| export 
def params(res):
    """Return the coefficient estimates, labelled by regressor name.

    If `res.params` is a bare ndarray it is wrapped in a pd.Series indexed
    by `xnames(res)`; any other value is returned unchanged.
    """
    coefs = res.params
    if not isinstance(coefs, np.ndarray):
        # Not a raw array (e.g. already a pd.Series): nothing to re-label.
        return coefs
    return pd.Series(coefs, index=xnames(res))

In [None]:
# Coefficient estimates from the plain OLS results.
params(sm1)

constant    0.503847
x          -0.019281
dtype: float64

In [None]:
# Same call on the HAC results; the output is again a labelled pd.Series.
params(sm2)

constant    0.503847
x          -0.019281
dtype: float64

In [None]:
#| export 
def tstats(res):
    """Return the t-statistics, labelled by regressor name.

    If `res.tvalues` is a bare ndarray it is wrapped in a pd.Series indexed
    by `xnames(res)`; any other value is returned unchanged.
    """
    tvals = res.tvalues
    if not isinstance(tvals, np.ndarray):
        # Not a raw array (e.g. already a pd.Series): nothing to re-label.
        return tvals
    return pd.Series(tvals, index=xnames(res))

In [None]:
# t-statistics from the plain OLS results.
tstats(sm1)

constant    1.762032
x          -0.044981
dtype: float64

In [None]:
# t-statistics from the HAC results; note they differ from those of sm1.
tstats(sm2)

constant    1.543043
x          -0.049299
dtype: float64

In [None]:
#| export
def pvalues(res):
    """Return the p-values, labelled by regressor name.

    If `res.pvalues` is a bare ndarray it is wrapped in a pd.Series indexed
    by `xnames(res)`; any other value is returned unchanged.
    """
    pvals = res.pvalues
    if not isinstance(pvals, np.ndarray):
        # Not a raw array (e.g. already a pd.Series): nothing to re-label.
        return pvals
    return pd.Series(pvals, index=xnames(res))

In [None]:
# p-values from the plain OLS results.
pvalues(sm1)

constant    0.121441
x           0.965379
dtype: float64

In [None]:
# p-values from the HAC results; note they differ from those of sm1.
pvalues(sm2)

constant    0.166728
x           0.962058
dtype: float64

In [None]:
#| export
def covmat(res):
    """Return the coefficient covariance matrix, labelled by regressor name.

    Parameters
    ----------
    res : results object exposing a ``cov_params()`` method.

    Returns
    -------
    If ``res.cov_params()`` yields a bare ndarray, a pd.DataFrame whose rows
    and columns are labelled with `xnames(res)`; otherwise the value from
    ``cov_params()`` unchanged.
    """
    # Evaluate cov_params() a single time and reuse the result for both the
    # type check and the return value, rather than recomputing the matrix.
    cov = res.cov_params()
    if isinstance(cov, np.ndarray):
        labels = xnames(res)
        return pd.DataFrame(cov, index=labels, columns=labels)
    return cov

In [None]:
# Coefficient covariance matrix from the plain OLS results.
covmat(sm1)

Unnamed: 0,constant,x
constant,0.081765,-0.113414
x,-0.113414,0.183735


In [None]:
# Coefficient covariance matrix from the HAC results, labelled like the sm1 output.
covmat(sm2)

Unnamed: 0,constant,x
constant,0.106621,-0.123268
x,-0.123268,0.152958


In [None]:
#| export
def se(res):
    """Return coefficient standard errors as a pd.Series indexed by regressor name.

    Computed as the square roots of the diagonal of `covmat(res)`.
    """
    variances = np.diag(np.array(covmat(res)))
    return pd.Series(np.sqrt(variances), index=xnames(res))

In [None]:
# Standard errors from the plain OLS covariance matrix.
se(sm1)

constant    0.285946
x           0.428643
dtype: float64

In [None]:
# Standard errors from the HAC covariance matrix.
se(sm2)

constant    0.326528
x           0.391099
dtype: float64

In [None]:
#| export
def nobs(res):
    """Return the number of observations (`res.nobs`) converted to an int."""
    count = res.nobs
    return int(count)

In [None]:
# Number of observations in the regression (the nine firm-period rows of df).
nobs(sm1)

9

In [None]:
#| export
def r2(res):
    """Return the R-squared stored on the results object (`res.rsquared`)."""
    rsq = res.rsquared
    return rsq

In [None]:
# R-squared of the plain OLS fit.
r2(sm1)

0.00028895966112796323

In [None]:
#| hide
# Trigger nbdev's export step for this project.
import nbdev
nbdev.nbdev_export()