### Statsmodels

##### statistical models, hypothesis tests, and data exploration

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# Pull the "Guerry" dataset from the R package "HistData" via statsmodels'
# Rdatasets helper and keep only its tabular payload (the .data attribute).
# NOTE(review): get_rdataset presumably downloads the data on first use —
# confirm network access is available before a Restart & Run All.
data = sm.datasets.get_rdataset(dataname="Guerry", package="HistData").data

In [3]:
# Check which container type the loaded dataset came back as.
type(data)

pandas.core.frame.DataFrame

In [4]:
# Preview the first rows to get a feel for the columns and their values.
data.head()

Unnamed: 0,dept,Region,Department,Crime_pers,Crime_prop,Literacy,Donations,Infants,Suicides,MainCity,Wealth,Commerce,Clergy,Crime_parents,Infanticide,Donation_clergy,Lottery,Desertion,Instruction,Prostitutes,Distance,Area,Pop1831
0,1,E,Ain,28870,15890,37,5098,33120,35039,2:Med,73,58,11,71,60,69,41,55,46,13,218.372,5762,346.03
1,2,N,Aisne,26226,5521,51,8901,14572,12831,2:Med,22,10,82,4,82,36,38,82,24,327,65.945,7369,513.0
2,3,C,Allier,26747,7925,13,10973,17044,114121,2:Med,61,66,68,46,42,76,66,16,85,34,161.927,7340,298.26
3,4,E,Basses-Alpes,12935,7289,46,2733,23018,14238,1:Sm,76,49,5,70,12,37,80,32,29,2,351.399,6925,155.9
4,5,E,Hautes-Alpes,17488,8174,69,6962,23076,16171,1:Sm,83,65,10,22,23,64,79,35,7,1,320.28,5549,129.1


In [5]:
# List the column labels that can be referenced in model formulas.
data.columns

Index(['dept', 'Region', 'Department', 'Crime_pers', 'Crime_prop', 'Literacy',
       'Donations', 'Infants', 'Suicides', 'MainCity', 'Wealth', 'Commerce',
       'Clergy', 'Crime_parents', 'Infanticide', 'Donation_clergy', 'Lottery',
       'Desertion', 'Instruction', 'Prostitutes', 'Distance', 'Area',
       'Pop1831'],
      dtype='object')

In [6]:
# Ordinary least squares through the R-style formula interface:
# regress Lottery on Literacy and the natural logarithm of Pop1831.
# np.log is applied to the column from inside the formula string itself.
lottery_formula = "Lottery ~ Literacy + np.log(Pop1831)"
result = smf.ols(formula=lottery_formula, data=data).fit()

In [7]:
# Show the regression summary for the formula-based fit.
result.summary()

0,1,2,3
Dep. Variable:,Lottery,R-squared:,0.348
Model:,OLS,Adj. R-squared:,0.333
Method:,Least Squares,F-statistic:,22.2
Date:,"Tue, 07 May 2024",Prob (F-statistic):,1.9e-08
Time:,00:07:53,Log-Likelihood:,-379.82
No. Observations:,86,AIC:,765.6
Df Residuals:,83,BIC:,773.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,246.4341,35.233,6.995,0.000,176.358,316.510
Literacy,-0.4889,0.128,-3.832,0.000,-0.743,-0.235
np.log(Pop1831),-31.3114,5.977,-5.239,0.000,-43.199,-19.424

0,1,2,3
Omnibus:,3.713,Durbin-Watson:,2.019
Prob(Omnibus):,0.156,Jarque-Bera (JB):,3.394
Skew:,-0.487,Prob(JB):,0.183
Kurtosis:,3.003,Cond. No.,702.0


In [8]:
# Using numpy arrays
# Seed NumPy's global generator so the simulated regressors (and the noise the
# following cell draws from the same generator) are identical on every
# Restart & Run All. Any output saved from an earlier, unseeded run will not
# match the numbers produced after this change.
np.random.seed(42)
nobs = 100
X = np.random.random((nobs, 2))  # two regressors, uniform on [0, 1)
X = sm.add_constant(X)  # add the intercept column ("const" in the summary)

In [9]:
# True coefficients, one per column of X (constant first, then the two regressors).
beta = [1, 0.1, 0.5]
# Noise term. NOTE: these are uniform draws on [0, 1), so their mean is 0.5
# rather than 0 and the fitted intercept lands near 1.5 instead of the nominal 1.
e = np.random.random(size=nobs)
# Simulated response: linear predictor plus noise.
y = X.dot(beta) + e

In [10]:
# Fit ordinary least squares on the simulated response (endog) and design matrix (exog).
res_2 = sm.OLS(endog=y, exog=X).fit()

In [11]:
# Show the regression summary for the array-based fit.
res_2.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.165
Model:,OLS,Adj. R-squared:,0.148
Method:,Least Squares,F-statistic:,9.598
Date:,"Tue, 07 May 2024",Prob (F-statistic):,0.000157
Time:,00:13:06,Log-Likelihood:,-25.133
No. Observations:,100,AIC:,56.27
Df Residuals:,97,BIC:,64.08
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.5562,0.084,18.476,0.000,1.389,1.723
x1,0.1662,0.114,1.454,0.149,-0.061,0.393
x2,0.4240,0.101,4.193,0.000,0.223,0.625

0,1,2,3
Omnibus:,28.999,Durbin-Watson:,2.261
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6.818
Skew:,-0.272,Prob(JB):,0.0331
Kurtosis:,1.843,Cond. No.,5.39


In [13]:
# Enumerate every attribute and method name exposed by the fitted results object.
dir(result)

['HC0_se',
 'HC1_se',
 'HC2_se',
 'HC3_se',
 '_HCCM',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abat_diagonal',
 '_cache',
 '_data_attr',
 '_data_in_cache',
 '_get_robustcov_results',
 '_get_wald_nonlinear',
 '_is_nested',
 '_transform_predict_exog',
 '_use_t',
 '_wexog_singular_values',
 'aic',
 'bic',
 'bse',
 'centered_tss',
 'compare_f_test',
 'compare_lm_test',
 'compare_lr_test',
 'condition_number',
 'conf_int',
 'conf_int_el',
 'cov_HC0',
 'cov_HC1',
 'cov_HC2',
 'cov_HC3',
 'cov_kwds',
 'cov_params',
 'cov_type',
 'df_model',
 'df_resid',
 'diagn',
 'eigenvals',
 'el_test',
 'ess',
 'f_pvalue',
 'f_test',
 'fittedvalues',
 'fvalue',
 'get_influence',
 