In [10]:
from __future__ import print_function
import statsmodels.api as sm
import pandas
from patsy import dmatrices

In [11]:
# Load the "Guerry" dataset from the "HistData" package through statsmodels'
# get_rdataset helper and keep only its `.data` attribute as `df`.
# NOTE(review): get_rdataset presumably fetches the data remotely, so this
# cell may need network access on a fresh kernel -- confirm.
df = sm.datasets.get_rdataset("Guerry", "HistData").data

In [12]:
# Column names kept for the analysis; used below to subset `df`.
# NOTE(review): the name `vars` shadows Python's built-in vars() for the rest
# of the notebook. Renaming it would require updating every cell that reads it.
vars = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']

In [13]:
# Restrict the frame to the columns listed in `vars` (all rows kept).
df = df.loc[:, vars]
# Show the last five rows as the cell's output.
df.tail(5)

Unnamed: 0,Department,Lottery,Literacy,Wealth,Region
81,Vienne,40,25,68,W
82,Haute-Vienne,55,13,67,C
83,Vosges,14,62,82,E
84,Yonne,51,47,30,C
85,Corse,83,49,37,


In [14]:
# Discard every row that contains at least one missing value
# (axis=0 / how='any' are dropna's defaults, spelled out here for clarity).
df = df.dropna(axis=0, how='any')
# Show the last five remaining rows as the cell's output.
df.tail(5)

Unnamed: 0,Department,Lottery,Literacy,Wealth,Region
80,Vendee,68,28,56,W
81,Vienne,40,25,68,W
82,Haute-Vienne,55,13,67,C
83,Vosges,14,62,82,E
84,Yonne,51,47,30,C


In [15]:
# Build the response (y, endog) and predictor (X, exog) design matrices from a
# patsy formula: Lottery is modelled on Literacy, Wealth and Region.
# return_type='dataframe' asks for both matrices as pandas DataFrames.
y, X = dmatrices(
    'Lottery ~ Literacy + Wealth + Region',
    data=df,
    return_type='dataframe',
)

In [16]:
# Peek at the first three rows of the response matrix.
y.head(3)

Unnamed: 0,Lottery
0,41.0
1,38.0
2,66.0


In [17]:
# Peek at the first three rows of the predictor design matrix.
X.head(3)

Unnamed: 0,Intercept,Region[T.E],Region[T.N],Region[T.S],Region[T.W],Literacy,Wealth
0,1.0,1.0,0.0,0.0,0.0,37.0,73.0
1,1.0,0.0,1.0,0.0,0.0,51.0,22.0
2,1.0,0.0,0.0,0.0,0.0,13.0,61.0


In [21]:
# Describe the ordinary-least-squares model: y is the response, X the design matrix.
mod = sm.OLS(endog=y, exog=X)
# Fit the model and keep the results object for the cells below.
res = mod.fit()
# Print the plain-text regression summary table.
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                Lottery   R-squared:                       0.338
Model:                            OLS   Adj. R-squared:                  0.287
Method:                 Least Squares   F-statistic:                     6.636
Date:                Tue, 10 Dec 2019   Prob (F-statistic):           1.07e-05
Time:                        09:48:34   Log-Likelihood:                -375.30
No. Observations:                  85   AIC:                             764.6
Df Residuals:                      78   BIC:                             781.7
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept      38.6517      9.456      4.087      

In [22]:
# Display the fitted coefficients, one entry per column of the design matrix X.
res.params

Intercept      38.651655
Region[T.E]   -15.427785
Region[T.N]   -10.016961
Region[T.S]    -4.548257
Region[T.W]   -10.091276
Literacy       -0.185819
Wealth          0.451475
dtype: float64

In [23]:
# Display the R-squared of the fitted model (the same value the summary table reports).
res.rsquared

0.337950869192882

In [24]:
# Run statsmodels' rainbow test on the fitted results; the cell displays a
# two-element tuple.
# NOTE(review): presumably (test statistic, p-value) -- confirm against the
# linear_rainbow documentation before interpreting the numbers.
sm.stats.linear_rainbow(res)

(0.8472339976156913, 0.6997965543621643)

In [25]:
# Partial regression plot of Lottery against Wealth, with Region and Literacy
# as the other regressors; per-observation labels are turned off.
sm.graphics.plot_partregress(
    endog='Lottery',
    exog_i='Wealth',
    exog_others=['Region', 'Literacy'],
    data=df,
    obs_labels=False,
)

<Figure size 640x480 with 1 Axes>