In [1]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.graphics.api as smg
import patsy
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns

In [2]:
# Seed NumPy's legacy global RNG so any later random draws are repeatable.
np.random.seed(123456789)

# Nine observations: the response y and two explanatory variables, each a run
# of consecutive integers.
y = np.arange(1, 10)
x1 = np.arange(6, 15)
x2 = np.arange(11, 20)

# Design matrix with columns [1, x1, x2, x1*x2]: stack the four regressors as
# rows, then transpose so that every observation becomes one row.
X = np.vstack((np.ones(y.size), x1, x2, x1 * x2)).transpose()
X

array([[  1.,   6.,  11.,  66.],
       [  1.,   7.,  12.,  84.],
       [  1.,   8.,  13., 104.],
       [  1.,   9.,  14., 126.],
       [  1.,  10.,  15., 150.],
       [  1.,  11.,  16., 176.],
       [  1.,  12.,  17., 204.],
       [  1.,  13.,  18., 234.],
       [  1.,  14.,  19., 266.]])

In [3]:
# Least-squares solve of X @ beta ~= y. Besides the coefficients, lstsq also
# returns the residual sums, the rank of X and its singular values.
# Note: x2 == x1 + 5 on every row, so the columns of X are linearly dependent
# and the coefficient vector is not uniquely determined by the data.
beta, res, rank, sval = np.linalg.lstsq(a=X, b=y, rcond=None)
beta

array([-5.55555556e-01,  1.88888889e+00, -8.88888889e-01,  1.65916974e-16])

In [4]:
# Bundle the arrays in a dict so the formula can look variables up by name.
data = {"y": y, "x1": x1, "x2": x2}
# Build the response and design matrices from a formula. The interaction is
# spelled `x1:x2` (interaction only) because the main effects are already
# listed explicitly; `x1*x2` is shorthand for `x1 + x2 + x1:x2` and would just
# repeat them. The resulting columns are the same either way:
# Intercept, x1, x2, x1:x2.
# NOTE: this rebinds `y` and `X` from the NumPy arrays to patsy DesignMatrix
# objects; `data` keeps its references to the original arrays.
y, X = patsy.dmatrices("y ~ 1 + x1 + x2 + x1:x2", data)
y

DesignMatrix with shape (9, 1)
  y
  1
  2
  3
  4
  5
  6
  7
  8
  9
  Terms:
    'y' (column 0)

In [5]:
# Echo the design matrix produced by patsy.dmatrices; as the cell's last
# expression, its repr (column names plus term listing) becomes the output.
X

DesignMatrix with shape (9, 4)
  Intercept  x1  x2  x1:x2
          1   6  11     66
          1   7  12     84
          1   8  13    104
          1   9  14    126
          1  10  15    150
          1  11  16    176
          1  12  17    204
          1  13  18    234
          1  14  19    266
  Terms:
    'Intercept' (column 0)
    'x1' (column 1)
    'x2' (column 2)
    'x1:x2' (column 3)

In [6]:
# Inspect the concrete type patsy.dmatrices returned for the design matrix.
type(X)

patsy.design_info.DesignMatrix

In [7]:
# Convert the patsy design matrix into a plain NumPy array; the displayed
# result carries only the numeric values, without column or term labels.
np.array(X)

array([[  1.,   6.,  11.,  66.],
       [  1.,   7.,  12.,  84.],
       [  1.,   8.,  13., 104.],
       [  1.,   9.,  14., 126.],
       [  1.,  10.,  15., 150.],
       [  1.,  11.,  16., 176.],
       [  1.,  12.,  17., 204.],
       [  1.,  13.,  18., 234.],
       [  1.,  14.,  19., 266.]])

In [8]:
# Put the variables into a DataFrame (one column per dict key) so that both
# patsy and statsmodels can consume them.
df_data = pd.DataFrame.from_dict(data)

# Same formula as before, but request pandas objects back instead of
# DesignMatrix instances. This rebinds `y` and `X` once more.
y, X = patsy.dmatrices(
    "y ~ 1 + x1 + x2 + x1:x2",
    data=df_data,
    return_type="dataframe",
)
X

Unnamed: 0,Intercept,x1,x2,x1:x2
0,1.0,6.0,11.0,66.0
1,1.0,7.0,12.0,84.0
2,1.0,8.0,13.0,104.0
3,1.0,9.0,14.0,126.0
4,1.0,10.0,15.0,150.0
5,1.0,11.0,16.0,176.0
6,1.0,12.0,17.0,204.0
7,1.0,13.0,18.0,234.0
8,1.0,14.0,19.0,266.0


In [9]:
# Ordinary least squares on the patsy-built frames: `y` is the response
# (endog) and `X` is the design matrix (exog).
model = sm.OLS(endog=y, exog=X)
result = model.fit()

# Fitted coefficients, labelled by design-matrix column.
result.params

Intercept   -5.555556e-01
x1           1.888889e+00
x2          -8.888889e-01
x1:x2       -6.938894e-17
dtype: float64

In [10]:
# Same regression through the formula interface: statsmodels builds the
# design matrices from the formula string and the DataFrame itself.
model = smf.ols(formula="y ~ 1 + x1 + x2 + x1:x2", data=df_data)
result = model.fit()

# Fitted coefficients, labelled by formula term.
result.params

Intercept   -5.555556e-01
x1           1.888889e+00
x2          -8.888889e-01
x1:x2       -6.938894e-17
dtype: float64

In [11]:
# Print the regression report for the formula-based fit as plain text.
# The sample data satisfy y == x1 - 5 exactly, so the fit is perfect: expect
# R-squared of 1 and standard errors at floating-point noise level.
# NOTE(review): only 9 observations are used; the stray stderr fragment in the
# recorded output looks like the tail of a small-sample warning — confirm.
print(result.summary())

  "anyway, n=%i" % int(n))


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 3.211e+27
Date:                Fri, 22 Mar 2019   Prob (F-statistic):           8.15e-82
Time:                        20:53:10   Log-Likelihood:                 258.76
No. Observations:                   9   AIC:                            -511.5
Df Residuals:                       6   BIC:                            -510.9
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.5556   6.47e-14  -8.59e+12      0.0