In [1]:
import numpy as np
import pandas as pd
from numpy.linalg import inv
from sklearn.datasets import load_boston
from statsmodels.regression.linear_model import OLS

In [2]:
# fetch the Boston housing data set through scikit-learn's loader
# NOTE(review): load_boston is absent from recent scikit-learn releases --
# confirm the pinned version before relying on Restart & Run All
boston = load_boston()

# unpack the feature matrix and the target vector (both numpy arrays)
X, y = boston.data, boston.target

In [3]:
# sanity check: (number of observations, number of features)
print(np.shape(X))

(506, 13)


In [4]:
# column labels of the raw feature matrix, in the same order as X's columns
# (this name is rebound later, once the intercept label is prepended)
feature_names = boston.feature_names
print(feature_names)

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']


In [5]:
# Build the design matrix: a leading column of ones (the intercept term)
# followed by the raw features.
#
# * The ones vector is called `intercept`, not `int`, so the Python builtin
#   `int` is not shadowed for the rest of the kernel session.
# * X is rebuilt from boston.data instead of from X itself, which makes this
#   cell idempotent: re-running it never stacks a second column of ones.
intercept = np.ones(shape=y.shape)[..., None]  # shape (n_samples, 1)

X = np.concatenate((intercept, boston.data), 1)

In [6]:
# Closed-form OLS estimate: solve the normal equations (X'X) b = X'y.
# np.linalg.solve works on the linear system directly rather than forming
# the explicit inverse of X'X, which is cheaper and numerically better
# behaved when the feature columns are on very different scales.
gram = X.transpose().dot(X)      # X'X, shape (n_features + 1, n_features + 1)
moment = X.transpose().dot(y)    # X'y, shape (n_features + 1,)
coeffs = np.linalg.solve(gram, moment)

In [7]:
# row labels: 'INT' for the intercept column, then the data set's feature names
feature_names = np.insert(boston.feature_names, 0, 'INT')

# one-column table holding the closed-form estimates, one row per coefficient
results = pd.DataFrame(coeffs, index=feature_names, columns=['coeffs'])

print(results.round(2))

         coeffs
INT       36.49
CRIM      -0.11
ZN         0.05
INDUS      0.02
CHAS       2.69
NOX      -17.80
RM         3.80
AGE        0.00
DIS       -1.48
RAD        0.31
TAX       -0.01
PTRATIO   -0.95
B          0.01
LSTAT     -0.53


In [8]:
# fit statsmodels' OLS on the same target and design matrix (X is passed
# as-is, so the intercept comes from the column of ones already in X)
ols_model = OLS(y, X)
coeffs_lm = ols_model.fit().params

In [9]:
# put the statsmodels estimates next to the closed-form ones for comparison
results = results.assign(coeffs_lm=coeffs_lm)

print(results.round(2))

         coeffs  coeffs_lm
INT       36.49      36.49
CRIM      -0.11      -0.11
ZN         0.05       0.05
INDUS      0.02       0.02
CHAS       2.69       2.69
NOX      -17.80     -17.80
RM         3.80       3.80
AGE        0.00       0.00
DIS       -1.48      -1.48
RAD        0.31       0.31
TAX       -0.01      -0.01
PTRATIO   -0.95      -0.95
B          0.01       0.01
LSTAT     -0.53      -0.53
