## Show regression results using normal equation and sklearn package

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)
import numpy as np
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from scipy import stats
import warnings
from mpl_toolkits.mplot3d import Axes3D
warnings.filterwarnings('ignore')
#%matplotlib inline
%matplotlib notebook

In [2]:
# columns are x1, x2, x0, y
data = pd.read_csv("2D_MLE_MAP_Data.csv", header=None, index_col=None, skiprows=2)
# third column is x0 so need to rearrange
data_new = data[[2,0,1,3]]

## Compute MLE weights using normal equation

$$w_{MLE} = (X^TX)^{-1} X^T{y}$$

In [3]:
m = 31
x_train = data_new.iloc[0:m-1,0:3] #dont know why this works, carry on
y_train = data_new.iloc[0:m-1, 3]
xTx = x_train.T.dot(x_train)
XtX = np.linalg.inv(xTx)
w_mle = XtX.dot(x_train.T).dot(y_train)
w_mle

array([ 0.80906676,  0.71019338,  0.4603808 ])

## Compute MLE weights using sklearn linear.fit function

In [4]:
# Using sklearn
from sklearn import linear_model
# Create linear regression object
linear = linear_model.LinearRegression()
# Train the model using the training sets and check score
linear.fit(x_train, y_train)
linear.score(x_train, y_train)
#Equation coefficient and Intercept
print('Coefficient: \n', linear.coef_)
print('Intercept: \n', linear.intercept_)

Coefficient: 
 [ 0.          0.71019338  0.4603808 ]
Intercept: 
 0.809066760915


## Compute the ridge regression weights, i.e. l2 regularization via normal equation

$$w_{ridge}=(X^TX+\lambda*I)^{-1} X^T{y} $$

In [10]:
l = .5
dim = 3
reg = np.matrix(np.eye(dim,dim)*l)
xTx = x_train.T.dot(x_train)+reg
XtX = np.linalg.inv(xTx)
w_ridge = XtX.dot(pd.DataFrame((x_train.T).dot(y_train)))
w_ridge

array([[ 0.79561782],
       [ 0.70229278],
       [ 0.45051326]])

## Compute ridge regression using sklearn

In [9]:
#I gotta sort out the value of alpha vs. lambda
from sklearn.linear_model import Ridge
ridgereg = Ridge(alpha=.01,normalize=True)
ridgereg.fit(x_train, y_train)
ridgereg.score(x_train, y_train)
#Equation coefficient and Intercept
print('Coefficient: \n', ridgereg.coef_)
print('Intercept: \n', ridgereg.intercept_)

Coefficient: 
 [ 0.          0.70341614  0.45674915]
Intercept: 
 0.808973342844
