In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [2]:
boston = load_boston()

In [3]:
boston_df=pd.DataFrame(boston.data,columns=boston.feature_names)
boston_df['Price']=boston.target

In [4]:
X = boston_df.drop('Price', axis=1)
X.head(4)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94


In [5]:
Y = boston_df.iloc[: , [13]]

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state = 3)

print('{}   {}   {}   {}'.format(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape))

(379, 13)   (127, 13)   (379, 1)   (127, 1)


In [7]:
def biasing_X(X):
    N = X.shape
    arr = np.ones((N[0], 1))
    X = np.hstack((arr, X))
    w = np.zeros((N[1]+1, 1))
    return X, w

In [8]:
def normalisation(X_t):
    N = X_t.shape
    mean = np.mean(X_t)
    std = np.std(X_t)
    X_t = (X_t-mean)/std
    return X_t

In [9]:
X_norm = normalisation(X_train)
X_biased, theta = biasing_X(X_norm)
X_bias = np.array(X_biased)

In [10]:
def hypo(X, W):
    return np.dot(X, W)

In [14]:
def cost(X, Y, W):
    m = X.shape[1]
    #print(m)
    n = X.shape[0]
    sum = 0
    Temp = hypo(X, W) - Y
    J = (np.dot(Temp.T, Temp))/(2*m)
    #print(J)
    return J

def cost_ridge(X, Y, W, lamda):
    m = X.shape[1]
    #print(m)
    n = X.shape[0]
    sum = 0
    Temp = hypo(X, W) - Y
    J = ((np.dot(Temp.T, Temp))/(2*m)) + lamda*(W**2)/(2*m)
    #print(J)
    return J

def cost_lasso(X, Y, W, lamda):
    m = X.shape[1]
    n = X.shape[0]
    sum = 0
    Temp = hypo(X, W) - Y
    J = ((np.dot(Temp.T, Temp))/(2*m)) + (lamda*((W**2)**0.5))/(2*m)
    return J

In [15]:
def gradient_descent(X, W, Y, Alpha, Step):
    N = X.shape[1]
    print(N)
    J = []
    initial = cost(X, Y, W)
    J.append(initial)
    #np.dot(x.T, (y_hat-y)))
    #print(J)
    for i in range(Step):
        Te = hypo(X, W)
        Te = Te - Y
        K = X.T@Te
        #print(i)
        #print(K.shape)
        #print(K.T)
        W = W - (Alpha/N)*K
        Temp = cost(X, Y, W)
        J.append(Temp)
        #print(Temp)
    return J, W

In [16]:
def gradient_descent_ridge(X, W, Y, Alpha, Step, lamda):
    N = X.shape[1]
    print(N)
    J = []
    initial = cost_ridge(X, Y, W, lamda)
    J.append(initial)
    #np.dot(x.T, (y_hat-y)))
    #print(J)
    for i in range(Step):
        Te = hypo(X, W)
        Te = Te - Y
        K = X.T@Te
        #print(i)
        #print(K.shape)
        #print(K.T)
        W = W - (Alpha/N)*K + (lamda/N)*W
        Temp = cost_ridge(X, Y, W, lamda)
        J.append(Temp)
        #print(Temp)
    return J, W

def gradient_descent_lasso(X, W, Y, Alpha, Step, lamda):
    N = X.shape[1]
    #print(N)
    J = []
    initial = cost_ridge(X, Y, W, lamda)
    J.append(initial)
    #np.dot(x.T, (y_hat-y)))
    print(J)
    for i in range(Step):
        Te = hypo(X, W)
        Te = Te - Y
        K = X.T@Te
        #print(i)
        #print(K.shape)
        #print(K.T)
        W = W - (Alpha/N)*K + (lamda/(2*N))*(W/((W**2)**0.5))
        Temp = cost_ridge(X, Y, W, lamda)
        J.append(Temp)
        #print(Temp)
    return J, W

In [17]:
J1, W1 = gradient_descent(X_bias, theta, Y_train, 0.001, 1000)

14


In [18]:
J2, W2 = gradient_descent_ridge(X_bias, theta, Y_train, 0.001, 1000, 0.0001) #low alpha value

14


In [19]:
x_1 = (X_bias.T@Y_train)
print(x_1.shape)
x_2 = X_bias.T@X_bias
print(x_2.shape)
theta1 = np.linalg.pinv(x_2)@x_1
theta1 #value from formula

(14, 1)
(14, 14)


Unnamed: 0,Price
0,22.525594
1,-1.130313
2,1.151977
3,-0.226244
4,0.856078
5,-1.836763
6,2.336972
7,0.168943
8,-2.766992
9,2.810391


In [21]:
X_bias_test,_ = biasing_X(normalisation(X_test))
print(X_bias_test.shape)
Y_ridge_pred = X_bias_test@W2

(127, 14)


In [22]:
Y_normal_pred = X_bias_test@W1

In [23]:
Y_pred_ridge = Y_ridge_pred
Y_pred_normal = Y_normal_pred
mse_ridge = (np.sum(Y_pred_ridge - Y_test)**2)/(Y_test.shape[0])
mse_normal = (np.sum(Y_pred_normal - Y_test)**2)/(Y_test.shape[0])

In [24]:
print(mse_ridge)
print(mse_normal)

Price    25.01596
dtype: float64
Price    25.152529
dtype: float64


In [25]:
y_bar = np.mean(Y_test)
y_bar


Price    22.554331
dtype: float64

In [26]:
def error(Y_pp):
    R_error =  ((Y_pp - y_bar).T@(Y_pp - y_bar)) /((Y_test - y_bar).T@(Y_test- y_bar))
    return R_error

In [27]:
error(Y_pred_normal)

Unnamed: 0,Price
Price,0.682423


In [28]:
error(Y_pred_ridge)

Unnamed: 0,Price
Price,0.68264


In [29]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge

linReg = LinearRegression()
linReg.fit(X_train, Y_train)

ridge = Ridge(alpha=0.01)
ridge.fit(X_train, Y_train)

r100 = Ridge(alpha=100) 
r100.fit(X_train, Y_train)

Ridge(alpha=100, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [30]:
train_score= linReg.score(X_train, Y_train)
test_score= linReg.score(X_test, Y_test)
Ridge_train_score = ridge.score(X_train,Y_train)
Ridge_test_score = ridge.score(X_test, Y_test)
Ridge_train_score100 = r100.score(X_train,Y_train)
Ridge_test_score100 = r100.score(X_test, Y_test)

In [31]:
print("linear regression train score:", train_score)
print("linear regression test score:", test_score)
print("ridge regression train score low alpha:", Ridge_train_score)
print("ridge regression test score low alpha:", Ridge_test_score)
print("ridge regression train score high alpha:", Ridge_train_score100)
print("ridge regression test score high alpha:", Ridge_test_score100)

linear regression train score: 0.7280659533306794
linear regression test score: 0.7668895327325101
ridge regression train score low alpha: 0.7280652903801514
ridge regression test score low alpha: 0.7668140172741872
ridge regression train score high alpha: 0.7046318055844061
ridge regression test score high alpha: 0.7220612059649227


In [32]:
from sklearn.linear_model import Lasso

lasso = Lasso()
lasso.fit(X_train,Y_train)
train_score=lasso.score(X_train,Y_train)
test_score=lasso.score(X_test,Y_test)
coff = np.sum(lasso.coef_!=0)

In [33]:
print("training score:", train_score)
print("test score: ", test_score)
print("number of features used: ", coff)

training score: 0.6690290782600831
test score:  0.6667924487957375
number of features used:  11
