In [3]:
import numpy as np
import matplotlib.pyplot as plt 
import random
import pandas as pd 

%matplotlib inline

from sklearn.datasets import load_boston
boston_dataset = pd.DataFrame(load_boston().data, columns=load_boston().feature_names)
boston_dataset['MEDV'] = load_boston().target
dataframe_size = int(len(boston_dataset) * 0.8)
x_train=boston_dataset.RM[:dataframe_size]
x_test=boston_dataset.RM[dataframe_size:]
y_train=boston_dataset.MEDV[:dataframe_size]
y_test=boston_dataset.MEDV[dataframe_size:]


In [4]:
def polynomialRegression(H,Y):
    W = np.dot(np.dot((np.linalg.inv(np.dot(H.T, H))), H.T),Y) #Polynomial Regrssion function
    return W

def GradientDescent(H,Y,lamda):
    n, m = H.shape
    I = np.identity(m)
    coeff=np.dot(np.dot(np.linalg.inv(np.dot(H.T, H) + lamda * I), H.T), Y)
    return coeff

def CrossValidation(train_copy,lamdavalue):
    fold_size = int(len(train_copy) * 0.2)
    fold=0
    rss=0
    for i in range(1,6):
        validation_set= train_copy[fold:i*fold_size]
        train_set=train_copy.drop(train_copy[fold:i*fold_size].index,axis=0)
        W=GradientDescent(train_set.RM,train_set.MEDV,lamdavalue)
        #print(W)
        rss+=sum((validation_set.MEDV - np.dot(validation_set.RM,W) )** 2)
        fold=i*fold_size
        i+=1
    #print((rss/5))
    return ((rss/5))



def RidgeRegression(train_copy):
    validation_Error=[]
    bestfit_W=[]
    bestfit_lamda=0
    
    lamdavalue = [(10 ** (-10)),(10 ** (-8)),(10 ** (-6)),(10 ** (-4)),(10 ** (-2)),(10 ** (-1)),10,(10 ** (2))]
    for i in range(0,len(lamdavalue)):
        (avg_validationError) = CrossValidation(train_copy, lamdavalue[i])
        validation_Error.append(avg_validationError)
        if min(validation_Error) == avg_validationError:
            bestfit_lamda = lamdavalue[i]
    result = pd.DataFrame({'validationError':validation_Error,'lamdaValue':lamdavalue})
    return (bestfit_lamda)

# 2nd order Input

In [5]:
H_secondOrder = pd.concat([x_train, x_train **2], axis=1) 
H_Test2 = pd.concat([x_test, x_test **2], axis=1)
training_2ndOrder = pd.concat([H_secondOrder, y_train], axis=1)
training_2ndOrder=training_2ndOrder.sample(frac=1)

W_2 = polynomialRegression(H_secondOrder,y_train)# Coeffecients for polynomial regression
print("The coeffecients for polynomial regression" , W_2)
bestfit_lamda2 = RidgeRegression(training_2ndOrder)
bestfit_W2 = GradientDescent(H_secondOrder,y_train,bestfit_lamda2) # trains model to get Coeffecients for the best fit lamda
print("The coeffecients for ridge regression: {0}\nThe best fit lamda value: {1}".format(bestfit_W2,bestfit_lamda2))

MSE_2ndPolynomial = (sum((y_test-np.dot(H_Test2,W_2)) ** 2))/len(x_test)
MSE_2ndRidge = (sum((y_test-np.dot(H_Test2,bestfit_W2)) ** 2))/len(x_test)
print("MSE for polynomial regression: {0}\nMSE for ridge regression: {1}".format(MSE_2ndPolynomial,MSE_2ndRidge))

The coeffecients for polynomial regression [-2.03391734  0.91107707]
The coeffecients for ridge regression: [-1.94078329  0.89693875]
The best fit lamda value: 10
MSE for polynomial regression: 68.7444509199822
MSE for ridge regression: 68.91697581509428


## 9th Order Input

In [6]:
H_ninethOrder = pd.concat([x_train ** i for i in range(1,10) ],axis=1) 
H_Test9 = pd.concat([x_test ** i for i in range(1,10) ],axis=1)
training_9thOrder = pd.concat([H_ninethOrder, y_train], axis=1)
training_9thOrder=training_9thOrder.sample(frac=1)

W_9 = polynomialRegression(H_ninethOrder,y_train)# Coeffecients for polynomial regression
print("The coeffecients for polynomial regression" , W_9)
bestfit_lamda9 = RidgeRegression(training_9thOrder)
bestfit_W9 = GradientDescent(H_ninethOrder,y_train,bestfit_lamda9) # trains model to get Coeffecients for the best fit lamda
print("The coeffecients for ridge regression: {0}\nThe best fit lamda value: {1}".format(bestfit_W9,bestfit_lamda9))

MSE_9thPolynomial = (sum((y_test-np.dot(H_Test9,W_9)) ** 2))/len(x_test)
MSE_9thRidge = (sum((y_test-np.dot(H_Test9,bestfit_W9)) ** 2))/len(x_test)
print("MSE for polynomial regression: {0}\nMSE for ridge regression: {1}".format(MSE_9thPolynomial,MSE_9thRidge))

The coeffecients for polynomial regression [-3.78781470e+03  5.90395778e+03 -3.85618187e+03  1.39273241e+03
 -3.06057745e+02  4.20786020e+01 -3.54558909e+00  1.67830201e-01
 -3.42435078e-03]
The coeffecients for ridge regression: [ 8.75280921e+00  1.04141918e+01  3.77178123e+00 -8.34221523e+00
  3.51998148e+00 -6.98761529e-01  7.35635212e-02 -3.93504635e-03
  8.29065261e-05]
The best fit lamda value: 0.1
MSE for polynomial regression: 50.103138834675036
MSE for ridge regression: 63.23984577079102
