In [157]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from numpy.linalg import inv, norm

In [158]:
# Dataset

housing = pd.read_csv('housing.csv')
housing = housing.dropna()

In [159]:
# Linear Regression Algorithms

def NormalizeMatrix(x_train):
    mean = x_train.mean(0)
    stddev = x_train.std(0)
    normal = (x_train - mean) / stddev
    return normal, mean, stddev

def ClosedForm(x_train, y_train):
    inverse = inv(x_train.T.dot(x_train))
    weight = inverse.dot(x_train.T).dot(y_train)
    return weight

def Error(x_train, y_train, weight):
    A = y_train - x_train.dot(weight)
    return A.T.dot(A)

def Bias(x_train):
    bias = np.ones(x_train.shape[0]).reshape(x_train.shape[0], 1)
    result = np.concatenate((bias, x_train), 1)
    return result

def GradientDescent(x_train, y_train, alpha=0.001, epsilon=0.0001):
    w0 = np.random.rand(x_train.shape[1]).reshape(x_train.shape[1], 1)
    w1 = np.zeros((x_train.shape[1], 1))
    i = 0
    while norm(w1 - w0) > epsilon:
        w0 = w1
        RSS = Error(x_train, y_train, w0)
        gRSS = (x_train.T.dot(x_train).dot(w0) - x_train.T.dot(y_train))   
        w1 = w0 - alpha * gRSS
        i += 1
    return w1

## Closed Form

### Simple Linear regression using 1 feature

In [160]:
x_train =  housing[['total_rooms']]
y_train =  housing['housing_median_age']
x_test = x_train.values
y_test = y_train.values.reshape(x_test.shape[0], 1)
x_test, mean, stddev = NormalizeMatrix(x_test)
x_test = Bias(x_test)
Closed = ClosedForm(x_test, y_test)

In [161]:
#Closed Form Solution
for i in range(len(Closed)):
    print("Theta ",i," = ",Closed[i])

Theta  0  =  [28.63309353]
Theta  1  =  [-4.54085018]


### Multiple Linear Regression using 2 – 4 features of your choice from the dataset

In [162]:
x_train =  housing[['total_rooms' , 'total_bedrooms' , 'population' ,'households']]
y_train =  housing['housing_median_age']
x_test = x_train.values
y_test = y_train.values.reshape(x_test.shape[0], 1)
x_test, mean, stddev = NormalizeMatrix(x_test)
x_test = Bias(x_test)
Closed = ClosedForm(x_test, y_test)

In [163]:
#Closed Form Solution
for i in range(len(Closed)):
    print("Theta ",i," = ",Closed[i])

Theta  0  =  [28.63309353]
Theta  1  =  [-5.89148702]
Theta  2  =  [-3.32168566]
Theta  3  =  [-0.92471942]
Theta  4  =  [5.69516642]


### Report evaluation metrics such as Rsquared error, Mean square error, Mean absolute error.

In [164]:
y_pred = list()
for i in range(0,len(x_test)):
  y = Closed[0]
  for j in range(1,len(Closed)):
    y += x_test[i][j-1]*Closed[j]
  y_pred.append(y)

In [165]:
# R Squared Error
print(r2_score(y_test, y_pred))

-91402775.44673042


In [166]:
# Mean Absolute Error
print(mean_absolute_error(y_test, y_pred))

120380.75437799614


In [167]:
# Mean Squared Error
print(mean_squared_error(y_test, y_pred))

14491526183.161247


## Stochastic Gradient Descent

### Simple Linear regression using 1 feature

In [168]:
x_train =  housing[['total_rooms']]
y_train =  housing['housing_median_age']
x_test = x_train.values
y_test = y_train.values.reshape(x_test.shape[0], 1)
x_test, mean, stddev = NormalizeMatrix(x_test)
x_test = Bias(x_test)
Gradient = GradientDescent(x_test, y_test, alpha=0.00001)

In [169]:
#Stochastic Gradient Solution
for i in range(len(Gradient)):
    print("Theta ",i," = ",Gradient[i])

Theta  0  =  [28.63278198]
Theta  1  =  [-4.54080078]


### Multiple Linear Regression using 2 – 4 features of your choice from the dataset

In [170]:
x_train =  housing[['total_rooms' , 'total_bedrooms' , 'population' ,'households']]
y_train =  housing['housing_median_age']
x_test = x_train.values
y_test = y_train.values.reshape(x_test.shape[0], 1)
x_test, mean, stddev = NormalizeMatrix(x_test)
x_test = Bias(x_test)
Gradient = GradientDescent(x_test, y_test, alpha=0.00001)

In [171]:
#Stochastic Gradient Solution
for i in range(len(Gradient)):
    print("Theta ",i," = ",Gradient[i])

Theta  0  =  [28.63309353]
Theta  1  =  [-5.89408306]
Theta  2  =  [-3.30197723]
Theta  3  =  [-0.92101057]
Theta  4  =  [5.67453129]


### Report evaluation metrics such as Rsquared error, Mean square error, Mean absolute error.

In [172]:
y_pred = list()
for i in range(0,len(x_test)):
  y = Gradient[0]
  for j in range(1,len(Gradient)):
    y += x_test[i][j-1]*Gradient[j]
  y_pred.append(y)

In [173]:
# R Squared Error
print(r2_score(y_test, y_pred))

-91483344.94908275


In [174]:
# Mean Absolute Error
print(mean_absolute_error(y_test, y_pred))

120433.7991624702


In [175]:
# Mean Squared Error
print(mean_squared_error(y_test, y_pred))

14504300139.251995


## Linear Regression Using Neural Network

In [186]:
import keras
from keras import models
from keras.layers import Dense

In [187]:
x_train =  housing[['total_rooms' , 'total_bedrooms' , 'population' ,'households']]
y_train =  housing['housing_median_age']
x_test = x_train.values
y_test = y_train.values.reshape(x_test.shape[0], 1)

In [188]:
model = models.Sequential()
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='softmax'))

In [189]:
model.compile(optimizer='sgd', loss='mean_absolute_error', metrics=['accuracy'])

In [190]:
model.fit(x_train, y_train, batch_size = 128, epochs = 10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f472a19bf50>

### Report evaluation metrics such as Rsquared error, Mean square error, Mean absolute error.

In [191]:
y_pred = model.predict(x_test)

In [192]:
# R Squared Error
print(r2_score(y_test, y_pred))

-4.816197367977022


In [193]:
# Mean Absolute Error
print(mean_absolute_error(y_test, y_pred))

27.633093525179856


In [194]:
# Mean Squared Error
print(mean_squared_error(y_test, y_pred))

922.133656340234
