In [274]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objs as go

In [275]:
def mean(feature):
    n = len(feature)
    return(sum(feature)/n)

In [276]:
def std(feature, mean):
    n = len(feature)
    diff = [0]*n
    for i in range(n):
        diff[i] = (feature[i] - mean)**2
    
    return(((sum(diff)/(n))**0.5))

In [277]:
def normalise (feature, mean, std):
    return((feature - mean)/std)

In [278]:
def mse(weights, feature_1, feature_2, target):
    n = len(feature_1)
    MSE = 0
    for i in range(n):
        MSE += ((weights[0] + (weights[1]*feature_1[i]) + (weights[2]*feature_2[i])) - target[i])**2
    return(MSE/n)

In [279]:
def multiple_linear_regression(sq_feet, rooms, price, learning_rate = 0.1, epoch = 50):
    n = len(price)
    
    w0 = 0
    w1 = 0
    w2 = 0

    costfn = [0]*epoch

    for i in range(epoch):
        del_costfnBydel_w0 = 0
        del_costfnBydel_w1 = 0
        del_costfnBydel_w2= 0

        for j in range(n):
            y_pred = w0 + (w1*sq_feet[j]) + (w2*rooms[j])
            costfn[i] += (y_pred - price[j])**2
            del_costfnBydel_w0 += (y_pred - price[j])
            del_costfnBydel_w1 += (sq_feet[j]*(y_pred - price[j]))
            del_costfnBydel_w2 += (rooms[j]*(y_pred - price[j]))


        w0 += -learning_rate*(del_costfnBydel_w0/n)
        w1 += -learning_rate*(del_costfnBydel_w1/n)
        w2 += -learning_rate*(del_costfnBydel_w2/n)
        
    #Iter = list(range(epoch))
    #plt.plot(Iter, costfn)
    #plt.show()
        
    return([w0, w1, w2])

In [280]:
def cross_validation(sq_feet, rooms, price, k=4):
    weights = [[]]*k
    error = [0]*k
    pivot = int(n/k)+1

    for i in range(k):
        train_sq_feet = sq_feet[:(i*pivot)] + sq_feet[((i+1)*pivot)-1:] 
        test_sq_feet = sq_feet[(i*pivot):((i+1)*pivot)-1]

        train_rooms = rooms[:(i*pivot)] + rooms[((i+1)*pivot)-1:] 
        test_rooms = rooms[(i*pivot):((i+1)*pivot)-1]

        train_price = price[:(i*pivot)] + price[((i+1)*pivot)-1:] 
        test_price = price[(i*pivot):((i+1)*pivot)-1]

        weights[i] = multiple_linear_regression(train_sq_feet, train_rooms, train_price)
        error[i] = mse(weights[i], test_sq_feet, test_rooms, test_price)

    ind = error.index(min(error))
    return(weights[ind])

### Reading the input file

In [281]:
file1 = open("ex1data2.txt","r")
lines = file1.readlines()

n = len(lines)
sq_feet = [0]*n
rooms = [0]*n
price = [0]*n

for i in range(n):
    content = lines[i].split(",")
    sq_feet[i] = int(content[0])
    rooms[i] = int(content[1])
    price[i] = int(content[2])

### Cross Validation and Linear Regression using Gradient Descent

In [282]:
mean_sq_feet = mean(sq_feet)
mean_rooms = mean(rooms)

std_sq_feet = std(sq_feet, mean_sq_feet)
std_rooms = std(rooms, mean_rooms)

for i in range(n):
    sq_feet[i] = normalise(sq_feet[i], mean_sq_feet, std_sq_feet)
    rooms[i] = normalise(rooms[i], mean_rooms, std_rooms)

k = 4
model = cross_validation(sq_feet, rooms, price, k)

### Input

In [283]:
sqfeet = 4000
room = 5


nor_sqfeet = normalise(sqfeet, mean_sq_feet, std_sq_feet)
nor_rooms = normalise(room, mean_rooms, std_rooms)

w0, w1, w2 = model
predicted_price = w0 + (w1*nor_sqfeet) + (w2*nor_rooms)
print(predicted_price)

597944.4618524647


### Visualization

In [284]:


x1 = sq_feet
x2 = rooms
x1, x2 = np.meshgrid(x1, x2)
y_pred = model[0] + model[1] * x1 + model[2] * x2

# Create trace for original data
trace1 = go.Scatter3d(
    x=sq_feet,
    y=rooms,
    z=price,
    mode='markers',
    marker=dict(
        color='blue',
        size=5,
        opacity=0.8
    ),
    name='Original data'
)

# Create trace for regression plane
trace2 = go.Surface(
    x=x1,
    y=x2,
    z=y_pred,
    opacity=0.6,
    colorscale='Viridis',
    name='Regression plane'
)

# Define layout
layout = go.Layout(
    title='Multiple Linear Regression',
    scene=dict(
        xaxis=dict(title='Area'),
        yaxis=dict(title='Room'),
        zaxis=dict(title='Price')
    )
)

# Create figure and plot
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()
