In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

In [None]:
# Load the raw housing data. read_csv already returns a DataFrame, so the
# extra pd.DataFrame(...) wrapper was redundant and has been dropped.
housing = pd.read_csv("Housing.csv")
housing.head()

In [None]:
# Number of rows in the full dataset (m is reassigned later for the training set)
m = housing.shape[0]
m

In [None]:
# Dimensions of the loaded dataset as (rows, columns)
housing.shape

In [None]:
# Columns that hold 'yes' / 'no' answers
varlist = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]


def binary_map(x):
    """Return a copy of Series ``x`` with 'yes' mapped to 1 and 'no' mapped to 0.

    Values that are neither 'yes' nor 'no' come back as NaN, because
    ``Series.map`` with a dict yields NaN for keys it does not contain.
    """
    yes_no_codes = {'yes': 1, "no": 0}
    return x.map(yes_no_codes)

# Encode the yes/no columns of housing as 1/0.
# Only columns that are not numeric yet are mapped: binary_map turns anything
# other than 'yes'/'no' into NaN, so re-running this cell on already-encoded
# columns would otherwise wipe them out.
to_encode = [col for col in varlist if not pd.api.types.is_numeric_dtype(housing[col])]
if to_encode:
    housing[to_encode] = housing[to_encode].apply(binary_map)

# Check the housing dataframe now
housing.head()

In [None]:
# Split the data into training (80%) and testing (20%) sets
from sklearn.model_selection import train_test_split

np.random.seed(0)
df_train, df_test = train_test_split(
    housing,
    train_size=0.8,
    test_size=0.2,
    random_state=100,
)

df_train.shape

In [None]:
# Dimensions of the held-out test split as (rows, columns)
df_test.shape

In [None]:
# Numeric columns used for the regression; 'price' is the target and comes last
num_vars = [
    'area',
    'bedrooms',
    'bathrooms',
    'stories',
    'parking',
    'price',
]
df_Newtrain, df_Newtest = df_train[num_vars], df_test[num_vars]
df_Newtrain.head()

In [None]:
# Dimensions of the numeric training subset as (rows, columns)
df_Newtrain.shape

In [None]:
# Separate the target from the training features.
# pop() removes 'price' from df_Newtrain in place, so a second run of this cell
# used to raise KeyError; the guard makes the cell safe to re-run.
if 'price' in df_Newtrain.columns:
    y_Newtrain = df_Newtrain.pop('price')
# X_Newtrain is the same object as df_Newtrain (now without 'price')
X_Newtrain = df_Newtrain

In [None]:
# Preview the first rows of the training features
X_Newtrain.head()

In [None]:
# Preview the first values of the training target (the popped 'price' column)
y_Newtrain.head()

In [None]:
# Training target as a plain NumPy array; show the first ten values
y = y_Newtrain.values
print('y = ', y[:10])

In [None]:
# One 1-D array per training feature, taken from column positions 0..4 of df_Newtrain
X1, X2, X3, X4, X5 = (df_Newtrain.values[:, idx] for idx in range(5))

In [None]:
# m: size of the training set; X_0: column of ones for the intercept term
m = X_Newtrain.shape[0]
X_0 = np.ones(shape=(m, 1))
X_0[:5]

In [None]:
# Turn each 1-D feature array into an (m, 1) column so they can be stacked side by side
X_1, X_2, X_3, X_4, X_5 = (
    column.reshape(m, 1) for column in (X1, X2, X3, X4, X5)
)

In [None]:
# Final training design matrix: intercept column followed by the five feature columns
train_columns = [X_0, X_1, X_2, X_3, X_4, X_5]
X = np.hstack(train_columns)
X[:5]

In [None]:
# One parameter per column of the design matrix X (intercept + features).
# Sizing from X.shape[1] instead of a hard-coded 6 keeps theta in step with X
# if the feature list changes.
theta = np.zeros(X.shape[1])
theta

In [None]:
# Separate the target from the test features.
# pop() removes 'price' from df_Newtest in place, so a second run of this cell
# used to raise KeyError; the guard makes the cell safe to re-run.
if 'price' in df_Newtest.columns:
    y_Newtest = df_Newtest.pop('price')
# X_Newtest is the same object as df_Newtest (now without 'price')
X_Newtest = df_Newtest

In [None]:
# Preview the first rows of the test features
X_Newtest.head()

In [None]:
# Preview the first values of the test target (the popped 'price' column)
y_Newtest.head()

In [None]:
# Test target as a plain NumPy array; show the first ten values
y_test = y_Newtest.values
print('y_test = ', y_test[:10])

In [None]:
# Preparing the testing/validation set:
# one 1-D array per feature, taken from column positions 0..4 of df_Newtest
X1_test, X2_test, X3_test, X4_test, X5_test = (
    df_Newtest.values[:, idx] for idx in range(5)
)

In [None]:
# m_test: size of the validation set; X_0_test: column of ones for the intercept term
m_test = X_Newtest.shape[0]
X_0_test = np.ones(shape=(m_test, 1))
X_0_test[:5]

In [None]:
# Turn each 1-D test feature array into an (m_test, 1) column for stacking
X_1_test, X_2_test, X_3_test, X_4_test, X_5_test = (
    column.reshape(m_test, 1)
    for column in (X1_test, X2_test, X3_test, X4_test, X5_test)
)

In [None]:
# Final X matrix for validation: X_0_test through X_5_test placed side by side
test_columns = [X_0_test, X_1_test, X_2_test, X_3_test, X_4_test, X_5_test]
X_test = np.hstack(test_columns)
X_test[:5]

In [None]:
# Cost function for linear regression

def compute_cost(X, y, theta, m):
    """Return the halved mean squared error of the linear model ``X.dot(theta)``.

    X is the design matrix, y the targets, theta the parameter vector and
    m the number of samples used to average the squared errors.
    """
    residuals = X.dot(theta) - y
    return 1 / (2 * m) * np.sum(residuals ** 2)

In [None]:
# Training cost at the initial theta values, before any gradient step
cost = compute_cost(X=X, y=y, theta=theta, m=m)
cost

In [None]:
# gradient descent algorithm

def gradient_descent(X, y, theta, alpha, iterations, X_val=None, y_val=None):
    """Fit linear-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_params)
        Training design matrix.
    y : ndarray, shape (n_samples,)
        Training targets.
    theta : sequence of float
        Initial parameters (list or array). The caller's object is not modified.
    alpha : float
        Learning rate.
    iterations : int
        Number of update steps.
    X_val, y_val : ndarray, optional
        Validation design matrix and targets. When either is omitted the
        notebook-level ``X_test`` / ``y_test`` are used, which is what the
        function always did before these parameters existed.

    Returns
    -------
    theta : ndarray
        Parameters after the last update.
    cost_history : ndarray, shape (iterations,)
        Training cost after each update.
    cost_test : ndarray, shape (iterations,)
        Validation cost after each update.

    Raises
    ------
    ValueError
        If the training set is empty.
    """
    if X_val is None or y_val is None:
        # Backward-compatible fallback to the validation arrays defined in the notebook.
        X_val, y_val = X_test, y_test

    # Take the sample counts from the data actually passed in, rather than from
    # the notebook globals m / m_test, which can be stale or refer to another set.
    n_train = X.shape[0]
    n_val = X_val.shape[0]
    if n_train == 0:
        raise ValueError("gradient_descent needs at least one training sample")

    theta = np.asarray(theta, dtype=float)
    cost_history = np.zeros(iterations)
    cost_test = np.zeros(iterations)
    for i in range(iterations):
        errors = X.dot(theta) - y
        # Rebinding (not in-place update) keeps the caller's initial theta untouched.
        theta = theta - (alpha / n_train) * X.transpose().dot(errors)
        cost_history[i] = compute_cost(X, y, theta, n_train)        # loss for training set
        cost_test[i] = compute_cost(X_val, y_val, theta, n_val)     # loss for validation set

    return theta, cost_history, cost_test

In [None]:
# Starting point and hyperparameters for gradient descent
# NOTE(review): the feature columns are fed in unscaled; with a fixed learning
# rate this may make the cost grow instead of shrink. Check cost_history and
# consider scaling the features or lowering alpha.
theta = [0.0] * 6       # initial parameters, all zero
iterations = 200        # number of gradient steps
alpha = 0.01            # learning rate

In [None]:
# Fit the model and report the final theta plus the per-iteration losses for
# the training and validation sets.
# Note: theta is rebound to the fitted values here, so re-running only this
# cell continues from them; re-run the hyperparameter cell first to restart.
theta, cost_history, cost_test = gradient_descent(
    X, y, theta, alpha, iterations
)
print('Final value of theta=', theta)
print('cost_history =', cost_history)
print('cost_test =', cost_test)

In [None]:
# Plot training and validation loss against the iteration number.
# The figure size must be set BEFORE the first plt.plot call: rcParams only
# affect figures created afterwards, so setting it after plotting (as before)
# left this figure at the default size on a fresh run.
plt.rcParams["figure.figsize"] = (10,6)
iteration_axis = range(1, iterations + 1)
plt.plot(iteration_axis, cost_history, color='green', label= 'Loss for Training Set')
plt.plot(iteration_axis, cost_test, color='red', label= 'Loss for Validation Set')
plt.grid()
plt.legend()
plt.xlabel('Number of Iterations')
plt.ylabel('Cost (J)')
plt.title('Convergence of Gradient Descent')