In [1]:
import numpy as np
import pandas as pd
import random
import math


In [2]:

data = pd.read_csv("data.csv")

price = data['price']
area = data['lotsize']
bedrooms = data['bedrooms']
bathroom = data['bathrms']


In [3]:
mean_area = np.mean(area)
max_area = max(area)
min_area = min(area)
area_scaled = []
for i in area:
	area_scaled.append((i - mean_area) / (max_area - min_area))

In [4]:
area_train = area[:383]
bathroom_train = bathroom[:383]
bedrooms_train = bedrooms[:383]
price_train = []
price_train = price[:383]

In [5]:
def Slope(coeff, features_train, price_train, ind):
	error = 0
	for i in range(len(features_train)):
		itr = 0
		for j in range(len(coeff)):
			itr = itr + coeff[j] * features_train[i][j]
		error += (itr - price_train[i]) * features_train[i][ind]
	return error


In [33]:
print("Using batch gradient without feature scaling")
features_train = []
for i in range(383):
	features_train.append([1, area[i], bedrooms[i], bathroom[i]])
no_scaling = 0.00000001

coeff = [0, 0, 0, 0]
print("Initial coefficients: ")
print(coeff)
lis1 = []
for i in range(5000):
	temp = coeff.copy()
	for j in range(len(coeff)):
		temp[j] = temp[j] - ((no_scaling / len(features_train)) * (Slope(coeff, features_train, price_train, j)))
	coeff = temp.copy()
 
print("Final coefficients are:")
print(coeff)

price_test = []
features_test = []
for i in range(383, len(price)):
	features_test.append([1, area[i], bedrooms[i], bathroom[i]])
	price_test.append(price[i])
 
error = 0
for i in range(len(features_test)):
	predicted = 0
	for j in range(len(coeff)):
	  	predicted = predicted + coeff[j] * features_test[i][j]
	error += abs(predicted - price_test[i]) / price_test[i]
error = (error / len(features_test)) * 100
print("Mean absolute percentage error is : " + str(error))



Using batch gradient without feature scaling
Initial coefficients: 
[0, 0, 0, 0]
Final coefficients are:
[0.21941839685016323, 12.481729406733438, 0.866559580223384, 0.4899364308390147]
Mean absolute percentage error is : 29.976669944053334


In [34]:
print("Using batch gradient with feature scaling")
features_train = []
for i in range(383):
	features_train.append([1, area_scaled[i], bedrooms[i], bathroom[i]])
no_scaling = 0.001

coeff = [0, 0, 0, 0]
print("Initial coefficients: ")
print(coeff)
for i in range(5000):
	temp = coeff.copy()
	for j in range(len(coeff)):
		temp[j] = temp[j] - ((no_scaling / len(features_train)) * (Slope(coeff, features_train, price_train, j)))
	coeff = temp.copy()

print("Final coefficients are:")
print(coeff)

price_test = []
features_test = []
for i in range(383, len(price)):
	features_test.append([1, area_scaled[i], bedrooms[i], bathroom[i]])
	price_test.append(price[i])

error = 0
for i in range(len(features_test)):
	predicted = 0
	for j in range(len(coeff)):
	  	predicted = predicted + coeff[j] * features_test[i][j]
	error += abs(predicted - price_test[i]) / price_test[i]
error = (error / len(features_test)) * 100
print("Mean absolute percentage error is : " + str(error))

Using batch gradient with feature scaling
Initial coefficients: 
[0, 0, 0, 0]
Final coefficients are:
[7730.872053867435, 8069.264303687423, 11077.015405893277, 18485.569122447192]
Mean absolute percentage error is : 20.34046542497449


In [32]:
def SlopeStoch(coeff,features_train,value_actual,ind):
	itr = 0
	for j in range(len(coeff)):
		itr = itr + coeff[j]*features_train[j]
	return (itr - value_actual) * features_train[ind]

print("Using Stochastic gradient without feature scaling")

features_train = []
for i in range(383):
	features_train.append([1, area[i], bedrooms[i], bathroom[i]])
 
no_scaling = 0.0000000003
coeff = [0, 0, 0, 0]
print("Initial coefficients: ")
print(coeff)

for iter in range(10):
	for i in range(len(price_train)):
		temp = coeff.copy()
		for j in range(4):
			temp[j] = temp[j] - (no_scaling * (SlopeStoch(coeff, features_train[i], price_train[i], j)))
		coeff = temp.copy()

print("Final coefficients are:")
print(coeff)

price_test = []
features_test = []
for i in range(383, len(price)):
	features_test.append([1, area[i], bedrooms[i], bathroom[i]])
	price_test.append(price[i])
 
error = 0
for i in range(len(features_test)):
	predicted = 0
	for j in range(len(coeff)):
	  	predicted = predicted + coeff[j] * features_test[i][j]
	error += abs(predicted - price_test[i]) / price_test[i]
error = (error / len(features_test)) * 100
print("Mean absolute percentage error is : " + str(error))



Using Stochastic gradient without feature scaling
Initial coefficients: 
[0, 0, 0, 0]
Final coefficients are:
[0.0071803400406398946, 12.301337356957236, 0.026237149684201787, 0.014175780186431276]
Mean absolute percentage error is : 29.96004919775569


In [23]:



print("Using Stochastic gradient with feature scaling")

features_train = []
for i in range(383):
	features_train.append([1, area_scaled[i], bedrooms[i], bathroom[i]])

scaling = 0.005
coeff = [0, 0, 0, 0]
print("Initial coefficients: ")
print(coeff)

for iter in range(10):
	for i in range(len(price_train)):
		temp = coeff.copy()
		for j in range(4):
			temp[j] = temp[j] - (scaling * (SlopeStoch(coeff, features_train[i], price_train[i], j)))
		coeff = temp.copy()

print("Final coefficients are:")
print(coeff)

Mean absolute percentage error is : 121188.06057013628
Using Stochastic gradient with feature scaling
Initial coefficients: 
[0, 0, 0, 0]
Final coefficients are:
[18648.663069990776, 15073.501985961251, 15766.862790309351, 22357.23427068568]


In [28]:
print("Using Minibatch gradient without feature scaling for batch size = 20")
features_train = []
for i in range(383):
	features_train.append([1, area[i], bedrooms[i], bathroom[i]])

batch_size = 20;
scaling = 0.000000001
coeff = [0, 0, 0, 0]
batches_count = math.ceil(len(price_train) / batch_size)
flag = False
if (len(price_train) % batch_size == 0):
	flag = True;

for epoch in range(30):
	for batch in range(batches_count):
		value_sum = [0, 0, 0, 0]
		for j in range(len(coeff)):
			for i in range(batch_size):
				if (batch * batch_size + i == len(features_train)):
					break
				value_predicted = 0.0
				for wj in range(len(coeff)):
					value_predicted += coeff[wj] * features_train[batch * batch_size + i][wj]
				value_predicted -= price_train[batch * batch_size + i]
				value_predicted *= features_train[batch * batch_size + i][j]
				value_sum[j] += value_predicted;

		if (not flag and batch == batches_count - 1):
			for j in range(len(value_sum)):
				coeff[j] -= (value_sum[j] / (len(price_train) % batch_size)) * scaling
		else:
			for j in range(len(value_sum)):
				coeff[j] -= (value_sum[j] / batch_size) * scaling
print("Final coefficients are:")
print(coeff)

price_test = []
features_test = []
for i in range(383, len(price)):
	features_test.append([1, area[i], bedrooms[i], bathroom[i]])
	price_test.append(price[i])

error = 0
for i in range(len(features_test)):
	predicted = 0
	for j in range(len(coeff)):
	  	predicted = predicted + coeff[j] * features_test[i][j]
	error += abs(predicted - price_test[i]) / price_test[i]
error = (error / len(features_test)) * 100
print("Mean absolute percentage error is : " + str(error))




Using Minibatch gradient without feature scaling for batch size = 20
Final coefficients are:
[0.004878043860877857, 12.316492614601154, 0.016968931159065084, 0.008676023446934251]
Mean absolute percentage error is : 29.959938152860634


In [26]:
print("Using Minibatch gradient with feature scaling for batch size = 20")

features_train = []
for i in range(383):
	features_train.append([1, area_scaled[i], bedrooms[i], bathroom[i]])

batch_size = 20;
scaling = 0.002
coeff = [0, 0, 0, 0]
batches_count = math.ceil(len(price_train) / batch_size)
flag = False
if (len(price_train) % batch_size == 0):
	flag = True;

for epoch in range(30):
	for batch in range(batches_count):
		value_sum = [0, 0, 0, 0]
		for j in range(len(coeff)):
			for i in range(batch_size):
				if (batch * batch_size + i == len(features_train)):
					break
				value_predicted = 0.0
				for wj in range(len(coeff)):
					value_predicted += coeff[wj] * features_train[batch * batch_size + i][wj]
				value_predicted -= price_train[batch * batch_size + i]
				value_predicted *= features_train[batch * batch_size + i][j]
				value_sum[j] += value_predicted;

		if (not flag and batch == batches_count - 1):
			for j in range(len(value_sum)):
				coeff[j] -= (value_sum[j] / (len(price_train) % batch_size)) * scaling
		else:
			for j in range(len(value_sum)):
				coeff[j] -= (value_sum[j] / batch_size) * scaling
print("Final coefficients are:")
print(coeff)

price_test = []
features_test = []
for i in range(383, len(price)):
	features_test.append([1, area_scaled[i], bedrooms[i], bathroom[i]])
	price_test.append(price[i])

error = 0
for i in range(len(features_test)):
	predicted = 0
	for j in range(len(coeff)):
	  	predicted = predicted + coeff[j] * features_test[i][j]
	error += abs(predicted - price_test[i]) / price_test[i]
error = (error / len(features_test)) * 100
print("Mean absolute percentage error is : " + str(error))


Using Minibatch gradient with feature scaling for batch size = 20
Final coefficients are:
[6340.552295015755, 2827.87646125345, 15916.90159915714, 10968.772912396124]
Mean absolute percentage error is : 20.275262089497147
