In [1]:
import seaborn as sns
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
plt.style.use('dark_background')
from sklearn.metrics import mean_squared_error as mse

In [2]:
data = pd.read_csv("house1.csv")

In [3]:
data

Unnamed: 0,ID,Date House was Sold,Sale Price,No of Bedrooms,No of Bathrooms,Flat Area (in Sqft),Lot Area (in Sqft),No of Floors,Waterfront View,No of Times Visited,...,Overall Grade,Area of the House from Basement (in Sqft),Basement Area (in Sqft),Age of House (in Years),Renovated Year,Zipcode,Latitude,Longitude,Living Area after Renovation (in Sqft),Lot Area after Renovation (in Sqft)
0,7129300520,14 October 2017,221900.0,3,1.00,1180.0,5650.0,1.0,No,,...,7,1180.0,0,63,0,98178.0,47.5112,-122.257,1340.0,5650
1,6414100192,14 December 2017,538000.0,3,2.25,2570.0,7242.0,2.0,No,,...,7,2170.0,400,67,1991,98125.0,47.7210,-122.319,1690.0,7639
2,5631500400,15 February 2016,180000.0,2,1.00,770.0,10000.0,1.0,No,,...,6,770.0,0,85,0,98028.0,47.7379,-122.233,2720.0,8062
3,2487200875,14 December 2017,604000.0,4,3.00,1960.0,5000.0,1.0,No,,...,7,1050.0,910,53,0,98136.0,47.5208,-122.393,1360.0,5000
4,1954400510,15 February 2016,510000.0,3,2.00,1680.0,8080.0,1.0,No,,...,8,1680.0,0,31,0,98074.0,47.6168,-122.045,1800.0,7503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21604,263000018,14 May 2017,360000.0,3,2.50,1530.0,1131.0,3.0,No,,...,8,1530.0,0,9,0,98103.0,47.6993,-122.346,1530.0,1509
21605,6600060120,15 February 2016,400000.0,4,2.50,2310.0,5813.0,2.0,No,,...,8,2310.0,0,4,0,98146.0,47.5107,-122.362,1830.0,7200
21606,1523300141,14 June 2017,402101.0,2,0.75,1020.0,1350.0,2.0,No,,...,7,1020.0,0,9,0,98144.0,47.5944,-122.299,1020.0,2007
21607,291310100,15 January 2016,400000.0,3,2.50,1600.0,2388.0,2.0,No,,...,8,1600.0,0,14,0,98027.0,47.5345,-122.069,1410.0,1287


In [4]:
sale_price=data['Sale Price'].head(30)
flat_area=data['Flat Area (in Sqft)'].head(30)
sample_data=pd.DataFrame({'sale_price':sale_price,'flat_area':flat_area})
sample_data

Unnamed: 0,sale_price,flat_area
0,221900.0,1180.0
1,538000.0,2570.0
2,180000.0,770.0
3,604000.0,1960.0
4,510000.0,1680.0
5,1129575.0,5420.0
6,257500.0,1715.0
7,291850.0,1060.0
8,229500.0,1780.0
9,323000.0,1890.0


In [5]:
#intialising parameters

In [6]:
def param_init(Y):
    '''
    Y=target variable,
    returns the initialised value of m and c
    '''
    m = 0.1
    c = Y.mean()
    return m, c

In [7]:
#generate predictions

In [8]:
def generate_predictions(m,c,X):
    '''
    X=independent variable,
    generated by the variable m,c,x
    '''
    prediction=[]
    for x in X:
        pred = (m * x) + c
        prediction.append(pred)
        
    return prediction    

In [9]:
#calculating the cost

In [10]:
def compute_cost(prediction,Y):
    '''
    returns the mean squared error of prediction and Y
    '''
    cost=np.sum(((prediction - Y)**2/len(Y)))
    return cost

In [11]:
#calculating the gradient

In [12]:
def gradients(prediction ,Y,X):
    '''
    returns gradient corresponding to m and c
    '''
    n=len(Y)
    Gm = 2/n * np.sum((prediction -Y) * X)
    Gc = 2/n * np.sum((prediction - Y))
    return Gm,Gc

In [13]:
def param_update(m_old, c_old, Gm_old, Gc_old, alpha):
    '''
    update and return the new value of m and c
    '''
    m_new=m_old - alpha*Gm_old
    c_new=c_old - alpha*Gc_old
    return m_new,c_new

In [14]:
def result(m, c, X, Y, cost, predictions, i):
    '''
    print and plot the final result obtained from gradient descent
    '''
    ##if the gradient descent converged to the optimum value before max_iter
    if i < max_iter - 1:
        print("***gradient descent has converged at iteration{}***").format(i)
    else:
        print("***result after",max_iter,'iteration is:*****')
        
    ####plotting th final result
    plt.figure(figsize=(14,7), dpi = 120)
    plt.scatter(X, Y, color='red', label= 'data points')
    label = 'final regression line: m = {};c = {}'.format(str(m),str(c))
    plt.plot(X, predictions, color='green',label = label)
    plt.xlabel('flat area')
    plt.ylabel('sale_price')
    plt.title('final regression line')
    plt.legend()

In [15]:
##scaling the dataset using standard scaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
sale_price = scaler.fit_transform(sample_data['sale_price'].values.reshape(-1,1))
flat_area = scaler.fit_transform(sample_data['flat_area'].values.reshape(-1,1))

#declaring parameters
max_iter = 1000
cost_old = 0
alpha = 0.01

#initialising the values of m,c
m , c = param_init(sale_price)

#gradient descent in action
for i in range(0 , max_iter):
    
    ##generating predictions
    predictions = generate_predictions(m, c, flat_area)
    
    ##step 3:calculating cost
    cost_new = compute_cost(predictions, sale_price)
    
    ##checking if GD converged
    if abs(cost_new - cost_old) < 10**(-7):
        break
        
    ##calculating gradients
    Gm, Gc = gradients(predictions, sale_price ,
                       flat_area)
    
    ##updating parameters m and c
    m, c = param_update(m, c, Gm, Gc, alpha)
    
    ##display result after every 20 iterations
    if i%20 == 0:
        print('After iteration',i ,':m =', m,'; c =',c,'; Cost =', cost_new)
        
    #updating the value of cost_old to cost_new
    cost_old =cost_new
    
    #final results
    result(m, c, flat_area, sale_price, cost_new, predictions, i)
    

After iteration 0 :m = 0.11316346413633305 ; c = 1.1849780416165836e-16 ; Cost = 0.85836535863667
***gradient descent has converged at iteration{}***


AttributeError: 'NoneType' object has no attribute 'format'