In [2]:
# coding: utf-8

# ### Design a housing price predictor taking only floor area (plot size), number of bedrooms,
# and number of bathrooms into consideration. Out of the total 546 records,
# you may take 70% for designing the predictor and 30% for validating the design.
# The predictor design should be done using the following methods:
# 
# ### d) Implement the LWR algorithm on the Housing Price data set with different tau values.	
# Find out the tau value which will provide the best fit predictor and hence compare its results with a) , b) and c) above.	

# In[112]:

# Whole Assignment will be done using numpy only 
import numpy as np

# pandas is used only to read the CSV file, because its string-valued columns are awkward to load with plain numpy
import pandas as pd

# Read the housing data set into a pandas DataFrame.
# The file name is relative to the current working directory.
data = pd.read_csv('Housing Price data set.csv')
# Bare expression kept from the original notebook cell: it renders the
# DataFrame only when it is the last statement of a cell.
data


# #### Now we will convert the pandas columns into numpy array because we are not allowed to use any other library.
# #### Note: We will take only those columns into consideration on which we are asked to do prediction.

# In[113]:


''' To convert them into numpy array, 
first we will take series object using data[column name] 
and then convert it into list using list() function 
and then finally we will create the numpy array.'''

# Feature columns: each pandas Series is turned into a list and then into a numpy array
PlotSize, Bedrooms, Bathrooms = (
    np.array(list(data[column])) for column in ("lotsize", "bedrooms", "bathrms")
)

# Target column, converted the same way
Price = np.array(list(data["price"]))


# In[114]:


'''
This function takes X and Y columns as input where
X are the values in feature columns and Y is the value in 
target column. It applies locally weighted regression and 
predicts the value acc to the algorithm
'''
# Applying the LWR algorithm to each sample to obtain its predicted value,
# and finally appending that value to the prediction list.
# Weighted cost for one query point x:  J(theta) = sum_i w_i * (theta^T x_i - y_i)^2
# Kernel weight of training sample x_i:  w_i = exp(-||x_i - x||^2 / (2 * Tau^2))

def _add_bias_column(features):
    '''Return `features` as a 2-D array with a leading column of ones (intercept term).'''
    features = np.asarray(features)
    if features.ndim == 1:
        # A flat sequence is treated as n samples of a single feature.
        features = features.reshape(-1, 1)
    return np.insert(features, 0, 1, axis=1)


def LocallyWeightedLR(X, Y, Tau, X_query=None):
    '''Predict targets with locally weighted linear regression.

    For every query point a separate weighted least-squares problem is solved:
    theta = pinv(X^T W X) X^T W Y, where W holds the Gaussian kernel weights of
    the training samples relative to that query point, and the prediction is
    theta . query.

    Args:
        X: training features, array-like of shape (n, d). A 1-D sequence is
            treated as n samples of one feature.
        Y: training targets, array-like with n entries.
        Tau: kernel bandwidth; must be non-zero.
        X_query: optional points to predict at, array-like of shape (m, d).
            When omitted, predictions are made at the training rows
            themselves (the original behaviour).

    Returns:
        A list with one prediction per query point, in input order.

    Raises:
        ValueError: if Tau is zero, or if X_query has a different number of
            features than X.
    '''
    if Tau == 0:
        # Without this check the kernel divides by zero and yields NaN weights.
        raise ValueError("Tau must be non-zero: the kernel divides by 2 * Tau**2")

    # Prepend the intercept column once, for the whole matrix.
    X_train = _add_bias_column(X)
    Y = np.asarray(Y)

    if X_query is None:
        queries = X_train
    else:
        queries = _add_bias_column(X_query)
        if queries.shape[1] != X_train.shape[1]:
            raise ValueError("X_query must have the same number of features as X")

    # The transpose does not depend on the query point, so compute it once.
    X_T = np.transpose(X_train)

    prediction = []
    for query in queries:
        W = kernel(X_train, query, Tau)
        # Broadcasting scales column j of X^T by W[j], i.e. X^T @ diag(W).
        X_T_W = X_T * W
        X_T_WX = np.matmul(X_T_W, X_train)
        # pinv keeps the solve defined when X^T W X is singular.
        theta = np.matmul(np.matmul(np.linalg.pinv(X_T_WX), X_T_W), Y)
        prediction.append(np.transpose(theta).dot(query))
    return prediction


def kernel(X, xi, Tau):
    '''Gaussian kernel weight of every row of X relative to the point xi.

    Returns exp(-||xi - x||^2 / (2 * Tau^2)) for each row x of X, as a 1-D array.
    '''
    squared_distance = np.sum((xi - X) ** 2, axis=1)
    return np.exp(-squared_distance / (2 * Tau * Tau))

'''To call the function first we have to merge the numpy arrays into 1
So this function merges cells so that data for each index becomes as row for that part only'''
def mergeCells(cell):
    '''Stack equally long 1-D columns side by side into a 2-D array.

    Args:
        cell: sequence of m columns, each holding n values.

    Returns:
        Array of shape (n, m) in which row i holds the i-th value of every
        column. The dtype follows the inputs, so non-integer values are kept
        instead of being truncated to int.

    Raises:
        ValueError: if `cell` is empty or the columns differ in length.
    '''
    columns = [np.asarray(column) for column in cell]
    if not columns:
        raise ValueError("mergeCells needs at least one column")

    expected_length = len(columns[0])
    if any(len(column) != expected_length for column in columns):
        raise ValueError("all columns passed to mergeCells must have the same length")

    # axis=1 makes each input column a column of the result.
    return np.stack(columns, axis=1)


# In[115]:


# Merge the three feature columns into the sample matrix X (one row per sample)
feature_columns = [PlotSize[:], Bedrooms[:], Bathrooms[:]]
X = mergeCells(feature_columns)

# Y holds every value of the price (target) column
Y = Price[:]

#This function takes Y and Y predicted and calculates error
def predict(Y, Y_prediction):
    '''Return the mean absolute percentage error of Y_prediction against Y.

    Each sample contributes |(actual - predicted) / actual|; the contributions
    are averaged over len(Y) and scaled to a percentage.
    '''
    sample_count = len(Y)
    total = 0
    for idx in range(sample_count):
        actual = Y[idx]
        total += abs((actual - Y_prediction[idx]) / actual)
    return (total / sample_count) * 100


# In[117]:


# Percentage error for Tau = 0.01, 0.02, ..., 0.99
# NOTE(review): the error is computed on the same X/Y the model is fitted on
# (no held-out split is made here), and X holds the raw, unscaled feature
# columns - confirm this is intended before comparing Tau values.
print("Tau\t\t% Error")
max_iteration=100
req_values=[step/100 for step in range(1,max_iteration)]
for tau in req_values:
    predictionLWR=LocallyWeightedLR(X,Y,tau)
    # One line per Tau: value, two tabs, error rounded to 10 decimals
    print(tau,round(predict(Y, predictionLWR),10),sep="\t\t")


# # CONCLUSION

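# ### Result: the % error is lowest when using the LWR algorithm.
# 
# #### In comparison to parts (a), (b) and (c), my observation 
# is that the minimum error is obtained with the LWR algorithm.
# (Caveat: the error above is computed on the same samples the LWR model was fitted on;
# confirm that parts (a)-(c) were evaluated the same way before comparing the numbers.)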


Tau		% Error
0.01		5.4073055366
0.02		5.4073055366
0.03		5.4073055366
0.04		5.4073055366
0.05		5.4073055366
0.06		5.4073055366
0.07		5.4073055366
0.08		5.4073055366
0.09		5.4073055366
0.1		5.4073055366
0.11		5.4073055366
0.12		5.4073055366
0.13		5.4073055366
0.14		5.4073055367
0.15		5.407305539
0.16		5.4073055735
0.17		5.4073058916
0.18		5.40730581
0.19		5.4073066121
0.2		5.407309645
0.21		5.407318661
0.22		5.4073414847
0.23		5.4073921643
0.24		5.4074927386
0.25		5.4076749742
0.26		5.4079808196
0.27		5.4084611379
0.28		5.4091738961
0.29		5.410181229
0.3		5.4115459213
0.31		5.4133279115
0.32		5.4155808927
0.33		5.4183493919
0.34		5.4216669157
0.35		5.425554518
0.36		5.4300201383
0.37		5.435059261
0.38		5.4406555421
0.39		5.4467822116
0.4		5.4534040637
0.41		5.460479344
0.42		5.4682072565
0.43		5.4763114547
0.44		5.484734308
0.45		5.4934263391
0.46		5.502340902
0.47		5.511435445
0.48		5.5206722959
0.49		5.5300193017
0.5		5.5394501432
0.51		5.5488804064
0.52		5.5585456021
0.53		5.56839288