In [172]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("data.csv")

In [173]:
df.head(10)

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA
5,2014-05-02 00:00:00,490000.0,2.0,1.0,880,6380,1.0,0,0,3,880,0,1938,1994,522 NE 88th St,Seattle,WA 98115,USA
6,2014-05-02 00:00:00,335000.0,2.0,2.0,1350,2560,1.0,0,0,3,1350,0,1976,0,2616 174th Ave NE,Redmond,WA 98052,USA
7,2014-05-02 00:00:00,482000.0,4.0,2.5,2710,35868,2.0,0,0,3,2710,0,1989,0,23762 SE 253rd Pl,Maple Valley,WA 98038,USA
8,2014-05-02 00:00:00,452500.0,3.0,2.5,2430,88426,1.0,0,0,4,1570,860,1985,0,46611-46625 SE 129th St,North Bend,WA 98045,USA
9,2014-05-02 00:00:00,640000.0,4.0,2.0,1520,6200,1.5,0,0,3,1520,0,1945,2010,6811 55th Ave NE,Seattle,WA 98115,USA


In [174]:
df.drop(["date", "country", "statezip", "city", "street", "yr_built", "yr_renovated"], axis=1, inplace=True)
df.head(5)

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement
0,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0
1,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280
2,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0
3,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000
4,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800


In [175]:
data = np.array(df, dtype=float)

In [176]:
def normalize(data):
    for i in range(0,data.shape[1]):
        data[:,i] = ((data[:,i] - np.mean(data[:,i]))/np.std(data[:, i]))        

In [177]:
normalize(data)

In [178]:
Y = data[:, :1]
X = data[:, 1:]

In [179]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 42)

In [180]:
def hypothesis(x, t):
    return np.dot(x, t)

In [181]:
def cost_function(t, x, y):
    return np.sum(np.dot((hypothesis(x, t)-y).T, (hypothesis(x, t)-y))) / (2 * y.shape[0])

In [182]:
def gradient_descent(t, x, y, iters, alpha):
    m = x.shape[0]
    for i in range(iters):
        yhat = hypothesis(x, t)
        cost = (1/m) * np.dot(x.T, (yhat - y))
        t = t - (alpha)*cost
        
    return t

In [183]:
theta = np.zeros((X.shape[1], 1))
theta = gradient_descent(theta, x_train, y_train, 1000, 0.1)
print(theta)

[[-0.09259946]
 [-0.01512826]
 [ 0.2480647 ]
 [-0.03444169]
 [ 0.04555302]
 [ 0.06099914]
 [ 0.07441418]
 [ 0.06638398]
 [ 0.20753307]
 [ 0.12929163]]


In [184]:
y_pred = hypothesis(x_test, theta)

In [185]:
print(y_pred)

[[-3.23910653e-01]
 [-2.30531717e-01]
 [ 8.73260148e-01]
 [ 1.64010174e-01]
 [-2.69184721e-01]
 [ 2.34763970e-01]
 [-4.50962631e-02]
 [-1.25914528e-01]
 [-3.52337773e-02]
 [-2.02479368e-02]
 [ 3.36293587e-01]
 [-3.50195858e-01]
 [ 4.90313645e-01]
 [-4.27562241e-02]
 [-2.61016411e-01]
 [ 3.42170146e-01]
 [ 5.66032654e-02]
 [-3.90467751e-02]
 [ 1.00208537e+00]
 [ 6.14127553e-01]
 [ 1.52019692e+00]
 [ 1.86218028e-01]
 [ 1.74247036e-01]
 [-7.04110836e-02]
 [-5.26972582e-01]
 [-5.33974734e-01]
 [ 2.36480397e-01]
 [ 5.95660564e-01]
 [-3.65495261e-01]
 [ 5.86075506e-01]
 [ 2.06378191e+00]
 [-2.15918879e-01]
 [ 1.06942623e+00]
 [-1.35717974e-01]
 [-5.84326464e-01]
 [-2.66905290e-01]
 [ 5.04542679e-01]
 [ 4.01181350e-01]
 [-4.27155466e-01]
 [ 7.72951930e-02]
 [-2.34079361e-01]
 [-3.91032440e-01]
 [-2.27290328e-01]
 [-2.41237780e-01]
 [-4.07565604e-01]
 [-4.80871979e-01]
 [-7.29213241e-02]
 [-1.09472571e-02]
 [ 4.79242833e-01]
 [-4.40995523e-01]
 [ 1.25652151e+00]
 [-3.03744725e-01]
 [-1.8539650