# Building Linear Regression with multiple variables: Step by Step

In [1]:
import numpy as np
import pandas as pd
import time

In [2]:
# Load the comma-separated data set. The file has no header row, so the three
# column names are supplied here and every column is parsed as float32.
data = pd.read_csv(
    'ex1data2.txt',
    header=None,
    names=['size', 'number of bedrooms', 'price'],
    dtype=np.float32,
)
data

Unnamed: 0,size,number of bedrooms,price
0,2104.0,3.0,399900.0
1,1600.0,3.0,329900.0
2,2400.0,3.0,369000.0
3,1416.0,2.0,232000.0
4,3000.0,4.0,539900.0
5,1985.0,4.0,299900.0
6,1534.0,3.0,314900.0
7,1427.0,3.0,198999.0
8,1380.0,3.0,212000.0
9,1494.0,3.0,242500.0


In [3]:
# Feature matrix (one row per sample, columns: size, number of bedrooms) and
# target column (price), both kept 2-D so that Y has shape (m, 1).
X = data[['size', 'number of bedrooms']].to_numpy()
Y = data[['price']].to_numpy()

In [4]:
# Per-feature statistics (computed column-wise); they are reused later to
# scale new inputs the same way as the training data.
mu = X.mean(axis=0)
sigma = X.std(axis=0)

# Standardize each feature column: subtract its mean, divide by its std.
X_train = (X - mu) / sigma
# Targets are used unscaled.
Y_train = Y

In [5]:
class LinearRegression():
    """Multivariate linear regression trained with batch gradient descent.

    The hypothesis is ``H = [1, X] @ theta``: a column of ones is prepended to
    the inputs so the first row of ``theta`` acts as the intercept.
    ``theta`` has shape ``(n_features + 1, n_outputs)`` and is created by
    :meth:`fit`.

    Targets may be passed either as an ``(m, n_outputs)`` array or as a 1-D
    vector of length ``m``; a 1-D vector is treated as a single output column.
    """

    @staticmethod
    def _design_matrix(inputs):
        """Return ``inputs`` with a leading column of ones (the bias term)."""
        inputs = np.asarray(inputs)
        return np.hstack([np.ones((inputs.shape[0], 1)), inputs])

    @staticmethod
    def _as_column(Y):
        """Return ``Y`` as a 2-D array, turning a 1-D vector into one column.

        Without this, ``H - Y`` with ``H`` of shape (m, 1) and ``Y`` of shape
        (m,) would broadcast to an (m, m) matrix and silently give a wrong
        cost and gradient.
        """
        Y = np.asarray(Y)
        return Y.reshape(-1, 1) if Y.ndim == 1 else Y

    def _fitted_theta(self):
        """Return ``self.theta`` or raise a descriptive error if it is unset."""
        theta = getattr(self, 'theta', None)
        if theta is None:
            raise AttributeError(
                "LinearRegression has no parameters yet: call fit() "
                "(or assign 'theta') before predicting or evaluating."
            )
        return theta

    def __call__(self, inputs):
        """Evaluate the hypothesis on ``inputs``.

        Args:
            inputs: array of shape (m, n_features).

        Returns:
            Array of shape (m, n_outputs) with the model outputs.

        Raises:
            AttributeError: if the model has no ``theta`` yet.
        """
        theta = self._fitted_theta()
        H = np.dot(self._design_matrix(inputs), theta)
        return H

    def predict(self, inputs):
        """Return the model outputs for ``inputs``; same as calling the model."""
        H = self(inputs)
        return H

    def costFunc(self, X, Y):
        """Return the cost ``J = sum((H - Y)**2) / (2 * m)``.

        Args:
            X: array of shape (m, n_features).
            Y: targets, shape (m, n_outputs) or (m,).

        Returns:
            The scalar cost, i.e. half the mean of the squared residuals
            summed over outputs.
        """
        m = X.shape[0]
        residual = self(X) - self._as_column(Y)
        J = (1/(2*m))*np.sum(residual*residual)
        return J

    def gradFunc(self, X, Y):
        """Return the gradient of :meth:`costFunc` with respect to ``theta``.

        Args:
            X: array of shape (m, n_features).
            Y: targets, shape (m, n_outputs) or (m,).

        Returns:
            Array with the same shape as ``theta``.
        """
        m = X.shape[0]
        X_ = self._design_matrix(X)
        # Reuse the design matrix for both the prediction and the gradient.
        residual = np.dot(X_, self._fitted_theta()) - self._as_column(Y)
        grad = (1/m)*np.dot(X_.T, residual)
        return grad

    def fit(self, X_train, Y_train, epochs=400, learning_rate=0.1,
            verbose=True, seed=None):
        """Fit ``theta`` with ``epochs`` steps of batch gradient descent.

        Args:
            X_train: array of shape (m, n_features).
            Y_train: targets, shape (m, n_outputs) or (m,).
            epochs: number of full-batch gradient steps.
            learning_rate: step size applied to the gradient.
            verbose: when True (the default) print the loss and the duration
                of every epoch; when False train silently.
            seed: optional seed for the random initial ``theta``. When None
                the global NumPy generator is used, so an earlier
                ``np.random.seed(...)`` call still controls the initialisation.

        Returns:
            None. The learned parameters are stored in ``self.theta``.
        """
        Y_train = self._as_column(Y_train)
        theta_shape = (X_train.shape[1]+1, Y_train.shape[1])
        if seed is None:
            self.theta = np.random.normal(loc=0.0, scale=0.05, size=theta_shape)
        else:
            rng = np.random.default_rng(seed)
            self.theta = rng.normal(loc=0.0, scale=0.05, size=theta_shape)

        if verbose:
            print('Training...')
        for epoch in range(epochs):
            # perf_counter is monotonic and high resolution, unlike time.time().
            start_time = time.perf_counter()
            grad = self.gradFunc(X_train, Y_train)
            self.theta = self.theta - learning_rate*grad
            if verbose:
                # The loss is only needed for reporting, so skip it otherwise.
                loss = self.costFunc(X_train, Y_train)
                duration = time.perf_counter() - start_time
                print('Epochs {}/{} - Loss: {}'.format(epoch+1, epochs, loss))
                print('----- {}s -----'.format(np.round(1000*duration)/1000))

In [6]:
# fit() draws the initial theta from NumPy's global random generator, so seed
# it here: the per-epoch losses are then identical on every Restart & Run All.
np.random.seed(42)

model = LinearRegression()

model.fit(X_train, Y_train)

Training...
Epochs 1/400 - Loss: 53268140314.21582
----- 0.0s -----
Epochs 2/400 - Loss: 43388998806.73816
----- 0.0s -----
Epochs 3/400 - Loss: 35457369228.38772
----- 0.0s -----
Epochs 4/400 - Loss: 29080510906.537365
----- 0.0s -----
Epochs 5/400 - Loss: 23947047668.190033
----- 0.0s -----
Epochs 6/400 - Loss: 19809550601.434906
----- 0.0s -----
Epochs 7/400 - Loss: 16470994878.965338
----- 0.0s -----
Epochs 8/400 - Loss: 13774191972.246231
----- 0.0s -----
Epochs 9/400 - Loss: 11593516117.169756
----- 0.0s -----
Epochs 10/400 - Loss: 9828406634.161062
----- 0.0s -----
Epochs 11/400 - Loss: 8398249939.493309
----- 0.0s -----
Epochs 12/400 - Loss: 7238337264.5793705
----- 0.0s -----
Epochs 13/400 - Loss: 6296663901.041657
----- 0.0s -----
Epochs 14/400 - Loss: 5531388862.885388
----- 0.0s -----
Epochs 15/400 - Loss: 4908814378.227572
----- 0.0s -----
Epochs 16/400 - Loss: 4401775686.837344
----- 0.0s -----
Epochs 17/400 - Loss: 3988355528.9577394
----- 0.0s -----
Epochs 18/400 - Loss

In [7]:
# costFunc returns sum((H - Y)^2) / (2m), so doubling it gives the mean
# squared error of the fitted model on the training data.
2*model.costFunc(X_train, Y_train)

4086560075.2724414

In [8]:
# Model outputs for the normalized training inputs, one row per sample.
Y_pred = model.predict(X_train)
Y_pred

array([[356283.1110963 ],
       [286120.93164502],
       [397489.4728382 ],
       [269244.18380728],
       [472277.84976714],
       [330979.02120125],
       [276933.02565925],
       [262037.48753131],
       [255494.58053364],
       [271364.60055667],
       [324714.5405145 ],
       [341805.20041959],
       [326492.02668592],
       [669293.21083291],
       [239902.98763698],
       [374830.38437761],
       [255879.96225637],
       [235448.24494548],
       [417846.48166841],
       [476593.39194265],
       [309369.11395043],
       [334951.62382885],
       [286677.77285056],
       [327777.17570718],
       [604913.36988346],
       [216515.59176839],
       [266353.01665963],
       [415030.01647784],
       [369647.33427811],
       [430482.40233492],
       [328130.30112111],
       [220070.55996721],
       [338635.60816365],
       [500087.73568795],
       [306756.3643398 ],
       [263429.59054516],
       [235865.87911143],
       [351442.99013992],
       [6414

In [9]:
# Training targets (the unscaled 'price' column), e.g. to compare against Y_pred.
Y_train

array([[399900.],
       [329900.],
       [369000.],
       [232000.],
       [539900.],
       [299900.],
       [314900.],
       [198999.],
       [212000.],
       [242500.],
       [239999.],
       [347000.],
       [329999.],
       [699900.],
       [259900.],
       [449900.],
       [299900.],
       [199900.],
       [499998.],
       [599000.],
       [252900.],
       [255000.],
       [242900.],
       [259900.],
       [573900.],
       [249900.],
       [464500.],
       [469000.],
       [475000.],
       [299900.],
       [349900.],
       [169900.],
       [314900.],
       [579900.],
       [285900.],
       [249900.],
       [229900.],
       [345000.],
       [549000.],
       [287000.],
       [368500.],
       [329900.],
       [314000.],
       [299000.],
       [179900.],
       [299900.],
       [239500.]], dtype=float32)

In [10]:
# Single query sample: size=1650, number of bedrooms=3 (same column order as X).
x = np.array([[1650, 3]])
# Scale the query with the training mean/std, since the model was fitted on X_train.
x_norm = (x-mu)/sigma
model.predict(x_norm)

array([[293081.46468585]])