In [102]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

class linear_regression():

    #    
    @classmethod
    def predict_y(self, x, w, b):
        """
        x : scalar / vector of input vales
        w : scalaer / vector of slope coefficients
        b : scalar of intercept coefficient

        linear regression formula: y(x) = w*x + b
        multiple regression formula: y(x1, x2, ..., xn) = w1*x1 + w2*x2 + ... + wn*xn  + b
        """
        return np.dot(x, w) + b
    
    #
    @classmethod
    def calc_cost(self, x, y, w, b):
        """
        x : scalar / vector of input vales
        y : scalaer / vector of true labels (output)
        w : scalaer / vector of slope coefficients
        b : scalar of intercept coefficient

        cost function formula: (∑i=1->m | (yi_pred - yi_actual)^2)/(2m)
        m - lengths of vectors x,y (number of rows in the dataset)
        """
        
        y_pred = self.predict_y(x=x, w=w, b=b)
        y_sigma = sum([(yp - ya)**2 for yp,ya in zip(y_pred, y)])
        cost = y_sigma / (2*len(x))
    
        return cost
    
    #
    @classmethod
    def calc_grad(self, x, y, w, b):
        """
        x : vector of input vales
        y : vector of true labels (output)
        w : scalaer / vector of slope coefficients
        b : scalar of intercept coefficient

        gradient formulas: 
                        w) (1/m) ∑i=1->m-1 | dJdw(w,b)i = (1/m) ∑i=1->m-1 | (f(xi)-yi)xi
                        b) (1/m) ∑i=1->m-1 | dJdb(w,b)i = (1/m) ∑i=1->m-1 | (f(xi)-yi)
        """
        # Input information
        x_input = np.array(x)
        y_true = np.array(y)
        m = x_input.shape[0]

        # Predicted y vector and it's corresponding error
        y_pred = self.predict_y(x_input, w, b)
        y_error = y_pred - y_true

        # intercept coeficient derrivitive
        gb = np.sum(y_error) / m 

        # For multiple regression
        if x_input.ndim > 1:
            # Transforming the x_input matrix to achive our goal by matrix multipication
            # (f(xi)-yi)xi -> np.dot(x,w) = (x1w1 + x2w2 + ... + xnwn)
            gw = np.dot(x_input.T, y_error) / m

        # For regular regression
        else:
            gw = np.sum(y_error * x_input) / m
   
        return gw, gb

    @classmethod
    def grad_desc(self,x , y, w_init, b_init, alpha, n_inter):
        """
        x : vector of input vales
        y : vector of true labels (output)
        w : scalaer / vector of slope coefficients
        b : scalar of intercept coefficient
        alpha : scalaer of the learning rate
        n_inter : scalar (int) -> number of gradient descent iteration

        gradient formulas (for each itiration): 
                        w) wj = wj - alpha * dJdw
                        b) b = b - alpha * dJdb
        """

        w, b = w_init, b_init
        cost_list = []
        for i in range(n_inter):
            #x_input = np.array(x) #making sure that the x input is np.array
            dJw, dJb = self.calc_grad(x, y, w, b)
            cost_list.append(self.calc_cost(x, y, w, b))
            

            w = w - alpha * dJw
            b = b - alpha * dJb

        return w, b

In [103]:
housing = pd.read_csv(os.path.join("datasets", "housing.csv"), index_col=None)
x_train = housing.area
y_train = housing.price

In [104]:
# Control data (from the coursera exam, need to replicate the results here)
x_train = np.array([ 6.1101,  5.5277,  8.5186,  7.0032,  5.8598,  8.3829,  7.4764,
                     8.5781,  6.4862,  5.0546,  5.7107, 14.164 ,  5.734 ,  8.4084,
                     5.6407,  5.3794,  6.3654,  5.1301,  6.4296,  7.0708,  6.1891,
                     20.27  ,  5.4901,  6.3261,  5.5649, 18.945 , 12.828 , 10.957 ,
                     13.176 , 22.203 ,  5.2524,  6.5894,  9.2482,  5.8918,  8.2111,
                     7.9334,  8.0959,  5.6063, 12.836 ,  6.3534,  5.4069,  6.8825,
                     11.708 ,  5.7737,  7.8247,  7.0931,  5.0702,  5.8014, 11.7   ,
                     5.5416,  7.5402,  5.3077,  7.4239,  7.6031,  6.3328,  6.3589,
                     6.2742,  5.6397,  9.3102,  9.4536,  8.8254,  5.1793, 21.279 ,
                     14.908 , 18.959 ,  7.2182,  8.2951, 10.236 ,  5.4994, 20.341 ,
                     10.136 ,  7.3345,  6.0062,  7.2259,  5.0269,  6.5479,  7.5386,
                     5.0365, 10.274 ,  5.1077,  5.7292,  5.1884,  6.3557,  9.7687,
                     6.5159,  8.5172,  9.1802,  6.002 ,  5.5204,  5.0594,  5.7077,
                     7.6366,  5.8707,  5.3054,  8.2934, 13.394 ,  5.4369])

y_train = np.array([17.592  ,  9.1302 , 13.662  , 11.854  ,  6.8233 , 11.886  ,
                    4.3483 , 12.     ,  6.5987 ,  3.8166 ,  3.2522 , 15.505  ,
                    3.1551 ,  7.2258 ,  0.71618,  3.5129 ,  5.3048 ,  0.56077,
                    3.6518 ,  5.3893 ,  3.1386 , 21.767  ,  4.263  ,  5.1875 ,
                    3.0825 , 22.638  , 13.501  ,  7.0467 , 14.692  , 24.147  ,
                    -1.22   ,  5.9966 , 12.134  ,  1.8495 ,  6.5426 ,  4.5623 ,
                    4.1164 ,  3.3928 , 10.117  ,  5.4974 ,  0.55657,  3.9115 ,
                    5.3854 ,  2.4406 ,  6.7318 ,  1.0463 ,  5.1337 ,  1.844  ,
                    8.0043 ,  1.0179 ,  6.7504 ,  1.8396 ,  4.2885 ,  4.9981 ,
                    1.4233 , -1.4211 ,  2.4756 ,  4.6042 ,  3.9624 ,  5.4141 ,
                    5.1694 , -0.74279, 17.929  , 12.054  , 17.054  ,  4.8852 ,
                    5.7442 ,  7.7754 ,  1.0173 , 20.992  ,  6.6799 ,  4.0259 ,
                    1.2784 ,  3.3411 , -2.6807 ,  0.29678,  3.8845 ,  5.7014 ,
                    6.7526 ,  2.0576 ,  0.47953,  0.20421,  0.67861,  7.5435 ,
                    5.3436 ,  4.2415 ,  6.7981 ,  0.92695,  0.152  ,  2.8214 ,
                    1.8451 ,  4.2959 ,  7.2029 ,  1.9869 ,  0.14454,  9.0551 ,
                    0.61705])


# Test prediction function
test_pred = None
print(f"test label prediction: {test_pred}")

# Test cost function
test_cost = linear_regression.calc_cost(x=x_train, y=y_train, w=2, b=1)
print(f"test cost function: {test_cost} (control cost = 75.203)")

# Test gradient function
test_grad = linear_regression.calc_grad(x=x_train, y=y_train, w=0, b=0)
print(f"test gradient calculation: {test_grad} (control gradient -> w = 65.32884975, b = -5.83913505154639)")

# Test gradient-descent function
# Linear regersion
test_graddes = linear_regression().grad_desc(x_train, y_train, w_init=0, b_init=0, alpha=0.01, n_inter=1500)
print(f"test gradient descent calculation: {test_graddes} (control ceof -> w,b found by gradient descent: 1.166362350335582 -3.63029143940436)")

# Test gradient-descent function
# Multiple regersion
x_train2 = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train2 = np.array([460, 232, 178])
w_init2 = np.zeros(x_train2[0,:].shape[0])
b_init2 = 0


test_graddes = linear_regression().grad_desc(x_train2, y_train2, w_init=w_init2, b_init=b_init2, alpha=5.0e-7, n_inter=1000)
print(f"test gradient descent calculation: {test_graddes} (control ceof -> w,b found by gradient descent: [0.20396569  0.00374919 -0.0112487  -0.0658614 ] -0.002235407530932535")

test label prediction: None
test cost function: 75.20338497891959 (control cost = 75.203)
test gradient calculation: (np.float64(-65.32884974555671), np.float64(-5.839135051546393)) (control gradient -> w = 65.32884975, b = -5.83913505154639)
test gradient descent calculation: (np.float64(1.166362350335582), np.float64(-3.6302914394043597)) (control ceof -> w,b found by gradient descent: 1.166362350335582 -3.63029143940436)
test gradient descent calculation: (array([ 0.20396569,  0.00374919, -0.0112487 , -0.0658614 ]), np.float64(-0.0022354075309325345)) (control ceof -> w,b found by gradient descent: [0.20396569  0.00374919 -0.0112487  -0.0658614 ] -0.002235407530932535


    
    @classmethod
    def calc_grad(self, x, y, w, b):
        """
        x : vector of input vales
        y : vector of true labels (output)
        w : scalaer / vector of slope coefficients
        b : scalar of intercept coefficient

        gradient formulas: 
                        w) (1/m) ∑i=1->m-1 | dJdw(w,b)i = (1/m) ∑i=1->m-1 | (f(xi)-yi)xi
                        b) (1/m) ∑i=1->m-1 | dJdb(w,b)i = (1/m) ∑i=1->m-1 | (f(xi)-yi)
        """

        x_input = np.array(x)
        y_pred = self.predict_y(x_input, w=w, b=b)
        y_diff = y_pred - y

        dJb = y_diff
        gb = sum(dJb)/len(x)

        # For regular linear regression
        if x_input.ndim == 1:
            dJw = y_diff * x_input 
            gw = sum(dJw)/len(x)

        # For multiple regression
        else:
            gw = np.zeros((len(w),))

            for j in range(x_input.shape[1]):
                xj = x_input[:,j]
                wj = w[j]

                y_pred = self.predict_y(x=x, w=wj, b=b)
                dJw = (y_pred - y) * xj 
                
                gw[j] = sum(dJw)/len(xj)
                
        return gw, gb