# Project: Boston Dataset | Gradient Descent | Machine Learning

In [59]:
import numpy as np
import pandas as pd
from sklearn import datasets

## Importing the Data

In [60]:
data_train = np.genfromtxt("boston_train.csv", delimiter = ",")
data_test = np.genfromtxt("boston_test.csv", delimiter = ",")
data_test

array([[ 2.91816626, -0.48772236,  1.01599907, ...,  0.80657583,
        -1.59755122,  1.04106182],
       [-0.40339151, -0.48772236,  0.40609801, ..., -1.13534664,
         0.44105193, -0.89473812],
       [-0.4131781 , -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.50084979],
       ...,
       [-0.41001449,  2.08745172, -1.37837329, ..., -0.0719129 ,
         0.39094481, -0.68167397],
       [-0.40317611, -0.48772236, -0.37597609, ...,  1.13022958,
         0.34007019,  0.20142086],
       [-0.13356344, -0.48772236,  1.2319449 , ..., -1.73641788,
        -2.93893082,  0.48877712]])

## Converting Train Data to DataFrame

In [61]:
df = pd.DataFrame(data_train)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,-0.407850,-0.487722,-1.266023,-0.272599,-0.576134,1.239974,0.840122,-0.520264,-0.752922,-1.278354,-0.303094,0.410571,-1.097990,37.9
1,-0.407374,-0.487722,0.247057,-0.272599,-1.016689,0.001946,-0.838337,0.336351,-0.523001,-0.060801,0.113032,0.291169,-0.520474,21.4
2,0.125179,-0.487722,1.015999,-0.272599,1.367490,-0.439699,0.687212,-0.577309,1.661245,1.530926,0.806576,-3.795795,0.891076,12.7
3,0.028304,-0.487722,1.015999,-0.272599,1.859875,-0.047918,0.801005,-0.712836,1.661245,1.530926,0.806576,-0.066050,0.215438,19.9
4,-0.412408,-0.487722,-0.969827,-0.272599,-0.913029,-0.384137,-0.834781,0.300508,-0.752922,-0.957633,0.020560,0.431074,0.029007,22.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374,-0.204929,-0.487722,1.231945,3.668398,0.434551,2.161728,1.053485,-0.833960,-0.523001,-0.031105,-1.736418,0.361122,-1.504494,50.0
375,0.231398,-0.487722,1.015999,-0.272599,1.367490,0.215644,0.687212,-0.703186,1.661245,1.530926,0.806576,-2.812183,0.499991,14.3
376,-0.408311,-0.487722,0.247057,-0.272599,-1.016689,-0.206055,-0.809889,0.140451,-0.523001,-0.060801,0.113032,0.332066,-0.334043,20.8
377,-0.410620,-0.487722,-1.152214,-0.272599,-0.818007,0.068904,-1.826921,0.674814,-0.637962,0.129256,-0.719220,0.203235,-0.744752,22.6


## Separating Input and Output

In [62]:
df_input = df.iloc[:,0:13]
df_output = df.iloc[:,13]
df_output

0      37.9
1      21.4
2      12.7
3      19.9
4      22.5
       ... 
374    50.0
375    14.3
376    20.8
377    22.6
378    50.0
Name: 13, Length: 379, dtype: float64

## Converting the Data to Numpy Array

In [63]:
X_train = df_input.to_numpy()
Y_train = df_output.to_numpy()
X_train,Y_train

(array([[-0.40784991, -0.48772236, -1.2660231 , ..., -0.30309415,
          0.41057102, -1.09799011],
        [-0.40737368, -0.48772236,  0.24705682, ...,  0.1130321 ,
          0.29116915, -0.52047412],
        [ 0.1251786 , -0.48772236,  1.01599907, ...,  0.80657583,
         -3.79579542,  0.89107588],
        ...,
        [-0.40831101, -0.48772236,  0.24705682, ...,  0.1130321 ,
          0.33206621, -0.33404299],
        [-0.41061997, -0.48772236, -1.15221381, ..., -0.71922039,
          0.203235  , -0.74475218],
        [ 0.34290895, -0.48772236,  1.01599907, ...,  0.80657583,
          0.38787479, -1.35871335]]),
 array([37.9, 21.4, 12.7, 19.9, 22.5, 32.7, 23.2, 21.5, 20.5, 33.3, 23.1,
        20. , 50. , 25. , 20.2, 16.4, 22.4, 20.4, 18.4, 37.3, 23.1, 34.7,
        33.1, 28.7, 15.6, 18.2, 17.2, 31.5, 26.6, 24.1, 22.3, 19.5, 36.1,
        17.6, 33.4,  8.8,  9.7, 13.4, 39.8, 24.4, 17.8, 24.3, 24. , 23.1,
        13.1, 16.7, 22.8, 27.5, 14.6, 13.4, 11. , 19.4, 15. , 44.8, 29.6,
   

## Append 1 to Last

In [94]:
X_train = np.append(X_train, np.ones(X_train.shape[0]).reshape(-1, 1), axis=1)
X_test = np.append(data_test, np.ones(data_test.shape[0]).reshape(-1, 1), axis=1)
X_train,X_test

(array([[-0.40784991, -0.48772236, -1.2660231 , ..., -1.09799011,
          1.        ,  1.        ],
        [-0.40737368, -0.48772236,  0.24705682, ..., -0.52047412,
          1.        ,  1.        ],
        [ 0.1251786 , -0.48772236,  1.01599907, ...,  0.89107588,
          1.        ,  1.        ],
        ...,
        [-0.40831101, -0.48772236,  0.24705682, ..., -0.33404299,
          1.        ,  1.        ],
        [-0.41061997, -0.48772236, -1.15221381, ..., -0.74475218,
          1.        ,  1.        ],
        [ 0.34290895, -0.48772236,  1.01599907, ..., -1.35871335,
          1.        ,  1.        ]]),
 array([[ 2.91816626, -0.48772236,  1.01599907, ..., -1.59755122,
          1.04106182,  1.        ],
        [-0.40339151, -0.48772236,  0.40609801, ...,  0.44105193,
         -0.89473812,  1.        ],
        [-0.4131781 , -0.48772236,  0.11573841, ...,  0.44105193,
         -0.50084979,  1.        ],
        ...,
        [-0.41001449,  2.08745172, -1.37837329, ...,  

## Cost Function

In [79]:
def cost(X_train, y_train, m):
    result = 0
    N = X_train.shape[0]
    for i in range(N):
        x = X_train[i]
        y = Y_train[i]
        result += (1/N)*((y-((m*x).sum()))**2)
    return result

## Finding m using Gradient Descent

In [85]:
def gradient_descent():
    learning_rate = 0.1
    iterations = 1000
    m = find_M(X_train,Y_train,learning_rate,iterations)
    return m

def find_M(X_train,Y_train,learning_rate,iterations):
    m = np.array([0 for i in range(len(X_train[0]))])
    for i in range(iterations):
        m = step_gradient(X_train,Y_train,learning_rate,m)
        print("Cost: ",cost(X_train, Y_train,m))
    return m

def step_gradient(X_train,Y_train, learning_rate, m):
    M = X_train.shape[0]
    N = X_train.shape[1]
    m_slope = np.zeros(N)
    for i in range(M):
        x = X_train[i]
        y = Y_train[i]
        for j in range(N):
            m_slope[j] += (-2/M)*(y - (m*x).sum())*(x[j])
    new_m = m - learning_rate * m_slope
    return new_m

m = gradient_descent()
m

Cost:  374.24035597919686
Cost:  245.37909656079904
Cost:  165.10614192173668
Cost:  114.49508621191686
Cost:  82.43432664281796
Cost:  62.041704688310936
Cost:  49.0154746660482
Cost:  40.65519326019772
Cost:  35.25972835067592
Cost:  31.754121492629718
Cost:  29.457161634001743
Cost:  27.936007022144533
Cost:  26.914922412896374
Cost:  26.217802951980175
Cost:  25.731872822963105
Cost:  25.38469911771804
Cost:  25.129603525591076
Cost:  24.936384410518777
Cost:  24.785404792037365
Cost:  24.66381822190748
Cost:  24.56315514362143
Cost:  24.477776518190055
Cost:  24.403881200700226
Cost:  24.338867434896198
Cost:  24.280921148348455
Cost:  24.228749732407817
Cost:  24.18140930058519
Cost:  24.138192120101166
Cost:  24.09855285948923
Cost:  24.06205993819115
Cost:  24.028363158977218
Cost:  23.997171942483217
Cost:  23.968240497783352
Cost:  23.941357557710433
Cost:  23.916339140893513
Cost:  23.8930233395274
Cost:  23.871266478618274
Cost:  23.8509402167778
Cost:  23.83192930414061
Co

Cost:  23.466142749747355
Cost:  23.466140315368612
Cost:  23.466137945403638
Cost:  23.466135638148064
Cost:  23.466133391942588
Cost:  23.466131205171767
Cost:  23.46612907626295
Cost:  23.466127003685095
Cost:  23.46612498594765
Cost:  23.466123021599543
Cost:  23.466121109228077
Cost:  23.466119247457943
Cost:  23.466117434950178
Cost:  23.466115670401315
Cost:  23.46611395254236
Cost:  23.466112280137864
Cost:  23.466110651985144
Cost:  23.466109066913198
Cost:  23.466107523782224
Cost:  23.46610602148236
Cost:  23.46610455893323
Cost:  23.466103135083067
Cost:  23.466101748907853
Cost:  23.46610039941074
Cost:  23.466099085621224
Cost:  23.466097806594416
Cost:  23.466096561410556
Cost:  23.46609534917412
Cost:  23.46609416901336
Cost:  23.4660930200795
Cost:  23.46609190154635
Cost:  23.466090812609416
Cost:  23.466089752485654
Cost:  23.466088720412664
Cost:  23.466087715648158
Cost:  23.46608673746964
Cost:  23.466085785173572
Cost:  23.46608485807515
Cost:  23.466083955507617

Cost:  23.466050762324425
Cost:  23.46605076192502
Cost:  23.466050761536213
Cost:  23.466050761157696
Cost:  23.466050760789194
Cost:  23.466050760430406
Cost:  23.46605076008112
Cost:  23.46605075974111
Cost:  23.466050759410056
Cost:  23.46605075908779
Cost:  23.46605075877405
Cost:  23.466050758468583
Cost:  23.46605075817122
Cost:  23.46605075788172
Cost:  23.46605075759989
Cost:  23.46605075732552
Cost:  23.46605075705841
Cost:  23.466050756798328
Cost:  23.466050756545176
Cost:  23.4660507562987
Cost:  23.466050756058745
Cost:  23.46605075582515
Cost:  23.466050755597742
Cost:  23.466050755376333
Cost:  23.466050755160786
Cost:  23.466050754950942
Cost:  23.466050754746657
Cost:  23.46605075454778
Cost:  23.466050754354157
Cost:  23.46605075416565
Cost:  23.466050753982145
Cost:  23.46605075380352
Cost:  23.466050753629567
Cost:  23.466050753460237
Cost:  23.466050753295395
Cost:  23.466050753134933
Cost:  23.46605075297869
Cost:  23.466050752826572
Cost:  23.4660507526785
Cost:

Cost:  23.466050747232547
Cost:  23.466050747232476
Cost:  23.466050747232448
Cost:  23.466050747232366
Cost:  23.466050747232295
Cost:  23.466050747232234
Cost:  23.46605074723221
Cost:  23.466050747232153
Cost:  23.466050747232106
Cost:  23.46605074723203
Cost:  23.46605074723199
Cost:  23.466050747231954
Cost:  23.466050747231915
Cost:  23.466050747231865
Cost:  23.466050747231815
Cost:  23.466050747231783
Cost:  23.46605074723174
Cost:  23.46605074723171
Cost:  23.466050747231648
Cost:  23.466050747231623
Cost:  23.466050747231595
Cost:  23.466050747231552
Cost:  23.466050747231513


array([-9.38080610e-01,  7.41033999e-01,  1.16903020e-02,  7.80873861e-01,
       -2.17455732e+00,  2.35429667e+00,  1.23337955e-01, -2.95232364e+00,
        2.53296549e+00, -1.70290056e+00, -2.25151951e+00,  5.88354232e-01,
       -4.26368148e+00,  2.26772333e+01])

## Prediction on Test Data

In [97]:
def predict(X_test,m):
    N = X_test.shape[0]
    Y_pred = np.zeros(N)
    for i in range(N):
        Y_pred[i] = (m*X_test[i]).sum()
    return Y_pred
Y_pred = predict(X_test,m)

In [99]:
Y_pred

array([12.43328316, 29.03367206, 22.3716447 , 24.47778565, 20.60167146,
        2.72533227, 30.40000251, 24.86119999, 18.65725022, 23.53985837,
       24.11396868, 17.71143895, 17.44000499, 21.65356362, 42.31137754,
       23.84974577, 24.47573229, 27.53872772, 20.23606772, 31.15155568,
       23.78238084, 25.00979406, 33.95768518, 36.43515607, 32.04098399,
       16.71322559, 23.47176649, 32.93828179, 25.18069966, 33.71008693,
       16.88580248, 26.02760943, 23.27040055, 25.47758948, 15.00946626,
       29.58574992, 26.24821246, 20.37245806, 24.4368139 ,  9.44706905,
        8.38096595, 29.01392304, 29.59085455, 19.75757025, 20.37196749,
        3.14442592, 39.52420106, 25.71741224, 30.37729441, 16.79453238,
       17.89088614, 41.02574346, 17.57238762, 20.89662707, 15.59837735,
       21.41394828, 18.45436447, 23.155764  , 13.67245064, 17.23573843,
       15.02710403, 29.15131354, 25.17166513, 25.49749371, 17.21186634,
       17.42936975, 34.70372744, 17.01340773, 27.10724188, 22.54

In [100]:
np.savetxt('C:/Users/hp/Machine Learning Coding Ninjas/Milestone 2/Module 4 - Projects Gradient Descent/pred.csv', Y_pred)