## Importing Some Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing as p
from sklearn.ensemble import GradientBoostingRegressor

## Loading Training and Testing Data....

In [2]:
## Training Data
data = np.loadtxt("boston_training_data.csv", delimiter = ",")
print("shape of training dataset ::", data.shape)
x_train = data[ : , 0 : -1]
y_train = data[ : , -1]
print(x_train.shape, y_train.shape)

## Testing Data
x_test = np.loadtxt("boston_testing_data.csv", delimiter = ",")
print("shape of training dataset ::", x_test.shape)

shape of training dataset :: (379, 14)
(379, 13) (379,)
shape of training dataset :: (127, 13)


## My Implemented Generic Gradient Descent

In [3]:
def score(Y_pred, Y_actual):
    u = ((Y_actual-Y_pred)**2).sum()
    v = ((Y_actual-Y_actual.mean())**2).sum()
    cod = 1 - u/v
    return cod

def cost(X, Y, m):
    total_cost = 0
    N = len(X)
    c = np.array([[1] for i in range(N)])
    X = np.append(X, c, axis = 1)
    print(X.shape)
    for i in range(N):
        x = X[i]
        y = Y[i]
        total_cost += (1/N) * (y - (m * x).sum()) ** 2
    return total_cost

def predict(X, m):
    N = len(X)
    c = np.array([[1] for i in range(N)])
    X = np.append(X, c, axis = 1)
    y_predict = []
    for i in range(N):
        y_predict.append((X[i] * m).sum())
    return y_predict

def step_gradient(X, Y, learning_rate, m):
    N = len(X)
    M = len(X[0])
    new_m = []
    for j in range(M):
        m_slope = 0
        for i in range(N):
            x = X[i, : ] ## ith row
            y = Y[i] ##  output
            m_slope += (-2/N) * (y - (m * x).sum()) * x[j]
        m_slope = m[j] - learning_rate * m_slope
        new_m.append(m_slope)
    return new_m 
    
    
def gd(X, Y, learning_rate, num_iteration):
    N = len(X)
    M = len(X[0])
    m = [0 for i in range(M)]
    for i in range(num_iteration):
        m = step_gradient(X, Y, learning_rate, m)
    return m


def fit(X, Y, learning_rate = 0.1, num_iteration = 500):
    N = len(X)
    ## For c(constant) ... add extra column
    c = np.array([[1] for i in range(N)])
    X = np.append(X, c, axis = 1)
    m = gd(X, Y, learning_rate, num_iteration)
    return m

In [4]:
m = fit(x_train, y_train)
m

[-0.9379538345151149,
 0.7406793748132231,
 0.01065760495348224,
 0.7809880843640792,
 -2.1744127597545213,
 2.354416219640054,
 0.12322204969801252,
 -2.9523959893120573,
 2.530781411305952,
 -1.700337899401696,
 -2.251435750096298,
 0.5883104319007403,
 -4.263622585311314,
 22.67724159382909]

In [5]:
## Cost on training data
cost(x_train, y_train, m)

(379, 14)


23.46605159003627

In [6]:
y_train_pred = predict(x_train, m)
y_train_pred

[34.15737512897514,
 24.522468192130745,
 13.529802248871889,
 18.961500339580116,
 22.376844417904298,
 30.651240660616235,
 22.443426460938007,
 20.90030971050885,
 20.358126691903816,
 36.21173929422801,
 25.25165944426227,
 23.38707487665291,
 42.84094849259475,
 26.443417393196448,
 22.587981012206125,
 19.05811411775712,
 23.079148578294294,
 19.943711980938794,
 19.48601314637302,
 33.53440949167196,
 20.940426032614525,
 30.98979752219077,
 35.13836288868843,
 25.967744136536293,
 21.24344551851975,
 19.57587195495758,
 13.3751304377799,
 33.06709367929027,
 28.413859075299285,
 29.389299614743116,
 27.221152110823613,
 19.712139788505688,
 32.686104237477544,
 16.935196185077512,
 35.026892032569805,
 6.804929887455895,
 9.550241076788335,
 14.365896543232056,
 34.48224177149975,
 23.798853153907388,
 17.517600252480285,
 30.021405091993728,
 25.710863073175382,
 12.745119637622436,
 14.215728774892707,
 19.632992932649657,
 26.847186832145653,
 24.64256303939482,
 7.924760687

In [7]:
score(y_train_pred, y_train)

0.7331366531434433

In [8]:
## Cost on testing data
y_test_predict = predict(x_test, m)
y_test_predict
df = pd.DataFrame(y_test_predict)
df.to_csv("Prediction.csv", index = False, header = None)

## SKLearn Gradient Descent

In [9]:
alg = GradientBoostingRegressor()
alg.fit(x_train, y_train)

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [10]:
y_train_pred = alg.predict(x_train)
y_train_pred


array([37.81722612, 23.07317245, 14.49407075, 19.36583192, 20.81290068,
       32.04386176, 20.67888341, 20.8704292 , 20.61552045, 34.86048439,
       22.86208501, 20.90044999, 50.17554378, 23.3813911 , 20.59268047,
       15.30707728, 21.41856219, 20.63996027, 18.25812701, 34.78873994,
       21.71524892, 34.62596686, 31.95755423, 27.91217926, 16.72909184,
       18.71282658, 15.21212286, 32.96505971, 28.44938977, 23.93782692,
       22.67801587, 20.04480056, 34.8288639 , 19.06958753, 34.05574361,
        8.46595084, 10.13261183, 14.62204504, 40.79648264, 22.97689195,
       18.02134219, 24.37668351, 22.77478931, 22.60299104, 15.20221802,
       15.78232312, 23.93924831, 24.68098336, 14.02617814, 12.70295484,
       10.2276883 , 18.90389414, 17.69464907, 44.83180306, 31.52690671,
       10.97906229, 26.01463002, 11.35591463, 25.58086711, 29.09171573,
       14.42640575, 17.96169818, 38.63483999, 11.08634911, 17.90607022,
       26.22044159, 20.65119245, 19.71712902, 15.22000501, 49.64

In [11]:
y_test_pred = alg.predict(x_test)
y_test_pred

array([11.71762886, 26.80499145, 17.83589307, 22.06512279, 21.19058003,
       12.2832724 , 28.87118865, 25.20237061, 18.77275134, 22.99272373,
       23.39058111, 17.6291958 , 19.64676654, 20.8280366 , 51.86380438,
       20.4503074 , 23.49131107, 22.87310447, 19.17816831, 30.68495565,
       20.89789526, 21.49588187, 36.62299579, 31.39775722, 33.4155803 ,
       16.38574194, 20.47642029, 30.64770333, 19.97381035, 28.7847961 ,
       17.218498  , 24.68879147, 20.79190642, 22.83448526,  8.58056865,
       23.80896034, 22.57697789, 20.44057093, 22.71189925,  8.0482711 ,
       14.84655379, 23.59244202, 24.97135723, 19.90148152, 21.80376912,
        9.53863396, 43.705159  , 24.57774682, 34.31075662, 12.83411894,
       14.06393226, 45.23318763, 14.38762866, 20.95608777, 17.02834623,
       19.74580756, 18.88744499, 22.4778254 , 15.32494152, 15.84899874,
       13.68437009, 27.27249687, 22.29881719, 23.0038954 , 14.49435547,
       16.20543174, 32.88520177, 12.57183546, 20.91664926, 21.38