Import the required libraries

In [19]:
import numpy as np
import pandas as pd
import random

Read the dataset using pandas

In [20]:
# Load the raw CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='gradient_descent_large_dataset.csv')
df.head(n=5)

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,target
0,37.454012,134.723658,8.077904,43.913722,0.282588,-73.45891
1,95.071431,99.451704,2.697165,296.991407,0.458677,159.226764
2,73.199394,69.546561,-14.896305,401.445683,0.099215,93.542437
3,59.865848,123.664179,-0.678734,384.591816,0.446837,-61.136211
4,15.601864,91.867814,-13.490336,420.324142,0.203081,-142.432637


Split the dataset into training and test sets

In [21]:
def train_test(x,y,test_size=0.2,random_state=1):
    """Shuffle paired samples and split them into train and test subsets.

    Parameters
    ----------
    x : array-like
        Samples, indexed along the first axis.
    y : array-like
        Targets, one per sample; must have the same length as ``x``.
    test_size : float, default 0.2
        Fraction of the samples assigned to the test subset, in [0, 1].
    random_state : int or None, default 1
        Seed for the shuffle. The same seed always yields the same split.

    Returns
    -------
    x_train, x_test, y_train, y_test : np.ndarray
        The shuffled samples/targets; the first ``int(n * (1 - test_size))``
        shuffled rows form the training subset, the rest the test subset.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length or ``test_size`` is outside [0, 1].
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n_samples = len(x)

    if n_samples != len(y):
        raise ValueError(
            f"x and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be in [0, 1], got {test_size}")

    # Shuffle row indices with a private generator so the process-wide
    # `random` state is left untouched. Shuffling a list of the same length
    # with the same seed produces the same permutation as shuffling the
    # zipped (x, y) pairs would, so splits are unchanged for a given seed.
    order = list(range(n_samples))
    random.Random(random_state).shuffle(order)
    order = np.array(order, dtype=np.intp)

    split = int(n_samples * (1 - test_size))
    train_idx = order[:split]
    test_idx = order[split:]
    return x[train_idx], x[test_idx], y[train_idx], y[test_idx]

# Build the feature matrix from the real feature columns only. The CSV
# preview shows an 'Unnamed: 0' column holding the saved row index; it
# carries no signal and must not be fed to the model. errors='ignore'
# keeps this working if the file is re-exported without that column.
x=df.drop(columns=['target','Unnamed: 0'],errors='ignore').values
y=df['target'].values
# Hold out 20% of the rows for evaluation; the seed fixes the split.
x_train,x_test,y_train,y_test=train_test(x,y,test_size=0.2,random_state=2)

Standardization of the data

In [22]:
# Standardize each feature column to zero mean / unit variance using
# statistics computed on the training split only, then apply the same
# transform to the test split so no test information leaks into training.
mean = np.mean(x_train, axis=0)
std = np.std(x_train, axis=0)
# A constant column has std == 0; dividing by it would fill the column
# with NaN/inf. Use 1 instead so such a column simply becomes all zeros.
std = np.where(std == 0, 1.0, std)
x_train = (x_train - mean) / std
x_test = (x_test - mean) / std

Implementing gradient descent from scratch

In [23]:
class GradientDescent:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y_hat = x @ weights + bias``. Each epoch records the mean
    squared error and then takes one gradient step on all samples.

    Attributes are set by ``fit``:
      weights -- 1-D array with one coefficient per feature (None before fit)
      bias    -- scalar intercept (None before fit)
      losses  -- list with the MSE measured at the start of every epoch
    """

    def __init__(self,learning_rate=0.1,epochs=1001):
        """Store the hyper-parameters; no training happens here.

        learning_rate -- step size applied to each gradient update
        epochs        -- number of full passes (gradient steps) run by ``fit``
        """
        self.weights=None
        self.learning_rate=learning_rate
        self.epochs=epochs
        self.bias=None
        self.losses = []

    def fit(self,x,y):
        """Train from scratch on ``x`` (n_samples, n_features) and ``y``.

        ``y`` may be flat or a column vector; it is flattened so that the
        residual ``h - y`` stays one value per sample. Every call restarts
        from zero weights/bias and a fresh loss history.

        Raises ValueError if ``y`` does not hold one target per row of ``x``.
        """
        x=np.asarray(x,dtype=float)
        # Flatten so an (n, 1) target cannot broadcast h - y into (n, n).
        y=np.asarray(y,dtype=float).ravel()
        n_sample,feature=x.shape

        if y.shape[0]!=n_sample:
            raise ValueError(
                f"x has {n_sample} samples but y has {y.shape[0]} targets"
            )

        self.weights=np.zeros(feature)
        self.bias=0.0
        # Reset alongside the parameters; otherwise a second fit() would
        # append to the history left behind by the previous run.
        self.losses=[]

        for _ in range(self.epochs):

            h=x@self.weights+self.bias
            residual=h-y
            loss = np.mean(residual ** 2)
            self.losses.append(loss)

            # The gradient is scaled by 1/n rather than 2/n, i.e. it is the
            # gradient of half the recorded MSE; the constant factor is
            # effectively folded into the learning rate.
            dw=(1/n_sample)*(x.T@residual)
            db=(1/n_sample)*(np.sum(residual))

            self.weights-=self.learning_rate*dw
            self.bias-=self.learning_rate*db

    def predict(self,x):
        """Return the linear prediction ``x @ weights + bias`` for ``x``."""
        return  x@self.weights+self.bias


In [24]:
# Train the model on the standardized training split, then predict the
# targets of the held-out test split.
lr = GradientDescent(epochs=500, learning_rate=0.01)
lr.fit(x_train, y_train)
y_predict = lr.predict(x_test)

Mean squared error

In [25]:
def mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Both arguments may be any array-like (list, tuple, ndarray, ...). They
    are converted to float arrays first, so plain Python sequences work and
    narrow or unsigned integer dtypes cannot wrap around when subtracted
    or squared.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) ** 2)

# Report the error of the fitted model on the held-out test split.
print("MSE:", mse(y_true=y_test, y_pred=y_predict))

MSE: 398.1635418515139
