### BATCH GRADIENT DESCENT for Multiple Variables

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
X,y = load_diabetes(return_X_y=True) 

In [3]:
print(X.shape) 
print(y.shape) 

(442, 10)
(442,)


In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2) 

In [5]:
print(X_train.shape) 
print(X_test.shape) 
print(y_train.shape) 
print(y_test.shape) 

(353, 10)
(89, 10)
(353,)
(89,)


In [6]:
lr = LinearRegression() 

In [7]:
lr.fit(X_train,y_train) 

LinearRegression()

In [8]:
lr.coef_

array([  -9.16088483, -205.46225988,  516.68462383,  340.62734108,
       -895.54360867,  561.21453306,  153.88478595,  126.73431596,
        861.12139955,   52.41982836])

In [9]:
lr.intercept_

151.88334520854633

###Now let's build own model for Batch gradient diescent 

In [10]:
class meraBGD:
    
    def __init__(self,epoch=100,learning_rate=0.01):
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.coef_ = None
        self.intercept_ = 0
    
    def fit(self,X_train,y_train):
        self.coef_ = np.ones(X_train.shape[1])
        beta1 = 0
        for i in range(self.epoch):
            y_hat = self.intercept_ + np.dot(X_train,self.coef_) 
            slope = -2* np.mean(y_train - y_hat)
            step_size = self.learning_rate*slope
            self.intercept_ = self.intercept_ - step_size
            
            y_hat_m = self.intercept_ + np.dot(X_train,self.coef_) 
            slope_m = -2* np.dot((y_train-y_hat_m),X_train)/ X_train.shape[0]
            step_size_m = self.learning_rate*slope_m
            self.coef_ = self.coef_ - step_size_m
        #return self.intercept_, self.coef_
    
    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [11]:
BGD = meraBGD()  

In [12]:
BGD.fit(X_train,y_train) 

In [13]:
y_pred = BGD.predict(X_test)  

In [14]:
BGD.coef_

array([ 2.63796551,  1.19432568,  5.06576488,  4.31465526,  2.44962513,
        1.99760286, -1.49436247,  3.60915559,  5.24266399,  3.51641742])

In [15]:
BGD.intercept_

130.52901376992088

In [16]:
# WE have sucessfully made our model .. 
#values are not same of our model and sklearn model beacuse of the epochs and learning rate
#just need to tune epochs and learning rate little more and we will get our values same.