# Linear Regression  - Gradient Descent

Data - https://drive.google.com/file/d/1ItJT3faKTNi4eZNo3GGnUORDO2_7C6JZ/view?usp=sharing

## 1. Linear Regression Scratch - Gradient Descent

In [1]:
import numpy as np

class LinearRegressionScratchGD:
    
    def __init__(self, iterations = 2000, learning_rate=0.01, option='BatchGD'):
        self.coef_ = None
        self.intercept_ = None
        self.iterations = iterations
        self.learning_rate = learning_rate
        # option could be 'BatchGD', 'MiniBatchGD', 'SGD'
        self.option = option
    
    def fit(self,X,y):
        X = np.array(X)
        y = np.array(y)
        weights = np.array([np.random.randn(1)[0] for i in range(len(X[0])+1)])
        # Doing 2000 iterations for convergence
        X = np.array([[1]+list(i) for i in X])
        if(self.option == 'MiniBatchGD'):
            indices = [np.random.randint(len(X)) for i in range(int(0.1*len(X)))]
            X_new = X[indices]
            y_new = y[indices]
        elif(self.option == 'SGD'):
            indices = [np.random.randint(len(X))]
            X_new = X[indices]
            y_new = y[indices]
        else:
            X_new = X
            y_new = y
        y_err = y_new - (weights @ X_new.T)
        for i in range(self.iterations):
            weights += (2*self.learning_rate*(y_err @ X_new))/len(X_new)
            y_err = y_new - (weights @ X_new.T)
            if(self.option == 'MiniBatchGD'):
                indices = [np.random.randint(len(X)) for i in range(int(0.1*len(X)))]
                X_new = X[indices]
                y_new = y[indices]
            elif(self.option == 'SGD'):
                indices = [np.random.randint(len(X))]
                X_new = X[indices]
                y_new = y[indices]
            else:
                X_new = X
                y_new = y
        self.intercept_ = np.array([weights[0]])
        self.coef_ = weights[1:]
        self.weights = weights
    
    def predict(self,X):
        X = np.array(X)
        newX = []
        for i in X:
            newX.append([1]+list(i))
        X = np.array(newX)
        return X @ self.weights.T

In [2]:
import pandas as pd
link = 'https://drive.google.com/file/d/1ItJT3faKTNi4eZNo3GGnUORDO2_7C6JZ/view?usp=sharing'
df = pd.read_csv(f'https://drive.google.com/uc?id={link.split("/")[-2]}')
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df.drop('package',axis=1),df['package'],test_size=0.2,random_state=42)

In [4]:
lr_scratch_gd = LinearRegressionScratchGD(iterations=20000,learning_rate=0.01,option='BatchGD')

In [5]:
lr_scratch_gd.fit(X_train, y_train)

In [6]:
lr_scratch_gd.coef_

array([0.57423909])

In [7]:
lr_scratch_gd.intercept_

array([-1.02688228])

In [8]:
lr_scratch_gd_preds = lr_scratch_gd.predict(X_test)

In [9]:
from sklearn.metrics import r2_score
r2_score(lr_scratch_gd_preds,y_test)

0.7337850020750591

## 2. Linear Regression Sklearn Library - Gradient Descent

In [10]:
from sklearn.linear_model import SGDRegressor

In [11]:
sgd_regressor = SGDRegressor(learning_rate='constant', eta0=0.01)

In [12]:
sgd_regressor.fit(X_train, y_train)

SGDRegressor(learning_rate='constant')

In [13]:
sgd_regressor.coef_

array([0.43100204])

In [14]:
sgd_regressor.intercept_

array([-0.18296192])

In [15]:
sgd_regressor_preds = sgd_regressor.predict(X_test)

In [16]:
from sklearn.metrics import r2_score
r2_score(sgd_regressor_preds,y_test)

0.2582458515724062