<a href="https://colab.research.google.com/github/dropthejase/ml_training/blob/main/ml_from_scratch/linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [25]:
class LinearRegression():

  def __init__(self, iter=1000, lr=0.001, reg=None, reg_lambda=0.01):
    self.iter = iter
    self.lr = lr

    self.reg = reg
    self.reg_lambda = reg_lambda

    self.weight = None
    self.bias = None
  
  def fit(self, X_train, y_train):

    N, D = X_train.shape
    
    # initialise w and b
    
    self.weight = np.ones(D)
    self.bias = 0

    # regularisation

    if self.reg == 'l1':
      reg_dw = self.reg_lambda * (self.weight / np.abs(self.weight))
    elif self.reg == 'l2':
      reg_dw = self.reg_lambda * self.weight
    else:
      reg_dw = 0

    # gradient descent

    for _ in range(self.iter):

      y_pred = self.predict(X_train)

      dw = (1/N) * np.dot(X_train.T, (y_pred - y_train)) + reg_dw
      db = (1/N) * np.sum(y_pred - y_train)

      self.weight -= self.lr * dw
      self.bias -= self.lr * db

  def predict(self, X):
    y_pred = np.dot(X, self.weight) + self.bias
    return y_pred

  def score(self, X_test, y_test, print=False):

    N, D = X_test.shape

    y_pred = self.predict(X_test)

    mse = np.sum((y_test - y_pred)**2) / N
    rmse = mse ** 0.5

    r2 = 1 - (np.sum((y_test - y_pred)**2) / np.sum((y_test - np.mean(y_pred))**2))

    return f'MSE: {np.round(mse,2)}, RMSE: {np.round(rmse,2)}, R2: {np.round(r2,2)}'
    
    if print == True:
      print(f'MSE: {np.round(mse,2)}')
      print(f'RMSE: {np.round(rmse,2)}')
      print(f'R2 score: {np.round(r2,2)}')




**LOAD SAMPLE DATA**

In [26]:
from sklearn import datasets

iris = datasets.load_iris()

In [27]:
df = pd.DataFrame(iris.data, columns=['sepal_length','sepal_width','petal_length','petal_width'])
df['target'] = iris.target

In [28]:
df = df.sample(frac = 1)
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
39,5.1,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
138,6.0,3.0,4.8,1.8,2
30,4.8,3.1,1.6,0.2,0
72,6.3,2.5,4.9,1.5,1


**TEST THE MODEL**

In [36]:
from sklearn.model_selection import train_test_split

In [37]:
X = df.drop('target',axis=1)
y = df['target']

In [38]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [39]:
reg = LinearRegression()

In [40]:
reg.fit(X_train, y_train)

In [41]:
{'weights': np.round(reg.weight,2), 'biases': np.round(reg.bias,2)}

{'weights': array([-0.26,  0.29,  0.25,  0.76]), 'biases': -0.22}

In [42]:
np.round(reg.predict(X_test),0)

array([ 2.,  3.,  2.,  0.,  2.,  1.,  0.,  1.,  0.,  1.,  0.,  0., -0.,
        1.,  2.,  0.,  2.,  0.,  0.,  2.,  2.,  0., -0., -0.,  2.,  1.,
       -0.,  2.,  2.,  0.,  2.,  2.,  2.,  2., -0.,  1.,  2.,  1.,  1.,
        0.,  0.,  1.,  2.,  0.,  0.,  1.,  2.,  1.,  2.,  2.])

In [43]:
reg.score(X_test, y_test)

'MSE: 0.05, RMSE: 0.23, R2: 0.93'

**Compare with Scikit Learn**

In [44]:
from sklearn import linear_model

In [45]:
reg_scikit = linear_model.LinearRegression()

In [46]:
reg_scikit.fit(X_train, y_train)

In [47]:
{'weights': np.round(reg_scikit.coef_,2), 'biases': np.round(reg_scikit.intercept_,2)}

{'weights': array([-0.11, -0.06,  0.23,  0.6 ]), 'biases': 0.22}

In [48]:
np.round(reg_scikit.predict(X_test),0)

array([ 2.,  2.,  2., -0.,  2.,  1.,  0.,  1., -0.,  1., -0., -0., -0.,
        1.,  2.,  0.,  2., -0.,  0.,  2.,  2.,  0., -0., -0.,  2.,  1.,
       -0.,  2.,  2., -0.,  2.,  2.,  2.,  2., -0.,  1.,  2.,  1.,  1.,
       -0., -0.,  1.,  1., -0.,  0.,  1.,  2.,  1.,  2.,  2.])

In [49]:
reg_scikit.score(X_test, y_test)

0.9517646199458922