In [228]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [454]:

class Model:
    
    def __init__(self):
        self.theta= None
        np.random.seed(100)

    def fit(self, X: np.ndarray, y: np.ndarray,iterations=50,learning_rate = 0.01) -> None:
        #SGD
        m = X.shape[0]
        n = X.shape[1]+1
        self.theta = np.random.randn(n,1)
        X_b = np.c_[np.ones((m,1)),X]
        for i in range(iterations):
            for i in range(m):
                random_index = np.random.randint(m)
                xi = X_b[random_index:random_index+1]
                yi = y[random_index:random_index+1]
                gradient = xi.T.dot(yi - xi.dot(self.theta)) + (1/m)*self.theta
                self.theta = self.theta + learning_rate*gradient
                #gradient = (yi - xi.dot(theta)).dot(xi) + theta.T/m
                #theta = theta + learning_rate*gradient.T
        return

    def fit_batch(self, X: np.ndarray, y: np.ndarray,iterations=10000,batch_size=16,learning_rate = 0.01) -> None:
        #Mini batch gradient
        m = X.shape[0]
        n = X.shape[1]+1
        self.theta = np.random.randn(n,1)
        X_b = np.c_[np.ones((m,1)),X]
        for i in range(iterations):
            indexes = np.random.randint(low=0,high=m,size=batch_size)
            xi = X_b[indexes]
            yi = y[indexes].reshape(batch_size,1)
            gradient =(1/batch_size)* xi.T.dot(yi - xi.dot(self.theta))
            self.theta = self.theta + learning_rate*gradient
        return


    def coef(self):
        return self.theta


    def predict(self, X: np.ndarray) -> np.ndarray:
        m = X.shape[0]
        X_b = np.c_[np.ones((m,1)),X]
        y_predict = X_b.dot(self.theta)
        return y_predict.reshape(m,)

    @staticmethod
    def evaluate(y_true: np.ndarray, y_pred: np.ndarray) -> float:
        return np.sqrt(np.mean((y_true-y_pred)**2))


In [441]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
X,y,coef= make_regression(n_samples=1000,n_features=1,n_informative=1,coef=True,bias=10)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2)

In [455]:
lin = Model()
lin.fit(X_train,y_train)
pred = lin.predict(X_test)
lin.evaluate(y_test,pred)

0.11819598817451804

In [450]:
lin1 = Model()
lin1.fit_batch(X_train,y_train)
pred = lin1.predict(X_test)
lin1.evaluate(y_test,pred)

2.8428973524033527e-13

In [444]:
lin_reg = LinearRegression()
lin_reg.fit(X_train,y_train)
pred = lin_reg.predict(X_test)
mean_squared_error(y_test,pred)

9.834018930575654e-28