In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

class LinearRegression(object):
    """Linear regression trained with full-batch gradient descent.

    Model: ``h = X . W + b``.
    Cost:  ``J = sum((h - y)**2) / (2 * m)`` where ``m`` is the sample count.

    Attributes:
        W: weight vector of shape ``(n_features,)``; ``None`` before ``fit``.
        b: scalar bias; ``None`` before ``fit``.
        length: number of features seen by the last ``fit``.
        m: number of samples seen by the last ``fit``.
        costs: cost recorded at every iteration of the last ``fit``.
        iterations: 0-based iteration indices aligned with ``costs``.
    """

    def __init__(self, seed=None):
        # NOTE: this seeds NumPy's *global* RNG (kept for backward
        # compatibility); the initial weights are drawn from it in fit().
        np.random.seed(seed if seed is not None else np.random.randint(100))
        self.W = None
        self.b = None
        self.length = None
        self.m = None
        self.costs = []
        self.iterations = []

    def __initialize_weights_and_bias(self):
        """Draw random initial weights and set the bias to zero."""
        self.W = np.random.randn(self.length)  # shape: (n_features,)
        self.b = 0

    def __computeCost(self, h, Y):
        """Return half the mean squared error between predictions and targets."""
        loss = np.square(h - Y)
        return np.sum(loss) / (2 * self.m)

    def __optimize(self, X, Y, lr):
        """Apply one gradient-descent update to the weights and the bias."""
        residual = self.predict(X) - Y
        dW = np.dot(X.T, residual) / self.m
        db = np.sum(residual) / self.m
        self.W = self.W - lr * dW
        self.b = self.b - lr * db

    def fit(self, X, y, lr=1e-5, tol=0.01, verbose=True, max_iter=None):
        """Fit the model to ``X`` / ``y`` by gradient descent.

        Training stops as soon as the cost improves by less than ``tol``
        between two consecutive iterations (this includes the cost going
        up), or after ``max_iter`` update steps when ``max_iter`` is given.

        Args:
            X: 2-D array-like of shape ``(m, n_features)``.
            y: array-like with ``m`` target values; flattened to 1-D so a
                column vector does not broadcast against the predictions.
            lr: learning rate.
            tol: minimum cost improvement required to keep iterating.
            verbose: print the cost at every iteration.
            max_iter: optional cap on the number of update steps
                (``None`` means no cap).

        Raises:
            ValueError: on malformed input, or when the cost becomes
                NaN/inf (previously this case looped forever).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (samples, features)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        if max_iter is not None and max_iter < 1:
            raise ValueError("max_iter must be a positive integer or None")

        self.m, self.length = X.shape
        self.__initialize_weights_and_bias()
        # Start a fresh history so refitting does not mix two training runs.
        self.costs = []
        self.iterations = []

        last_cost, i = float('inf'), 0
        while True:
            cost = self.__computeCost(self.predict(X), y)
            if not np.isfinite(cost):
                # NaN never satisfies the stopping test below, so without
                # this guard the loop would never terminate.
                raise ValueError(
                    f"Cost became non-finite at iteration {i}; check the "
                    "input for NaN/inf values or lower the learning rate"
                )
            if verbose: print(f"Iteration: {i}, Cost: {cost:.3f}")
            # Record before the stopping test so the final cost is kept and
            # each cost is paired with the iteration that produced it.
            self.costs.append(cost)
            self.iterations.append(i)
            self.__optimize(X, y, lr)
            if last_cost - cost < tol:
                break
            last_cost, i = cost, i + 1
            if max_iter is not None and i >= max_iter:
                break

    def predict(self, X):
        """Return the predictions ``X . W + b``; call ``fit`` first."""
        return np.dot(X, self.W) + self.b

    def plot(self, figsize=(7, 5)):
        """Plot the recorded cost against the iteration index."""
        plt.figure(figsize=figsize)
        plt.plot(self.iterations, self.costs)
        plt.xlabel('Iterations')
        plt.ylabel('Cost')
        plt.title("Iterations vs Cost")
        plt.show()

    def score(self, X, y):
        """Return R^2 = 1 - (residual sum of squares / total sum of squares)."""
        residual_ss = np.sum((self.predict(X) - y) ** 2)
        total_ss = np.sum((y - np.mean(y)) ** 2)
        return 1 - residual_ss / total_ss

    @property
    def weights(self): return self.W

    @property
    def bias(self): return self.b

$ \text{output}\,(h) = X \cdot W + b $

$ \text{cost}\,(J) = \frac{1}{2m} \sum_{i=1}^{m} (h_i - Y_i)^2 $

$ \large \frac{\partial J}{\partial W} = \frac{1}{m} \small X^{T} \cdot (h - Y) $

$ \large \frac{\partial J}{\partial b} = \frac{1}{m} \small \sum_{i=1}^{m} (h_i - Y_i) $

In [None]:
def normalize(X):
    """Standardize X column-wise to zero mean and unit standard deviation.

    Args:
        X: array whose statistics are taken along axis 0 (one per column).

    Returns:
        ``(X - mean) / std`` computed per column. A column with zero
        standard deviation (a constant feature) is returned as all zeros
        instead of NaN/inf.
    """
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    # Dividing by a zero std would produce NaN/inf; use 1 for such columns
    # so the centred (all-zero) values pass through unchanged.
    safe_std = np.where(std == 0, 1, std)
    return (X - mean) / safe_std

def train_test_split(X,Y,test_size=None,seed=5):
    """
    Randomly split X and Y into a train part and a test part.

    Each sample is assigned independently: it goes to the test part with
    probability ``test_size`` and to the train part otherwise, so the
    resulting sizes are only approximately proportional to ``test_size``.

    Args
    X : array of samples, indexable with a boolean mask
    Y : array of targets with the same length as X
    test_size : probability in [0, 1] of a sample landing in the test part
    seed : value used to seed NumPy's global random generator

    Returns
    (X_train, X_test, Y_train, Y_test)

    Raises
    ValueError : if test_size is None or outside [0, 1], or if X and Y
                 differ in length
    """
    # Explicit checks instead of `assert`, which is removed under `python -O`.
    if test_size is None:
        raise ValueError("test_size cannot be None")
    if not 0 <= test_size <= 1:
        raise ValueError("test_size must be between 0 and 1")
    if len(X) != len(Y):
        raise ValueError("X and Y must have the same length")
    np.random.seed(seed)
    # True marks a training sample, drawn with probability 1 - test_size.
    train_mask = np.random.choice([False,True],size=len(X),p=[test_size,1-test_size])
    return X[train_mask],X[~train_mask],Y[train_mask],Y[~train_mask]

In [None]:
def root_mean_squared_error(Y,y):
    """
    Root of the mean squared difference between predictions and targets.

    Args
    Y : predicted values
    y : true values

    Returns
    sqrt(mean((Y - y)^2))
    """
    residual = Y-y
    return np.sqrt(np.mean(np.square(residual)))

In [None]:
# Load the dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('../Decision Tree Regressor/data.csv')
# Preview the first rows.
data.head()

In [None]:
# Features: every column except 'medv'; target: the 'medv' column.
X = data.drop('medv',axis=1).values
Y = data['medv'].values
# Show the array shapes.
X.shape,Y.shape

In [None]:
# Standardize every feature column using statistics of the full dataset.
# NOTE(review): this runs before the split below, so the validation rows
# contribute to the mean/std applied to the training rows.
X = normalize(X)  # column-wise (X - mean) / std

# Random split; each row lands in the validation part with probability 0.3.
X_train,X_val,Y_train,Y_val = train_test_split(X,Y,test_size=0.3,seed=7)
X_train.shape,X_val.shape,Y_train.shape,Y_val.shape

In [None]:
# `lr` is the model instance here; the `lr=0.1` keyword below is the learning rate.
lr = LinearRegression(seed=5)
lr.fit(X_train,Y_train,lr=0.1,verbose=False)
# R^2 on the validation split.
lr.score(X_val,Y_val)

In [None]:
# RMSE of the model's predictions on the validation split.
root_mean_squared_error( lr.predict(X_val), Y_val )

In [None]:
# Plot the cost recorded during training against the iteration index.
lr.plot()

In [None]:
# Learned weight vector (one entry per feature column).
lr.weights

In [None]:
# Learned bias term.
lr.bias