In [63]:
import numpy as np 
import matplotlib.pyplot as plt 

class ScratchLinearRegression:
    def __init__(self, learning_rate, epochs, tol):
        """
        Hyperparameters and parameters definition
        """
        # Hyperparameters
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.tol = tol

        # Parameters
        self.weights = None
        self.bias = None

    def _hypothesis_function(self, weights, bias, X):
        """
        Computes an hypothesis about the final model
        """    
        return np.dot(X, weights ) + bias  # Assuming X is (m, n) and weights is (n,)

    def _cost_function(self, weights, bias, X, y):
        """
        Computes the cost of hypothesizing
        """
        m = X.shape[0]
        LOSS = np.zeros(m)
        f_wb = self._hypothesis_function(weights, bias, X)
        error = np.dot(f_wb-y,f_wb-y)
        LOSS += error
        return sum(LOSS) / (2 * m)
    
    def _initialize_parameter(self, n_features):
        self.weights = np.zeros(n_features)
        self.bias    = 0

    def _compute_gradient(self,weights,bias,X,y):
        """
        Computes the gradient of the cost function 
        Parameters : weights -> weights.shape = (n_features,); dtype: ndarray
                     bias    ->  number; dtype : float 
                     X       -> X.shape   = (n_instances,n_features); dtype: ndarray
                     y       -> y.shape   = (n_instances); dtype:ndarray

        return dj_dw  -> (n_features,); dtype: ndarray
               dj_db  -> number ; dtype : float               
        """
        n_features = X.shape[1]
        m          = X.shape[0]
        dj_dw = np.zeros(n_features)
        dj_db = 0
        k = 0
        for i in range(m):
            inter = 0
            for j in range(n_features):
                inter += weights[j] * X[i][j]
            inter += bias - y[i]
            inter = X[i][k]*inter
            dj_dw[k] += inter
            k+=1
            



    


In [13]:
import sklearn
from sklearn.datasets import _california_housing, fetch_california_housing

raw_data = fetch_california_housing()

In [18]:
import pandas as pd 

df_features = pd.DataFrame(data=raw_data.data,columns=raw_data.feature_names)

df_target = pd.DataFrame(data=raw_data.target,columns=['price'])

df_combined = pd.concat([df_features,df_target],axis=1)

df_combined.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,price
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [42]:
import matplotlib.pyplot as plt 

y = df_combined['price'][:30]
X = df_combined[['HouseAge','AveBedrms']][:30]

y.shape, X.shape

((30,), (30, 2))

In [65]:
X.shape

(30, 2)

In [64]:
model = ScratchLinearRegression(0.001,1000,0.00000001)
#model._hypothesis_function(weights=np.zeros(2),bias=30,X=X)
model._cost_function(np.zeros(2),0,X=X,y=y)


np.float64(77.17879549999999)