In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [46]:
class BostonHousingDataset:
    def __init__(self):
        self.url = "http://lib.stat.cmu.edu/datasets/boston"
        self.feature_names = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"]

    def load_dataset(self):
        # Fetch data from URL
        raw_df = pd.read_csv(self.url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

        # Create the dictionary in sklearn format
        dataset = {
            'data': [],
            'target': [],
            'feature_names': self.feature_names,
            'DESCR': 'Boston House Prices dataset'
        }

        dataset['data'] = data
        dataset['target'] = target

        return dataset

  raw_df = pd.read_csv(self.url, sep="\s+", skiprows=22, header=None)


In [47]:
boston_housing = BostonHousingDataset()
boston_dataset = boston_housing.load_dataset()
boston_dataset.keys(), boston_dataset['DESCR']

(dict_keys(['data', 'target', 'feature_names', 'DESCR']),
 'Boston House Prices dataset')

In [48]:
boston = pd.DataFrame(boston_dataset['data'], columns=boston_dataset['feature_names'])
boston['TARGET'] = boston_dataset['target']
boston.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,TARGET
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [49]:
X = boston.drop("TARGET" , axis=1)
y = boston["TARGET"]

In [50]:
from sklearn.preprocessing import StandardScaler
import random

In [78]:

class LinearRegression:

    def __init__(self , learning_rate , iterations) -> None:

        self.learning_rate = learning_rate
        self.iterations = iterations
        self.W = None
        self.b = None
        self.scaler = StandardScaler()
  
    def fit(self , X , y , initialze_type):

        #X_scaled = self.scaler.fit_transform(X)
        X_scaled = self.scale(X)
        #X_scaled = X
        #m -> number of data entry in the dataset 
        #n -> Features in the dataset -> 13
        m , n = X_scaled.shape

        self.W , self.b = self.initialize_weight(n , initialze_type)

        for iterator in range(1 , self.iterations + 1):

            y_predicted = np.dot(X_scaled , self.W) + self.b

            dw = (2/m) * np.dot(X_scaled.T , (y_predicted - y))
            db = (2/m) * np.sum(y_predicted - y)


            self.W -= self.learning_rate * dw
            self.b -= self.learning_rate * db

            if iterator % 100 == 0:
                mean_sq_loss = self.mean_sq_error(y , y_predicted)
                mean_absolute_loss = self.mean_abs_error(y , y_predicted)
                print(f"Epoch {iterator} : Mean Square Loss : {mean_sq_loss}")
                print(f"Epoch {iterator} : Mean Absolute Loss : {mean_absolute_loss}")
                print(f"Epoch {iterator} : Root Mean Square Loss : {self.root_mean_square_error(y , y_predicted)}")
    def initialize_weight(self , n , type):
        if type == "zero":
            return np.zeros(n) , 0
        if type == "random":
            return np.random.rand(n) , 0
    
    def predict(self , X):
        X = self.scaler.fit_transform(X)
        return np.dot(X  , self.W) + self.b

    def mean_sq_error(self , actual_y , y_pred):
        return np.mean((actual_y - y_pred)**2)
    
    def mean_abs_error(self, acutal_y , y_pred):
        return np.mean(acutal_y - y_pred)
    
    def root_mean_square_error(self, acutal_y , y_pred):
        return np.sqrt((self.mean_sq_error(acutal_y , y_pred)))
    
    def scale(self, X):
        return (X - np.mean(X , axis=0))/np.std(X , axis=0)

In [79]:
from sklearn.model_selection import train_test_split

X_train , X_test , y_train , y_test = train_test_split(X , y , test_size=0.2 , random_state=42)

In [80]:
X_train.shape

(404, 13)

In [81]:
y_train.shape

(404,)

In [82]:
model = LinearRegression(learning_rate=0.01 , iterations=1000)


In [84]:
model = LinearRegression(learning_rate=0.01 , iterations=1000)
model.fit(X_train , y_train , initialze_type="zero")

Epoch 100 : Mean Square Loss : 32.99474749462366
Epoch 100 : Mean Absolute Loss : 3.0849656139611508
Epoch 100 : Root Mean Square Loss : 5.744105456433026
Epoch 200 : Mean Square Loss : 22.644244803295788
Epoch 200 : Mean Absolute Loss : 0.4091267696741078
Epoch 200 : Root Mean Square Loss : 4.758596936418947
Epoch 300 : Mean Square Loss : 22.146148622960265
Epoch 300 : Mean Absolute Loss : 0.05425821049883106
Epoch 300 : Root Mean Square Loss : 4.705969466853802
Epoch 400 : Mean Square Loss : 21.965330944369036
Epoch 400 : Mean Absolute Loss : 0.007195699780000066
Epoch 400 : Root Mean Square Loss : 4.686718568931682
Epoch 500 : Mean Square Loss : 21.860790945662487
Epoch 500 : Mean Absolute Loss : 0.000954290509171595
Epoch 500 : Root Mean Square Loss : 4.675552474912723
Epoch 600 : Mean Square Loss : 21.79572544460893
Epoch 600 : Mean Absolute Loss : 0.00012655758352523712
Epoch 600 : Root Mean Square Loss : 4.668589234941208
Epoch 700 : Mean Square Loss : 21.753147260216608
Epoch 7

In [34]:
y_pred = model.predict(X_test)
print("Mean Absolute Error for Test Data : " , model.mean_abs_error(y_test , y_pred))
print("Mean Square Error for Test Data : " , model.mean_sq_error(y_test , y_pred))
print("Root Mean square Error for Test Data : " , model.root_mean_square_error(y_test , y_pred))

Mean Absolute Error for Test Data :  -1.3082993209818778
Mean Square Error for Test Data :  27.726567814211837
Root Mean square Error for Test Data :  5.265602322072171
