In [43]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

class LinearRegression:
    """Multivariate linear regression trained with full-batch gradient descent.

    The model is ``y_hat = X @ theta + bias``. Each epoch takes one step of
    size ``alpha`` along the negative gradient of half the mean squared
    error, computed over all rows at once.

    Parameters
    ----------
    alpha : float, default 0.05
        Learning rate (multiplier applied to the gradient at every step).
    epoch : int, default 10000
        Number of gradient steps performed by ``fit``.

    Attributes
    ----------
    theta : numpy.ndarray of shape (n_features,), or None before ``fit``
        Learned feature weights.
    bias : float, or None before ``fit``
        Learned intercept.
    """

    def __init__(self, alpha = 0.05, epoch = 10000):
        self.alpha = alpha
        self.epoch = epoch
        self.theta = None
        self.bias = None

    def fit(self, X, Y):
        """Learn ``theta`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix; converted to a float array.
        Y : array-like with n_samples values
            Targets; flattened to one dimension.

        Returns
        -------
        LinearRegression
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or ``Y`` does not contain
            exactly one value per row of ``X``.
        """
        x = np.asarray(X, dtype=float)
        y = np.asarray(Y, dtype=float).ravel()

        if x.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got shape {x.shape}"
            )
        num_samples, num_features = x.shape
        if num_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        # Without this check a length-1 Y would broadcast silently against
        # the predictions and the model would train on wrong residuals.
        if y.shape[0] != num_samples:
            raise ValueError(
                f"Y has {y.shape[0]} values but X has {num_samples} rows"
            )

        self.theta = np.zeros(num_features)
        self.bias = 0.0

        # Loop-invariant averaging factor, hoisted out of the training loop.
        inv_n = 1 / num_samples

        for _ in range(self.epoch):
            y_pred = x @ self.theta + self.bias
            error = y_pred - y

            # Gradients of (1 / 2n) * sum(error ** 2) w.r.t. theta and bias.
            theta_change = inv_n * (x.T @ error)
            bias_change = inv_n * np.sum(error)

            self.theta -= self.alpha * theta_change
            self.bias -= self.alpha * bias_change

        return self

    def final(self, X):
        """Predict targets for ``X`` using the fitted parameters.

        Parameters
        ----------
        X : array-like whose last dimension has n_features entries

        Returns
        -------
        numpy.ndarray
            ``X @ theta + bias``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.theta is None or self.bias is None:
            raise RuntimeError("model is not fitted; call fit() before final()")
        x = np.asarray(X, dtype=float)
        return x @ self.theta + self.bias




In [52]:

# Load the housing table from the CSV file.
df = pd.read_csv('housing.csv')

# Median-impute every numeric column in a single vectorised call: fillna with
# a Series fills each column whose label appears in that Series' index.
df = df.fillna(df.select_dtypes(include='number').median())

# Divisor applied to the target before training; the same constant maps the
# predictions back, so both directions always stay in sync.
TARGET_SCALE = 100000

X = df.drop("median_house_value", axis=1)
y = df["median_house_value"].to_numpy(dtype=float, copy=True)
X = pd.get_dummies(X, drop_first=True)  # one-hot encode non-numeric columns
X = X.astype(float)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = y / TARGET_SCALE


md = LinearRegression(alpha = 0.10, epoch = 1000000)
md.fit(X_scaled, y_scaled)


# NOTE: predictions are made on the same rows the model was fitted on, so any
# score computed from `pred` is an in-sample figure.
pred_scaled = md.final(X_scaled)
pred = pred_scaled * TARGET_SCALE
print(pred[:20])

def r2_score(y_true, y_pred):
    """Return the coefficient of determination R² of ``y_pred`` against ``y_true``.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of ``y_true``
    from its mean.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values; must contain as many values as ``y_true``.

    Returns
    -------
    float
        The R² score. When ``y_true`` is constant (``SS_tot == 0``) the ratio
        is undefined; 1.0 is returned for a perfect prediction and 0.0
        otherwise, so the result is always finite.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of values.
    """
    # Flatten both sides so (n,) vs (n, 1) inputs cannot broadcast into an
    # (n, n) matrix and silently produce a meaningless score.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.size != predicted.size:
        raise ValueError(
            f"y_true has {actual.size} values but y_pred has {predicted.size}"
        )

    ss_res = ((actual - predicted) ** 2).sum()
    ss_tot = ((actual - actual.mean()) ** 2).sum()
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return float(1 - ss_res / ss_tot)

# Report R² of the rescaled predictions against the unscaled target values.
print("R² score:", r2_score(y_true=y, y_pred=pred))


[408492.35829822 423996.66388553 378466.63041436 321115.43273007
 255844.57608296 262638.06036665 258517.0154351  256928.73320209
 201376.10134427 268124.12875957 237020.93244157 258660.90817032
 231291.42830864 210994.44994971 203868.08620304 185094.28432479
 214673.81553514 192450.17493269 187682.26162404 206654.29763358]
R² score: 0.6454530166046623
