#### **Imports**

In [1]:
import numpy as np

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

from linear import LinearRegressionSML

## **Data**

We will be using the [California Housing](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) dataset from scikit-learn to test the model.

In [2]:
# Load the California Housing dataset via scikit-learn's fetch helper.
# The returned object exposes `.data` (features) and `.target` (regression
# target), which the following cell reads.
data = fetch_california_housing()

In [3]:
# Pull the feature matrix (X) and the target vector (y) out of the loaded dataset
X, y = data.data, data.target

In [4]:
scaler = StandardScaler()

# Fit the scaler to the features
scaler.fit(X)

# Perform feature scaling on the features
X_scaled = scaler.transform(X)

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Print the shapes of the resulting sets
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (16512, 8)
y_train shape: (16512,)
X_test shape: (4128, 8)
y_test shape: (4128,)


## **Linear Regression Implementation**

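This section uses an implementation of linear regression written from scratch using only NumPy. The `LinearRegressionSML` class (imported from `linear`) can fit the model using either the OLS method or gradient descent; both variants are compared below against scikit-learn's `LinearRegression`.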

In [21]:
# --- scikit-learn reference model ---------------------------------------
lr_sklearn = LinearRegression()
lr_sklearn.fit(X_train, y_train)

# --- from-scratch model, fitted with the OLS method ----------------------
lr_ols = LinearRegressionSML()
lr_ols.fit(X_train, y_train, method='ols')

# --- from-scratch model, fitted with gradient descent --------------------
# verbose=True makes the fit log the running MSE (every 10 iterations in the
# output recorded below).
lr_sgd = LinearRegressionSML()
lr_sgd.fit(
    X_train,
    y_train,
    method="gradientdesc",
    learning_rate=0.00001,
    num_iterations=1000,
    verbose=True,
)

# --- test-set error for each model ----------------------------------------
# scikit-learn: predict on the held-out features, then score with
# mean_squared_error.
y_pred = lr_sklearn.predict(X_test)
mse_sklearn = mean_squared_error(y_test, y_pred)

# From-scratch models: evaluate() is presumed to return the test MSE
# (it is reported as such below) -- confirm against `linear.py`.
mse_ols = lr_ols.evaluate(X_test, y_test)
mse_sgd = lr_sgd.evaluate(X_test, y_test)

print(f"MSE for OLS(Sklearn): {mse_sklearn}")
print(f"MSE for OLS: {mse_ols}")
print(f"MSE for GD: {mse_sgd}")

Iteration 10: Mean Squared Error = 1.06
Iteration 20: Mean Squared Error = 0.84
Iteration 30: Mean Squared Error = 0.77
Iteration 40: Mean Squared Error = 0.72
Iteration 50: Mean Squared Error = 0.68
Iteration 60: Mean Squared Error = 0.65
Iteration 70: Mean Squared Error = 0.62
Iteration 80: Mean Squared Error = 0.60
Iteration 90: Mean Squared Error = 0.59
Iteration 100: Mean Squared Error = 0.57
Iteration 110: Mean Squared Error = 0.56
Iteration 120: Mean Squared Error = 0.55
Iteration 130: Mean Squared Error = 0.55
Iteration 140: Mean Squared Error = 0.54
Iteration 150: Mean Squared Error = 0.54
Iteration 160: Mean Squared Error = 0.53
Iteration 170: Mean Squared Error = 0.53
Iteration 180: Mean Squared Error = 0.53
Iteration 190: Mean Squared Error = 0.53
Iteration 200: Mean Squared Error = 0.53
Iteration 210: Mean Squared Error = 0.52
Iteration 220: Mean Squared Error = 0.52
Iteration 230: Mean Squared Error = 0.52
Iteration 240: Mean Squared Error = 0.52
Iteration 250: Mean Squar