#### **Imports**

In [None]:
import numpy as np

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

from scratchml import LinearRegressionSML

: 

## **Data**

We will be using the https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html dataset to test the model.

In [30]:
# Load the California Housing dataset
data = fetch_california_housing()

In [31]:
# Split the dataset into features (X) and target variable (y)
X = data.data
y = data.target

In [32]:
scaler = StandardScaler()

# Fit the scaler to the features
scaler.fit(X)

# Perform feature scaling on the features
X_scaled = scaler.transform(X)

In [33]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Print the shapes of the resulting sets
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (16512, 8)
y_train shape: (16512,)
X_test shape: (4128, 8)
y_test shape: (4128,)


## **Linear Regression Implementation**

Here an implementation of Linear Regression(from scratch) using only numpy is provided. The LinearRegression class allows to fit the model using the OLS method or Gradient Descent.

In [35]:
# Insantiate models
lr_sklearn = LinearRegression()
lr_ols = LinearRegressionSML()
lr_sgd = LinearRegressionSML()

# Fit models
lr_sklearn.fit(X_train,y_train)
lr_ols.fit(X_train,y_train,method='ols')
lr_sgd.fit(X_train,y_train,method="gradientdesc",learning_rate=0.01, num_iterations = 5000,lambda_val=0.01,verbose=False)

# Evaluate models
mse_sklearn = lr_sklearn.score(X_test,y_test)
mse_ols = lr_ols.evaluate(X_test,y_test)
mse_sgd = lr_sgd.evaluate(X_test,y_test)

print(f"MSE for OLS(Sklearn): {mse_sklearn}")
print(f"MSE for OLS: {mse_ols}")
print(f"MSE for SGD: {mse_sgd}")

MSE for OLS(Sklearn): 0.5757877060324511
MSE for OLS: 0.5558915986952444
MSE for SGD: 0.5600119427714327
