# Training the from-scratch linear regression model

In this notebook, I train the linear regression model that was built from scratch and is fitted with batch gradient descent. Afterwards, the model is evaluated on the test set using the mean squared error (MSE) and R-squared.

In [None]:
# Import sys package and add relative path so src can be reached
import sys
sys.path.append("..")

# Import additional packages
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
from sklearn.metrics import mean_squared_error, r2_score

# Import the model class from the from-scratch model
from src.linear_reg_from_scratch import LinearRegFromScratch

# Adjust display settings: show every DataFrame column (no truncation of
# wide frames) and render matplotlib figures inline in the notebook
pd.set_option("display.max_columns", None)
%matplotlib inline

## Training on preprocessed data (dataset 1)

In [None]:
# Read the preprocessed train and test splits from CSV
train = pd.read_csv("../data/processed/train.csv")
test = pd.read_csv("../data/processed/test.csv")

# For each split, separate the feature matrix (every column except "target")
# from the label vector and convert both to NumPy arrays
X_train, y_train = train.drop(columns="target").values, train["target"].values
X_test, y_test = test.drop(columns="target").values, test["target"].values

In [None]:
# Hyperparameters for the from-scratch model, collected in one place.
# print_every=100 logs the cost every 100 iterations (see the output below);
# tol_loss is presumably the loss-change threshold for early stopping --
# confirm against src/linear_reg_from_scratch.py
gd_settings = {
    "learning_rate": 0.01,
    "n_iterations": 1500,
    "tol_loss": 1e-7,
    "print_every": 100,
}

# Build the model and fit it on the training split
model_own = LinearRegFromScratch(**gd_settings)
model_own.fit(X_train, y_train)

# Print the model summary (settings, learned weights and bias)
print(model_own)

Iteration 100, cost = 1170.307707341779
Iteration 200, cost = 158.29639970948148
Iteration 300, cost = 21.427441469995546
Iteration 400, cost = 2.905679739374242
Iteration 500, cost = 0.39763426507370286
Iteration 600, cost = 0.057786909539438394
Iteration 700, cost = 0.011703035317520911
Iteration 800, cost = 0.005449096753904045
Iteration 900, cost = 0.004599676693406218
Iteration 1000, cost = 0.004484203204343002
Converged at iteration 1066 with cost = 0.004470892124740759
LinearRegFromScratch(learning rate = 0.01, n iterations = 1500, loss tolerance = 1e-07)
weights = [94.5011 49.0403  5.4685 75.1761  4.3804]
bias = 3.1864


In [None]:
# Predict the target for the held-out test features with the fitted model
y_pred_own = model_own.predict(X_test)

In [None]:
# Score the test-set predictions with two metrics:
#   - mean squared error (lower is better)
#   - coefficient of determination R^2 (1.0 means a perfect fit)
mse_own, r2_own = (
    mean_squared_error(y_test, y_pred_own),
    r2_score(y_test, y_pred_own),
)

# Report both metrics rounded to four decimals
print(f"Own model: MSE = {mse_own:.4f}, R-Squared = {r2_own:.4f}")

Own model: MSE = 0.0099, R-Squared = 1.0000


In [None]:
# Collect the evaluation metrics of the from-scratch model in one record
results_payload = {"Model": "Own with batch GD", "MSE": mse_own, "R-Squared": r2_own}

# Make sure the results folder exists, then write the record as indented JSON
os.makedirs("../data/results", exist_ok=True)
with open("../data/results/metrics_own.json", "w") as fh:
    json.dump(results_payload, fh, indent=2)