In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.datasets import load_diabetes

## Load the dataset into a DataFrame

In [None]:
# Load the scikit-learn diabetes dataset as one pandas DataFrame; the "bmi" and
# "target" columns of this frame are the ones used in the rest of the notebook.
df = load_diabetes(as_frame=True).frame

In [None]:
# Preview the first rows to sanity-check the loaded columns.
df.head()

## Plot the BMI vs. Target

In [None]:
def create_plot(x, y, x_line=None, y_line=None):
    """Build the BMI-vs-target scatter figure, optionally with a fitted line.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the scatter points.
    x_line, y_line : array-like, optional
        Coordinates of a line to overlay in red. The overlay is drawn only
        when both are supplied.

    Returns
    -------
    The plotly figure (not shown; the caller decides when to call ``show``).
    """
    fig = px.scatter(
        x=x,
        y=y,
        title="Body Mass Index (BMI) vs. Diabetes Progression Target",
    )
    fig.update_layout(
        xaxis_title="Body Mass Index (BMI)",
        yaxis_title="Diabetes Progression Target",
    )

    # Overlay the line only when both of its coordinate arrays were given.
    if x_line is not None and y_line is not None:
        best_fit_trace = go.Scatter(
            x=x_line,
            y=y_line,
            mode="lines",
            name="Best Fit Line",
            line={"color": "red", "width": 2},
        )
        fig.add_trace(best_fit_trace)

    return fig

In [None]:
# Feature (BMI) and response (target) columns used throughout the notebook.
x, y = df["bmi"], df["target"]

# Scatter only: no line coordinates are passed, so nothing is overlaid.
fig = create_plot(x=x, y=y)
fig.show()

## To find the best fit line, we start from an arbitrary initial guess for the slope and intercept

In [None]:
# Starting guess for the line y = m * x + b (slope first, then intercept).
m, b = 1, 0

In [None]:
# Evaluate the current line at 10 evenly spaced points spanning the range of x.
x_line = np.linspace(min(x), max(x), num=10)
y_line = b + m * x_line
fig = create_plot(x, y, x_line=x_line, y_line=y_line)
fig.show()

## Calculate the mean squared error (MSE) of the predictions

In [None]:
def calculate_mse(_m, _b, x_vals=None, y_vals=None):
    """Return the mean squared error of the line ``y = _m * x + _b``.

    Parameters
    ----------
    _m : float
        Slope of the candidate line.
    _b : float
        Intercept of the candidate line.
    x_vals, y_vals : array-like, optional
        Inputs and observed targets to score against, paired by position.
        When omitted, the notebook-level ``x`` and ``y`` are used, so existing
        ``calculate_mse(m, b)`` calls keep working unchanged.

    Returns
    -------
    float
        Mean of the squared residuals ``(_m * x + _b - y) ** 2``.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    # Fall back to the notebook globals only when no data is passed explicitly.
    xs = np.asarray(x if x_vals is None else x_vals, dtype=float)
    ys = np.asarray(y if y_vals is None else y_vals, dtype=float)

    if xs.shape != ys.shape:
        raise ValueError(
            f"x and y must have the same shape, got {xs.shape} and {ys.shape}."
        )
    if xs.size == 0:
        raise ValueError("Cannot compute the MSE of an empty dataset.")

    errors = _m * xs + _b - ys
    return np.mean(errors * errors)

## Compute the gradients and update the slope and intercept until the loss converges

In [None]:
# Gradient-descent settings.
N = len(x)  # number of samples
max_iterations = 50_000  # upper bound on the number of update steps
epsilon = 1e-8  # loss-change threshold used as the stopping criterion
alpha = 0.05  # learning rate (step size)

In [None]:
def gradient_step(_m, _b, x_vals=None, y_vals=None, learning_rate=None):
    """Apply one gradient-descent update to the line ``y = _m * x + _b``.

    The loss is the mean squared error, whose partial derivatives are
    ``2 * mean(error * x)`` for the slope and ``2 * mean(error)`` for the
    intercept, with ``error = _m * x + _b - y``.

    Parameters
    ----------
    _m : float
        Current slope.
    _b : float
        Current intercept.
    x_vals, y_vals : array-like, optional
        Inputs and observed targets, paired by position. Default to the
        notebook-level ``x`` and ``y``.
    learning_rate : float, optional
        Step size. Defaults to the notebook-level ``alpha``.

    Returns
    -------
    tuple
        ``(new_m, new_b)`` after moving against the gradient.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    # Fall back to the notebook globals only when nothing is passed explicitly.
    xs = np.asarray(x if x_vals is None else x_vals, dtype=float)
    ys = np.asarray(y if y_vals is None else y_vals, dtype=float)
    step_size = alpha if learning_rate is None else learning_rate

    if xs.shape != ys.shape:
        raise ValueError(
            f"x and y must have the same shape, got {xs.shape} and {ys.shape}."
        )
    if xs.size == 0:
        raise ValueError("Cannot take a gradient step on an empty dataset.")

    errors = _m * xs + _b - ys

    # Vectorised means: the sample count comes from the data itself, so it can
    # never disagree with a separately stored length, and no Python-level loop
    # runs over the samples.
    grad_m = 2 * np.mean(errors * xs)
    grad_b = 2 * np.mean(errors)

    new_m = _m - step_size * grad_m
    new_b = _b - step_size * grad_b
    return new_m, new_b

In [None]:
# Run gradient descent from the current (m, b) until the MSE stops improving.
# NOTE: this cell overwrites m and b, so re-running it continues from the
# already-updated values rather than from the initial guess.
#
# losses[k] holds the MSE after k update steps: the list starts with the
# initial loss and ends with the loss of the final (m, b).
losses = [calculate_mse(m, b)]
converged = False

for i in range(max_iterations):
    m, b = gradient_step(m, b)
    new_loss = calculate_mse(m, b)

    # A NaN or exploding loss means the updates are overshooting.
    if np.isnan(new_loss) or new_loss > 1e12:
        raise ValueError("Diverged - try a smaller alpha.")

    previous_loss = losses[-1]
    losses.append(new_loss)

    # Stop once a single step changes the loss by less than epsilon.
    if abs(previous_loss - new_loss) < epsilon:
        converged = True
        break

# Keep `loss` bound to the MSE of the final parameters.
loss = losses[-1]

# Make an exhausted iteration budget visible instead of ending silently.
if not converged:
    print(
        f"Stopped after {max_iterations} iterations without the loss change "
        f"dropping below epsilon={epsilon}."
    )

## Plot the loss

In [None]:
# Plot the recorded losses, skipping the first 20 entries
# (presumably so the large early values do not dominate the y-axis - confirm).
losses_fig = px.line(losses[20:], title="MSE Losses")
losses_fig.update_layout(xaxis_title="Index", yaxis_title="Loss")
losses_fig.show()

In [None]:
# Candidate slopes and intercepts centred on the current (m, b):
# 100 values within +/-5 of m and 100 values within +/-50 of b.
m_vals = np.linspace(start=m - 5, stop=m + 5, num=100)
b_vals = np.linspace(start=b - 50, stop=b + 50, num=100)

# 2-D coordinate grids pairing every slope with every intercept.
M, B = np.meshgrid(m_vals, b_vals)


In [None]:
# Evaluate the MSE at every (slope, intercept) pair of the grid in one
# vectorised pass instead of looping over the grid cells in Python.
#
# M[..., np.newaxis] and B[..., np.newaxis] get a trailing axis of length 1,
# which broadcasts against the 1-D sample arrays: the residuals have shape
# M.shape + (number of samples,), and averaging the squares over the last
# axis gives one MSE per grid cell, so Z has the same shape as M.
x_arr = np.asarray(x, dtype=float)
y_arr = np.asarray(y, dtype=float)

Z = np.mean(
    (M[..., np.newaxis] * x_arr + B[..., np.newaxis] - y_arr) ** 2,
    axis=-1,
)

In [None]:
# 3-D surface of the MSE over the (slope, intercept) grid.
loss_surface = go.Surface(x=M, y=B, z=Z, colorscale="Viridis")
surface_fig = go.Figure(data=[loss_surface])

surface_fig.update_layout(
    title="MSE Loss Surface",
    scene={
        "xaxis_title": "Slope (m)",
        "yaxis_title": "Intercept (b)",
        "zaxis_title": "MSE",
    },
)

surface_fig.show()

## Report the final results

In [None]:
# Predictions of the fitted line and its goodness-of-fit metrics.
y_pred = m * x + b
mse = ((y_pred - y) ** 2).mean()
rmse = np.sqrt(mse)
# R^2 = 1 - (residual sum of squares) / (total sum of squares about the mean).
r2 = 1 - ((y - y_pred) ** 2).sum() / ((y - y.mean()) ** 2).sum()

print(f"Final Results:\n-> Best Fit Line:\ty = {m}x + {b}\n-> MSE:\t\t\t{mse}\n-> RMSE:\t\t{rmse}\n-> R2:\t\t\t{r2}")

# Redraw the scatter with the fitted line sampled at 10 points across x.
x_line = np.linspace(min(x), max(x), num=10)
y_line = m * x_line + b
fig = create_plot(x, y, x_line=x_line, y_line=y_line)
fig.show()