In [None]:
# ANDREW JOYNER
# 801293231
# HOMEWORK 2

# **PROBLEM 1.A**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Course-hosted copy of the housing-price dataset, read directly over HTTP.
url = "https://raw.githubusercontent.com/HamedTabkhi/Intro-to-ML/main/Dataset/Housing.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Show the first five records as a quick check that the load worked.
display(df.head(n=5))

In [None]:
# List every column name so the feature lists used later can be checked against the file.
print("\nColumn names:", df.columns.tolist(), sep="\n")

In [None]:
# Prepare the data for linear regression: five numeric predictors and the price target.
target = 'price'
features = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']

# Pull plain NumPy arrays out of the DataFrame for the hand-written optimizer.
X = df.loc[:, features].values
y = df.loc[:, target].values

# Peek at the first five samples of each array.
print("first five feature rows")
display(X[:5])

print("\nfirst fice target values:")
display(y[:5])

In [None]:
# Hold out 20% of the rows for testing. Problem 1 deliberately keeps the raw, unscaled features.
# random_state=42 is an arbitrary seed whose only job is to make the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

print("Raw (unscaled) features; first five rows")
print(X_train[:5])

# Carve a validation subset (20% of the training rows) out of the training data.
X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train, y_train, random_state=42, test_size=0.2
)

# Report the shape of each partition, one per line.
print(
    f"\nfinal training set: {X_train_final.shape}",
    f"validation set: {X_val.shape}",
    f"test set: {X_test.shape}",
    sep="\n",
)

In [None]:
class LinearRegressionGD:
    """Ordinary least-squares linear regression trained with batch gradient descent.

    The model is ``y_hat = [1, x] . theta``; ``theta[0]`` is the bias term and
    ``theta[1:]`` are the per-feature weights. The cost being minimised is the
    halved mean squared error ``(1 / 2m) * sum((y_hat - y) ** 2)``.

    Attributes:
        learning_rate: Step size used for every parameter update.
        max_iterations: Number of gradient-descent updates performed by ``fit``.
        costs_train: Training cost recorded at the start of each iteration.
        costs_val: Validation cost recorded at the start of each iteration
            (stays empty when no validation data is passed to ``fit``).
        theta: Learned parameter vector; only exists after ``fit`` has run.
    """

    def __init__(self, learning_rate=0.01, max_iterations=2000):
        self.learning_rate = learning_rate  # step size
        self.max_iterations = max_iterations
        self.costs_train = []  # training cost history
        self.costs_val = []    # validation cost history

    def add_bias(self, X):
        """Return ``X`` with a leading column of ones (the bias/intercept feature)."""
        return np.c_[np.ones(X.shape[0]), X]

    def compute_cost(self, X, y, theta):
        """Return the halved mean squared error of ``X . theta`` against ``y``.

        ``X`` must already contain the bias column and ``y`` must be 1-D.
        """
        m = X.shape[0]
        predictions = X.dot(theta)  # predict using the current parameters
        cost = (1/(2*m)) * np.sum((predictions - y)**2)
        return cost

    def compute_gradients(self, X, y, theta):
        """Return the gradient of the cost with respect to every entry of ``theta``.

        ``X`` must already contain the bias column and ``y`` must be 1-D.
        """
        m = X.shape[0]
        predictions = X.dot(theta)
        gradients = (1/m) * X.T.dot(predictions - y)
        return gradients

    def fit(self, X_train, y_train, X_val=None, y_val=None):
        """Train the model with ``max_iterations`` batch gradient-descent steps.

        Args:
            X_train: Training features, shape (m, n), without a bias column.
            y_train: Training targets; flattened to 1-D, so (m,) and (m, 1) both work.
            X_val: Optional validation features used only to track validation cost.
            y_val: Validation targets; required whenever ``X_val`` is given.

        Raises:
            ValueError: If ``X_val`` is given without ``y_val``.

        Side effects:
            Sets ``self.theta`` and replaces ``self.costs_train`` / ``self.costs_val``
            with the history of this run. Prints progress every 200 iterations.
        """
        if X_val is not None and y_val is None:
            raise ValueError("y_val must be provided when X_val is given")

        # Flatten targets: a column vector (m, 1) would otherwise broadcast
        # `predictions - y` to an (m, m) matrix and corrupt the update.
        y_train = np.asarray(y_train).ravel()

        X_train_bias = self.add_bias(X_train)  # add bias column
        if X_val is not None:
            X_val_bias = self.add_bias(X_val)
            y_val = np.asarray(y_val).ravel()

        # Start every fit from a clean history so that refitting the same
        # instance does not append to the curves of a previous run.
        self.costs_train = []
        self.costs_val = []

        # init params to zero
        n_features = X_train_bias.shape[1]
        self.theta = np.zeros(n_features)
        print(f"Initialized {n_features} parameters to zero")

        # training loop
        for i in range(self.max_iterations):
            # Costs are recorded BEFORE the update of this iteration, so the
            # last history entry belongs to the parameters prior to the final step.
            train_cost = self.compute_cost(X_train_bias, y_train, self.theta)
            self.costs_train.append(train_cost)

            if X_val is not None:
                val_cost = self.compute_cost(X_val_bias, y_val, self.theta)
                self.costs_val.append(val_cost)

            # gradient of the training cost at the current parameters
            gradients = self.compute_gradients(X_train_bias, y_train, self.theta)

            # step against the gradient
            self.theta = self.theta - self.learning_rate * gradients

            # print progress every 200th iteration so training can be monitored
            if i % 200 == 0:
                val_info = f", Validation Cost: {val_cost:.2f}" if X_val is not None else ""
                print(f"Iteration {i:4d}: Training Cost: {train_cost:.2f}{val_info}")

        print("training done")

    def predict(self, X):
        """Return predictions for ``X`` (features without a bias column)."""
        X_bias = self.add_bias(X)
        return X_bias.dot(self.theta)

    def score(self, X, y):
        """Return the coefficient of determination (R squared) on ``X`` / ``y``.

        When ``y`` is constant the usual ratio is undefined; in that case 1.0 is
        returned for a perfect fit and 0.0 otherwise instead of NaN / -inf.
        """
        y = np.asarray(y).ravel()  # accept (m,) and (m, 1) targets alike
        predictions = self.predict(X)
        ss_res = np.sum((y - predictions) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        r2 = 1 - (ss_res / ss_tot)
        return r2

In [None]:
# Learning-rate sweep on the raw features. The usual 0.1 / 0.05 / 0.01 produced
# values that were too large on unscaled inputs, so much smaller step sizes are used.
learning_rates = [1e-10, 1e-11, 1e-12]
models = {}
results = {}

print("Training with small learning rates for unscaled features")
print("(Balances numerical stability with learning effectiveness)")
print("=" * 60)

for lr in learning_rates:
    print(f"\nlearning rate: {lr}")
    print("-" * 40)

    # Fit a fresh model at this step size and keep it for the plots further down.
    model = LinearRegressionGD(learning_rate=lr, max_iterations=2000)
    model.fit(X_train_final, y_train_final, X_val, y_val)
    models[lr] = model

    test_predictions = model.predict(X_test)

    # R² on the training, validation and test partitions (evaluated in that order).
    train_r2, val_r2, test_r2 = (
        model.score(part_X, part_y)
        for part_X, part_y in (
            (X_train_final, y_train_final),
            (X_val, y_val),
            (X_test, y_test),
        )
    )

    results[lr] = {
        'train_r2': train_r2,
        'val_r2': val_r2,
        'test_r2': test_r2,
        'final_train_cost': model.costs_train[-1],
        'final_val_cost': model.costs_val[-1],
    }

    # Summary for this learning rate, one metric per line.
    print(
        "\nresults:",
        f"   training R²: {train_r2:.4f}",
        f"   validation R²: {val_r2:.4f}",
        f"   test R²: {test_r2:.4f}",
        f"   final training cost: {model.costs_train[-1]:.2f}",
        f"   final validation cost: {model.costs_val[-1]:.2f}",
        sep="\n",
    )



In [None]:
# One panel per learning rate, each showing the training and validation loss histories.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, lr in zip(axes, learning_rates):
    model = models[lr]

    ax.plot(model.costs_train, label='Training Loss', color='blue', linewidth=2)
    ax.plot(model.costs_val, label='Validation Loss', color='red', linewidth=2)

    ax.set_title(f'Learning Rate: {lr}')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost (MSE)')
    ax.legend()
    ax.grid(True, alpha=0.3)

fig.tight_layout()
# The title is placed after tight_layout, slightly above the axes (y > 1).
fig.suptitle('Training and Validation Loss Curves for Different Learning Rates',
             fontsize=16, y=1.05)
plt.show()

# The best learning rate is the one whose last recorded validation cost is lowest.
best_lr = min(results, key=lambda rate: results[rate]['final_val_cost'])
print(f"\nbest learning rate: {best_lr}")
print("best model performance:")
for metric, value in results[best_lr].items():
    print(f"   {metric}: {value:.4f}")

In [None]:
# Learning curve and test-set fit of the model trained with the best learning rate.
best_model = models[best_lr]
test_predictions = best_model.predict(X_test)

fig, (ax_loss, ax_fit) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training and validation loss per iteration.
ax_loss.plot(best_model.costs_train, label='Training Loss', color='blue', linewidth=2)
ax_loss.plot(best_model.costs_val, label='Validation Loss', color='red', linewidth=2)
ax_loss.set_title(f'Best Model: Learning Rate = {best_lr}')
ax_loss.set_xlabel('Iterations')
ax_loss.set_ylabel('Cost (MSE)')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# Right panel: predicted vs. actual prices; the dashed red diagonal is where
# a perfect prediction would fall.
price_span = [y_test.min(), y_test.max()]
ax_fit.scatter(y_test, test_predictions, alpha=0.6, color='purple')
ax_fit.plot(price_span, price_span, 'r--', linewidth=2)
ax_fit.set_xlabel('Actual Prices')
ax_fit.set_ylabel('Predicted Prices')
ax_fit.set_title(f'Actual vs Predicted (R² = {results[best_lr]["test_r2"]:.4f})')
ax_fit.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Final model parameters: theta[0] is the bias, theta[1:] follow the order of `features`.
print(f"final model parameters (θ) for learning rate {best_lr}:")
print(f"   bias (θ₀): {best_model.theta[0]:.4f}")
for idx, (feature, weight) in enumerate(zip(features, best_model.theta[1:]), start=1):
    print(f"   {feature} (θ{idx}): {weight:.4f}")

# Additional error metrics on the held-out test set.
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(y_test, test_predictions)
mae = mean_absolute_error(y_test, test_predictions)
rmse = np.sqrt(mse)

print(
    "\n test set performance metrics:",
    f"   mean squared error (MSE): {mse:.2f}",
    f"   root mean squared rrror (RMSE): {rmse:.2f}",
    f"   mean absolute error (MAE): {mae:.2f}",
    f"   r-squared (R²): {results[best_lr]['test_r2']:.4f}",
    sep="\n",
)


# **Problem 1.B**

In [None]:
# Prepare data for Problem 1.b: all 11 predictors, still unscaled.
from sklearn.preprocessing import LabelEncoder

# All 11 input features used in 1.b.
features_1b = ['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom',
               'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea']

# Work on a copy so the original DataFrame keeps its string-valued columns.
df_1b = df.copy()

# Convert the categorical columns to integer codes. The encoder is refitted on
# every column, so each column gets its own code assignment.
# NOTE(review): presumably these columns hold yes/no strings — confirm against the CSV.
le = LabelEncoder()
categorical_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

# No "column exists" guard: a missing column would only be skipped here and
# then raise KeyError on the feature selection below, so fail at the real cause.
for col in categorical_cols:
    df_1b[col] = le.fit_transform(df_1b[col])

# Extract features and target for 1.b as NumPy arrays.
X_1b = df_1b[features_1b].values
y_1b = df_1b['price'].values

# Label the preview columns so the table is readable (matches the 2.b preview).
print("First five rows of processed features:")
display(pd.DataFrame(X_1b[:5], columns=features_1b))

In [None]:
# Train/test split for Problem 1.b (80/20, same arbitrary seed as before).
X_train_1b, X_test_1b, y_train_1b, y_test_1b = train_test_split(
    X_1b, y_1b, random_state=42, test_size=0.2
)

# Validation subset: 20% of the training rows.
X_train_final_1b, X_val_1b, y_train_final_1b, y_val_1b = train_test_split(
    X_train_1b, y_train_1b, random_state=42, test_size=0.2
)

print(
    f"Problem 1.b - Training set: {X_train_final_1b.shape}",
    f"Problem 1.b - Validation set: {X_val_1b.shape}",
    f"Problem 1.b - Test set: {X_test_1b.shape}",
    sep="\n",
)

# Same small learning rates as in 1.a, because the features are still unscaled.
learning_rates_1b = [1e-10, 1e-11, 1e-12]
models_1b = {}
results_1b = {}

print("\n" + "="*60)
print("PROBLEM 1.b: Training with ALL 11 features (NO SCALING)")
print("Using small learning rates to balance stability and learning")
print("="*60)

for lr in learning_rates_1b:
    print(f"\nLearning rate: {lr}")
    print("-" * 40)

    # Fit a fresh model at this step size and keep it for the loss plots.
    model = LinearRegressionGD(learning_rate=lr, max_iterations=2000)
    model.fit(X_train_final_1b, y_train_final_1b, X_val_1b, y_val_1b)
    models_1b[lr] = model

    test_predictions = model.predict(X_test_1b)

    # R² on the training, validation and test partitions (evaluated in that order).
    train_r2, val_r2, test_r2 = (
        model.score(part_X, part_y)
        for part_X, part_y in (
            (X_train_final_1b, y_train_final_1b),
            (X_val_1b, y_val_1b),
            (X_test_1b, y_test_1b),
        )
    )

    results_1b[lr] = {
        'train_r2': train_r2,
        'val_r2': val_r2,
        'test_r2': test_r2,
        'final_train_cost': model.costs_train[-1],
        'final_val_cost': model.costs_val[-1],
    }

    print(
        "results:",
        f"   training R²: {train_r2:.4f}",
        f"   validation R²: {val_r2:.4f}",
        f"   test R²: {test_r2:.4f}",
        f"   final training cost: {model.costs_train[-1]:.2f}",
        f"   final validation cost: {model.costs_val[-1]:.2f}",
        sep="\n",
    )


# Best learning rate for 1.b = lowest last-recorded validation cost.
best_lr_1b = min(results_1b, key=lambda rate: results_1b[rate]['final_val_cost'])
print(f"best learning rate for 1.b: {best_lr_1b}")

In [None]:
# Loss curves for Problem 1.b: one panel per learning rate (same layout as 1.a).
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, lr in zip(axes, learning_rates_1b):
    model = models_1b[lr]

    ax.plot(model.costs_train, label='Training Loss', color='blue', linewidth=2)
    ax.plot(model.costs_val, label='Validation Loss', color='red', linewidth=2)

    ax.set_title(f'Learning Rate: {lr}')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost (MSE)')
    ax.legend()
    ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.suptitle('Problem 1.b: Training and Validation Loss Curves (11 Features, No Scaling)',
             fontsize=16, y=1.05)
plt.show()

# **PROBLEM 2.A**

In [None]:
# 80% train, 20% test (arbitrary but fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Split off the validation rows BEFORE fitting the scaler. Fitting on the whole
# training block and splitting afterwards would let the validation rows
# influence the mean/std used for scaling (data leakage), which biases the
# validation loss that is later used to choose the learning rate.
X_train_final_raw, X_val_raw, y_train_final, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Standardize with sklearn's StandardScaler: statistics are learned from the
# final training rows only and then applied unchanged to every other partition.
scaler = StandardScaler()
X_train_final = scaler.fit_transform(X_train_final_raw)
X_val = scaler.transform(X_val_raw)
X_test_scaled = scaler.transform(X_test)

# Scaled view of the full training block (final training + validation rows),
# kept so that the name remains available to the rest of the notebook.
X_train_scaled = scaler.transform(X_train)

print("scaled features; first five rows")
print(X_train_scaled[:5])

print(f"\nfinal training set: {X_train_final.shape}")
print(f"validation set: {X_val.shape}")
print(f"test set: {X_test_scaled.shape}")

In [None]:
# Learning-rate sweep on the standardized features; the usual step sizes are used here.
learning_rates = [0.1, 0.05, 0.01]
models = {}
results = {}

print("we will train models with different learning rates")
print("=" * 60)

for lr in learning_rates:
    print(f"\nlearning rate: {lr}")
    print("-" * 40)

    # Fit a fresh model at this step size and keep it for the plots further down.
    model = LinearRegressionGD(learning_rate=lr, max_iterations=1000)
    model.fit(X_train_final, y_train_final, X_val, y_val)
    models[lr] = model

    test_predictions = model.predict(X_test_scaled)

    # R² on the training, validation and test partitions (evaluated in that order).
    train_r2, val_r2, test_r2 = (
        model.score(part_X, part_y)
        for part_X, part_y in (
            (X_train_final, y_train_final),
            (X_val, y_val),
            (X_test_scaled, y_test),
        )
    )

    results[lr] = {
        'train_r2': train_r2,
        'val_r2': val_r2,
        'test_r2': test_r2,
        'final_train_cost': model.costs_train[-1],
        'final_val_cost': model.costs_val[-1],
    }

    print(
        "\nresults:",
        f"   training R²: {train_r2:.4f}",
        f"   validation R²: {val_r2:.4f}",
        f"   test R²: {test_r2:.4f}",
        f"   final training cost: {model.costs_train[-1]:.2f}",
        f"   final validation cost: {model.costs_val[-1]:.2f}",
        sep="\n",
    )



In [None]:
# One panel per learning rate, each showing the training and validation loss histories.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, lr in zip(axes, learning_rates):
    model = models[lr]

    ax.plot(model.costs_train, label='Training Loss', color='blue', linewidth=2)
    ax.plot(model.costs_val, label='Validation Loss', color='red', linewidth=2)

    ax.set_title(f'Learning Rate: {lr}')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost (MSE)')
    ax.legend()
    ax.grid(True, alpha=0.3)

fig.tight_layout()
# The title is placed after tight_layout, slightly above the axes (y > 1).
fig.suptitle('Training and Validation Loss Curves for Different Learning Rates',
             fontsize=16, y=1.05)
plt.show()

# The best learning rate is the one whose last recorded validation cost is lowest.
best_lr = min(results, key=lambda rate: results[rate]['final_val_cost'])
print(f"\nbest learning rate: {best_lr}")
print("best model performance:")
for metric, value in results[best_lr].items():
    print(f"   {metric}: {value:.4f}")

In [None]:
# Learning curve and test-set fit of the best scaled-feature model.
best_model = models[best_lr]
test_predictions = best_model.predict(X_test_scaled)

fig, (ax_loss, ax_fit) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training and validation loss per iteration.
ax_loss.plot(best_model.costs_train, label='Training Loss', color='blue', linewidth=2)
ax_loss.plot(best_model.costs_val, label='Validation Loss', color='red', linewidth=2)
ax_loss.set_title(f'Best Model: Learning Rate = {best_lr}')
ax_loss.set_xlabel('Iterations')
ax_loss.set_ylabel('Cost (MSE)')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# Right panel: predicted vs. actual prices; the dashed red diagonal is where
# a perfect prediction would fall.
price_span = [y_test.min(), y_test.max()]
ax_fit.scatter(y_test, test_predictions, alpha=0.6, color='purple')
ax_fit.plot(price_span, price_span, 'r--', linewidth=2)
ax_fit.set_xlabel('Actual Prices')
ax_fit.set_ylabel('Predicted Prices')
ax_fit.set_title(f'Actual vs Predicted (R² = {results[best_lr]["test_r2"]:.4f})')
ax_fit.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Final parameters: theta[0] is the bias, theta[1:] follow the order of `features`.
print(f"final model parameters (θ) for learning rate {best_lr}:")
print(f"   bias (θ₀): {best_model.theta[0]:.4f}")
for idx, (feature, weight) in enumerate(zip(features, best_model.theta[1:]), start=1):
    print(f"   {feature} (θ{idx}): {weight:.4f}")

# Additional error metrics on the held-out test set.
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(y_test, test_predictions)
mae = mean_absolute_error(y_test, test_predictions)
rmse = np.sqrt(mse)

print(
    "\n test set performance metrics:",
    f"   mean squared error (MSE): {mse:.2f}",
    f"   root mean squared rrror (RMSE): {rmse:.2f}",
    f"   mean absolute error (MAE): {mae:.2f}",
    f"   r-squared (R²): {results[best_lr]['test_r2']:.4f}",
    sep="\n",
)


# **Problem 2.B**

In [None]:
# Prepare data for Problem 2.b: all 11 predictors, encoded here and scaled in the next cell.
from sklearn.preprocessing import LabelEncoder

categorical_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
features_2b = ['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom',
               'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea']

# Encode on a copy so the original DataFrame keeps its string-valued columns.
df_2b = df.copy()
le = LabelEncoder()

# Replace each categorical column that is present with integer codes; the
# encoder is refitted per column.
for col in categorical_cols:
    if col not in df_2b.columns:
        continue
    df_2b[col] = le.fit_transform(df_2b[col])

# Feature matrix and target vector as NumPy arrays.
X_2b = df_2b.loc[:, features_2b].values
y_2b = df_2b.loc[:, 'price'].values

print("first five rows of processed features (pre-scaling):")
display(pd.DataFrame(X_2b[:5], columns=features_2b))

In [None]:
# Split the data (80% train, 20% test) before any scaling is learned.
X_train_2b, X_test_2b, y_train_2b, y_test_2b = train_test_split(X_2b, y_2b, test_size=0.2, random_state=42)

# Split off the validation rows BEFORE fitting the scaler. Fitting on the whole
# training block and splitting afterwards would let the validation rows
# influence the mean/std used for scaling (data leakage), which biases the
# validation loss that is later used to choose the learning rate.
X_train_final_raw_2b, X_val_raw_2b, y_train_final_2b, y_val_2b = train_test_split(
    X_train_2b, y_train_2b, test_size=0.2, random_state=42
)

# Standardize with sklearn's StandardScaler: statistics are learned from the
# final training rows only and then applied unchanged to every other partition.
scaler_2b = StandardScaler()
X_train_final_2b = scaler_2b.fit_transform(X_train_final_raw_2b)
X_val_2b = scaler_2b.transform(X_val_raw_2b)
X_test_scaled_2b = scaler_2b.transform(X_test_2b)

# Scaled view of the full training block (final training + validation rows),
# kept so that the name remains available to the rest of the notebook.
X_train_scaled_2b = scaler_2b.transform(X_train_2b)

print("Scaled features (first five rows):")
print(X_train_scaled_2b[:5])

print(f"\nProblem 2.b - Training set: {X_train_final_2b.shape}")
print(f"Problem 2.b - Validation set: {X_val_2b.shape}")
print(f"Problem 2.b - Test set: {X_test_scaled_2b.shape}")

In [None]:
# Learning-rate sweep for the scaled 11-feature problem.
learning_rates_2b = [0.1, 0.05, 0.01]
models_2b = {}
results_2b = {}

print("\n" + "="*60)
print("PROBLEM 2.b: Training with ALL 11 features WITH SCALING")
print("Using standard learning rates for scaled features")
print("="*60)

for lr in learning_rates_2b:
    print(f"\nLearning rate: {lr}")
    print("-" * 40)

    # Fit a fresh model at this step size and keep it for the plots further down.
    model = LinearRegressionGD(learning_rate=lr, max_iterations=1000)
    model.fit(X_train_final_2b, y_train_final_2b, X_val_2b, y_val_2b)
    models_2b[lr] = model

    test_predictions = model.predict(X_test_scaled_2b)

    # R² on the training, validation and test partitions (evaluated in that order).
    train_r2, val_r2, test_r2 = (
        model.score(part_X, part_y)
        for part_X, part_y in (
            (X_train_final_2b, y_train_final_2b),
            (X_val_2b, y_val_2b),
            (X_test_scaled_2b, y_test_2b),
        )
    )

    results_2b[lr] = {
        'train_r2': train_r2,
        'val_r2': val_r2,
        'test_r2': test_r2,
        'final_train_cost': model.costs_train[-1],
        'final_val_cost': model.costs_val[-1],
    }

    print(
        "Results:",
        f"   Training R²: {train_r2:.4f}",
        f"   Validation R²: {val_r2:.4f}",
        f"   Test R²: {test_r2:.4f}",
        f"   Final training cost: {model.costs_train[-1]:.2f}",
        f"   Final validation cost: {model.costs_val[-1]:.2f}",
        sep="\n",
    )

# Best learning rate for 2.b = lowest last-recorded validation cost.
best_lr_2b = min(results_2b, key=lambda rate: results_2b[rate]['final_val_cost'])
print(f"\nBest learning rate for 2.b: {best_lr_2b}")
print("Best model performance:")
for metric, value in results_2b[best_lr_2b].items():
    print(f"   {metric}: {value:.4f}")

In [None]:
# Loss curves for Problem 2.b: one panel per learning rate.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, lr in zip(axes, learning_rates_2b):
    model = models_2b[lr]

    ax.plot(model.costs_train, label='Training Loss', color='blue', linewidth=2)
    ax.plot(model.costs_val, label='Validation Loss', color='red', linewidth=2)

    ax.set_title(f'Learning Rate: {lr}')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost (MSE)')
    ax.legend()
    ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.suptitle('Problem 2.b: Training and Validation Loss Curves (11 Features, WITH Scaling)',
             fontsize=16, y=1.05)
plt.show()

In [None]:
# Detailed look at the best Problem 2.b model: learning curve, test-set fit, parameters, metrics.
best_model_2b = models_2b[best_lr_2b]
test_predictions_2b = best_model_2b.predict(X_test_scaled_2b)

fig, (ax_loss, ax_fit) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training and validation loss per iteration.
ax_loss.plot(best_model_2b.costs_train, label='Training Loss', color='blue', linewidth=2)
ax_loss.plot(best_model_2b.costs_val, label='Validation Loss', color='red', linewidth=2)
ax_loss.set_title(f'Best Model: Learning Rate = {best_lr_2b}')
ax_loss.set_xlabel('Iterations')
ax_loss.set_ylabel('Cost (MSE)')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# Right panel: predicted vs. actual prices; the dashed red diagonal is where
# a perfect prediction would fall.
price_span_2b = [y_test_2b.min(), y_test_2b.max()]
ax_fit.scatter(y_test_2b, test_predictions_2b, alpha=0.6, color='purple')
ax_fit.plot(price_span_2b, price_span_2b, 'r--', linewidth=2)
ax_fit.set_xlabel('Actual Prices')
ax_fit.set_ylabel('Predicted Prices')
ax_fit.set_title(f'Actual vs Predicted (R² = {results_2b[best_lr_2b]["test_r2"]:.4f})')
ax_fit.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Final parameters: theta[0] is the bias, theta[1:] follow the order of `features_2b`.
print(f"Final model parameters (θ) for learning rate {best_lr_2b}:")
print(f"   Bias (θ₀): {best_model_2b.theta[0]:.4f}")
for idx, (feature, weight) in enumerate(zip(features_2b, best_model_2b.theta[1:]), start=1):
    print(f"   {feature} (θ{idx}): {weight:.4f}")

# Additional error metrics on the held-out test set.
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse_2b = mean_squared_error(y_test_2b, test_predictions_2b)
mae_2b = mean_absolute_error(y_test_2b, test_predictions_2b)
rmse_2b = np.sqrt(mse_2b)

print(
    "\nTest set performance metrics:",
    f"   Mean Squared Error (MSE): {mse_2b:.2f}",
    f"   Root Mean Squared Error (RMSE): {rmse_2b:.2f}",
    f"   Mean Absolute Error (MAE): {mae_2b:.2f}",
    f"   R-squared (R²): {results_2b[best_lr_2b]['test_r2']:.4f}",
    sep="\n",
)

# **Problem 3.A**

In [None]:
# Linear regression with an L2 (ridge) regularization penalty.
class LinearRegressionGDRidge(LinearRegressionGD):
    """Gradient-descent linear regression with an L2 penalty on the weights.

    The penalty ``lambda_ / (2m) * sum(theta[1:] ** 2)`` is added to the
    training cost and its derivative to the gradient; the bias ``theta[0]`` is
    never penalized. Validation cost is tracked without the penalty.
    """

    def __init__(self, learning_rate=0.01, max_iterations=2000, lambda_=0.1):
        super().__init__(learning_rate, max_iterations)
        self.lambda_ = lambda_  # regularization strength

    def compute_cost(self, X, y, theta, regularize=True):
        """Return the halved MSE, plus the L2 penalty when ``regularize`` is true."""
        n_samples = X.shape[0]
        residuals = X.dot(theta) - y
        cost = (1/(2*n_samples)) * np.sum(residuals**2)
        if not regularize:
            return cost
        penalty = (self.lambda_/(2*n_samples)) * np.sum(theta[1:]**2)
        return cost + penalty

    def compute_gradients(self, X, y, theta, regularize=True):
        """Return the cost gradient, including the penalty term when ``regularize`` is true."""
        n_samples = X.shape[0]
        residuals = X.dot(theta) - y
        gradients = (1/n_samples) * X.T.dot(residuals)
        if not regularize:
            return gradients
        # A leading zero keeps the bias entry free of any penalty.
        shrinkage = np.concatenate([[0], self.lambda_ * theta[1:]/n_samples])
        return gradients + shrinkage

    def fit(self, X_train, y_train, X_val=None, y_val=None):
        """Train with regularized gradient descent, recording both cost histories.

        Cost histories are reset at the start, parameters start at zero, and
        progress is printed every 200 iterations.
        """
        has_validation = X_val is not None

        design_train = self.add_bias(X_train)
        if has_validation:
            design_val = self.add_bias(X_val)

        self.theta = np.zeros(design_train.shape[1])
        self.costs_train = []
        self.costs_val = []

        for step in range(self.max_iterations):
            # Costs are measured before the parameter update of this step.
            train_cost = self.compute_cost(design_train, y_train, self.theta, regularize=True)
            self.costs_train.append(train_cost)

            if has_validation:
                # Validation cost is reported without the penalty term.
                val_cost = self.compute_cost(design_val, y_val, self.theta, regularize=False)
                self.costs_val.append(val_cost)

            step_direction = self.compute_gradients(design_train, y_train, self.theta, regularize=True)
            self.theta = self.theta - self.learning_rate * step_direction

            if step % 200 != 0:
                continue
            val_info = f", Validation Cost: {val_cost:.2f}" if has_validation else ""
            print(f"Iteration {step:4d}: Training Cost: {train_cost:.2f}{val_info}")

        print("training done (ridge)")


In [None]:
# Ridge regression on the scaled 5-feature data, reusing the best learning rate from 2.a.
ridge_learning_rate = best_lr
ridge_lambda = 0.1

ridge_model = LinearRegressionGDRidge(
    learning_rate=ridge_learning_rate,
    max_iterations=1000,
    lambda_=ridge_lambda,
)
ridge_model.fit(X_train_final, y_train_final, X_val, y_val)

# Evaluate on every partition.
ridge_test_predictions = ridge_model.predict(X_test_scaled)
ridge_train_r2 = ridge_model.score(X_train_final, y_train_final)
ridge_val_r2 = ridge_model.score(X_val, y_val)
ridge_test_r2 = ridge_model.score(X_test_scaled, y_test)

print(
    "\nRidge Regression Results (3.a):",
    f"   Training R²: {ridge_train_r2:.4f}",
    f"   Validation R²: {ridge_val_r2:.4f}",
    f"   Test R²: {ridge_test_r2:.4f}",
    f"   Final training cost: {ridge_model.costs_train[-1]:.2f}",
    f"   Final validation cost: {ridge_model.costs_val[-1]:.2f}",
    sep="\n",
)

# Training and validation loss over the iterations.
fig_loss, ax_loss = plt.subplots(figsize=(10, 4))
ax_loss.plot(ridge_model.costs_train, label='Training Loss', color='blue')
ax_loss.plot(ridge_model.costs_val, label='Validation Loss', color='red')
ax_loss.set_title('Ridge Regression (3.a): Training and Validation Loss')
ax_loss.set_xlabel('Iterations')
ax_loss.set_ylabel('Cost (MSE)')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)
plt.show()

# Predicted vs. actual test prices; the dashed red diagonal marks a perfect prediction.
fig_fit, ax_fit = plt.subplots(figsize=(5, 5))
price_span = [y_test.min(), y_test.max()]
ax_fit.scatter(y_test, ridge_test_predictions, alpha=0.6, color='purple')
ax_fit.plot(price_span, price_span, 'r--', linewidth=2)
ax_fit.set_xlabel('Actual Prices')
ax_fit.set_ylabel('Predicted Prices')
ax_fit.set_title(f'Actual vs Predicted (R² = {ridge_test_r2:.4f})')
ax_fit.grid(True, alpha=0.3)
plt.show()


# **Problem 3.B**

In [None]:
# Ridge regression on the scaled 11-feature data, reusing the best learning rate from 2.b.
ridge_learning_rate_2b = best_lr_2b
ridge_lambda = 0.1

ridge_model_2b = LinearRegressionGDRidge(
    learning_rate=ridge_learning_rate_2b,
    max_iterations=1000,
    lambda_=ridge_lambda,
)
ridge_model_2b.fit(X_train_final_2b, y_train_final_2b, X_val_2b, y_val_2b)

# Evaluate on every partition.
ridge_test_predictions_2b = ridge_model_2b.predict(X_test_scaled_2b)
ridge_train_r2_2b = ridge_model_2b.score(X_train_final_2b, y_train_final_2b)
ridge_val_r2_2b = ridge_model_2b.score(X_val_2b, y_val_2b)
ridge_test_r2_2b = ridge_model_2b.score(X_test_scaled_2b, y_test_2b)

print(
    "\nRidge Regression Results (3.b):",
    f"   Training R²: {ridge_train_r2_2b:.4f}",
    f"   Validation R²: {ridge_val_r2_2b:.4f}",
    f"   Test R²: {ridge_test_r2_2b:.4f}",
    f"   Final training cost: {ridge_model_2b.costs_train[-1]:.2f}",
    f"   Final validation cost: {ridge_model_2b.costs_val[-1]:.2f}",
    sep="\n",
)

# Training and validation loss over the iterations.
fig_loss, ax_loss = plt.subplots(figsize=(10, 4))
ax_loss.plot(ridge_model_2b.costs_train, label='Training Loss', color='blue')
ax_loss.plot(ridge_model_2b.costs_val, label='Validation Loss', color='red')
ax_loss.set_title('Ridge Regression (3.b): Training and Validation Loss')
ax_loss.set_xlabel('Iterations')
ax_loss.set_ylabel('Cost (MSE)')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)
plt.show()

# Predicted vs. actual test prices; the dashed red diagonal marks a perfect prediction.
fig_fit, ax_fit = plt.subplots(figsize=(5, 5))
price_span_2b = [y_test_2b.min(), y_test_2b.max()]
ax_fit.scatter(y_test_2b, ridge_test_predictions_2b, alpha=0.6, color='purple')
ax_fit.plot(price_span_2b, price_span_2b, 'r--', linewidth=2)
ax_fit.set_xlabel('Actual Prices')
ax_fit.set_ylabel('Predicted Prices')
ax_fit.set_title(f'Actual vs Predicted (R² = {ridge_test_r2_2b:.4f})')
ax_fit.grid(True, alpha=0.3)
plt.show()
