<a href="https://colab.research.google.com/github/gtsagkatakis/OptimizationMethods_2024/blob/main/RidgeRegression_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Generate synthetic data
np.random.seed(0)
X = 2 * np.random.rand(100, 50)  # 100 samples, 10 feature
y = 4 + 3 * X + np.random.randn(100, 50)  # y = 4 + 3X + noise

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Define functions for typical linear regression and ridge regression
def typical_linear_regression(X, y):
    # Closed-form solution for linear regression: (X^T X)^(-1) X^T y
    w = np.linalg.inv(X.T @ X) @ X.T @ y
    return w


# Typical Linear Regression
w_typical = typical_linear_regression(X_train, y_train)
y_train_pred_typical = X_train @ w_typical
y_test_pred_typical = X_test @ w_typical
train_error_typical = mean_squared_error(y_train, y_train_pred_typical)
test_error_typical = mean_squared_error(y_test, y_test_pred_typical)

print(" Training Error: {:.3f}, Testing Error: {:.3f}".format(train_error_typical, test_error_typical))







 Training Error: 0.428, Testing Error: 2.604


In [2]:
#def ridge_regression(X, y, alpha):


In [3]:
# Calculate weights for both typical and ridge regression with different alpha values
alpha_values = [0.1, 1, 10, 100]  # Different values of the regularization parameter
train_errors_ridge = []
test_errors_ridge = []
ridge_coefs = []

# Ridge Regression for Different Alphas
for alpha in alpha_values:
    # Train the ridge regression model
    w_ridge = ridge_regression(X_train, y_train, alpha)

    # Make predictions for train and test sets
    y_train_pred_ridge = X_train @ w_ridge
    y_test_pred_ridge = X_test @ w_ridge

    # Calculate mean squared error for train and test sets
    train_errors_ridge.append(mean_squared_error(y_train, y_train_pred_ridge))
    test_errors_ridge.append(mean_squared_error(y_test, y_test_pred_ridge))
    ridge_coefs.append(w_ridge[0][0])  # Save the coefficient to observe the effect of regularization

# Plot train and test errors for typical and ridge regression
plt.figure(figsize=(12, 6))

# Typical Linear Regression Errors (Solid lines)
plt.axhline(y=train_error_typical, color="blue", linestyle='-', label="Train Error (Typical)")
plt.axhline(y=test_error_typical, color="red", linestyle='-', label="Test Error (Typical)")

# Ridge Regression Errors (Dashed lines with markers)
plt.plot(alpha_values, train_errors_ridge, label="Train Error (Ridge)", marker='o', linestyle='--', color="blue")
plt.plot(alpha_values, test_errors_ridge, label="Test Error (Ridge)", marker='o', linestyle='--', color="red")

# Customize plot
plt.xscale("log")  # Log scale for better visualization
plt.xlabel("Regularization Parameter (alpha)")
plt.ylabel("Mean Squared Error")
plt.title("Effect of Regularization on Train and Test Error")
plt.legend()
plt.show()

# Plot coefficients for ridge regression as a function of alpha
plt.figure(figsize=(10, 5))
plt.plot(alpha_values, ridge_coefs, marker='o', label="Ridge Coefficient", linestyle='--')
plt.axhline(y=w_typical[0][0], color='gray', linestyle='-', label="Coefficient (Typical)")
plt.xscale("log")
plt.xlabel("Regularization Parameter (alpha)")
plt.ylabel("Coefficient (w)")
plt.title("Effect of Regularization on Model Coefficient")
plt.legend()
plt.show()

print("Typical Linear Regression - Training Error: {:.3f}, Testing Error: {:.3f}".format(train_error_typical, test_error_typical))
for i, alpha in enumerate(alpha_values):
    print("Ridge Regression (alpha={:.1f}) - Training Error: {:.3f}, Testing Error: {:.3f}".format(alpha, train_errors_ridge[i], test_errors_ridge[i]))


NameError: name 'ridge_regression' is not defined