In [16]:
# DS4400 HW 2
# Problem 6: Ridge Regularization
import csv
import pandas as pd
import numpy as np
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler

# Read the three CSV inputs in one pass: the housing table and the
# train / test files (presumably pre-made splits of it -- confirm against the data).
house_data, train_data, test_data = (
    pd.read_csv(csv_name)
    for csv_name in ("kc_house_data.csv", "train.csv", "test.csv")
)

In [17]:
def ridge_gradient_descent(X, y, alpha, iterations, lam):
    """Fit ridge-regularized linear regression by batch gradient descent (modified from problem 5).

    Minimizes ``(1 / N) * ||X @ theta - y||^2 + lam * sum(theta[1:] ** 2)``.
    The first coefficient is excluded from the penalty, so column 0 of ``X``
    is expected to be the intercept (all-ones) column.

    Parameters
    ----------
    X : array-like of shape (N, d)
        Design matrix; anything ``np.asarray`` can convert (ndarray, nested
        list, DataFrame of numeric columns).
    y : array-like with N values
        Targets. May be flat ``(N,)`` or a column ``(N, 1)``; lists and
        pandas Series are accepted as well.
    alpha : float
        Gradient-descent step size.
    iterations : int
        Number of full-batch update steps.
    lam : float
        Ridge penalty strength; ``lam = 0`` gives ordinary least squares.

    Returns
    -------
    numpy.ndarray of shape (d,)
        Coefficient vector after ``iterations`` updates, starting from zeros.

    Raises
    ------
    ValueError
        If ``y`` does not contain exactly one value per row of ``X``.
    """
    # Coerce up front: the original y.flatten() only existed on ndarrays and
    # failed for lists / pandas objects.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    N, d = X.shape
    if y.shape[0] != N:
        # Without this check a length-1 y would silently broadcast.
        raise ValueError(
            f"y has {y.shape[0]} values but X has {N} rows"
        )

    theta = np.zeros(d)

    for _ in range(iterations):
        residual = X @ theta - y
        gradient = (2 / N) * (X.T @ residual)
        # Penalize every coefficient except the intercept at index 0.
        gradient[1:] += 2 * lam * theta[1:]
        theta = theta - alpha * gradient
    return theta

In [18]:
# Simulate N=1000 values of random variable X_i, distributed uniformly on interval [-2,2].
# given values
SEED = 4400          # fixed seed so Restart & Run All reproduces the printed numbers
ALPHA = 0.0001       # step size shared by every fit (kept small so the large-lambda runs do not break)
ITERATIONS = 1000    # gradient-descent steps per fit
# NOTE(review): with this ALPHA / ITERATIONS pair the unregularized fit is not
# converged -- the previously saved run printed a baseline slope of 0.48 although
# the data are generated with slope 2. Raise ITERATIONS (or ALPHA for the small
# lambdas) if converged estimates are needed.

rng = np.random.default_rng(SEED)
N = 1000
X = rng.uniform(-2, 2, size=(N, 1))
e = rng.normal(0, 2, size=(N, 1))   # Gaussian noise with standard deviation 2
Y = 1 + 2 * X + e                   # true intercept 1, true slope 2

# prepend a column of ones so theta[0] acts as the intercept
X_with_intercept = np.c_[np.ones(N), X]


def report_fit(title, coefficients, targets, predictions):
    """Print the slope, MSE and R^2 of one fitted model under the heading `title`."""
    print(title)
    print("Slope:", round(coefficients[1], 2))
    print("MSE:", round(mean_squared_error(targets, predictions), 2))
    print("R^2:", round(r2_score(targets, predictions), 2))


# execute the linear regression without the regularization
theta = ridge_gradient_descent(X_with_intercept, Y, alpha = ALPHA, iterations = ITERATIONS, lam = 0)
y_pred = X_with_intercept @ theta

# print the slope, MSE, and r squared
report_fit("Linear Regression:", theta, Y, y_pred)

# execute the ridge regression with different lambdas
lambdas = [1, 10, 100, 1000, 10000]

for val in lambdas:
    theta_ridge = ridge_gradient_descent(X_with_intercept, Y, alpha = ALPHA, iterations = ITERATIONS, lam = val)
    y_pred_ridge = X_with_intercept @ theta_ridge
    report_fit(f"\nRidge Regression (lambda = {val})", theta_ridge, Y, y_pred_ridge)


Linear Regression:
Slope: 0.48
MSE: 7.6
R^2: 0.19

Ridge Regression (lambda = 1)
Slope: 0.43
MSE: 7.78
R^2: 0.18

Ridge Regression (lambda = 10)
Slope: 0.22
MSE: 8.78
R^2: 0.07

Ridge Regression (lambda = 100)
Slope: 0.03
MSE: 9.75
R^2: -0.03

Ridge Regression (lambda = 1000)
Slope: 0.0
MSE: 9.88
R^2: -0.05

Ridge Regression (lambda = 10000)
Slope: -0.0
MSE: 9.9
R^2: -0.05


3. Print the slope, the MSE values, and the $R^2$ statistic for each case and write down some observations. What happens as the regularization parameter $\lambda$ increases?

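The slope, MSE, and $R^2$ statistic for each case are listed above. The overall pattern is that, as $\lambda$ increases, the slope decreases toward 0 (0.48 with no regularization, 0.43 at $\lambda = 1$, 0.22 at $\lambda = 10$, and essentially 0 for $\lambda \geq 1000$), the MSE increases (from 7.6 to about 9.9), and $R^2$ decreases (from 0.19 to about $-0.05$). $R^2$ becomes negative from $\lambda = 100$ onward, which means the heavily regularized model predicts worse than simply predicting the mean of $Y$. Therefore, as the regularization parameter increases, the penalty term dominates the loss and shrinks the slope to 0, so the model underfits the data.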