<a href="https://colab.research.google.com/github/drcrajc/statsassignmentcodes/blob/main/stats_assignments_code3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

# Observed (x, y) samples, one pair per line, in their original order.
data = [
    (-8, 9224112129.09),
    (-6, 528402701.97),
    (0, 8.65),
    (20, 78488937652962.66),
    (12, 466776459773.15),
    (-11, 224713868127.79),
    (14, 2251535746819.93),
    (-16, 8777378093835.01),
    (-9, 28796454086.62),
    (-13, 1198832738446.63),
    (5, 66178517.68),
    (6, 425575429.26),
    (19, 49567223852493.99),
    (-1, 1.01),
    (8, 7588400172.5),
    (7, 2106950267.55),
    (1, 7.13),
    (15, 4231173984679.49),
    (-10, 82040211341.46),
    (-2, 8169),
    (-7, 2449694140.86),
    (-17, 16618296037477.04),
]

# Split the pairs column-wise: zip(*data) yields the tuple of all x values
# followed by the tuple of all y values; each becomes its own 1-D array.
x, y = (np.array(column) for column in zip(*data))

# Step 1: Standardize the x values.
# The scaler is given x as a single-column 2-D array; the scaled column is
# then collapsed back into a 1-D array.
scaler = StandardScaler()
x_normalized = scaler.fit_transform(x[:, np.newaxis]).flatten()

# Step 2: Build the design matrix for a 10th-degree polynomial in the
# standardized x. np.vander with 11 columns produces the powers
# x**10, x**9, ..., x**0 (highest power first), one row per data point.
X_normalized = np.vander(x_normalized, N=11)

# Step 3: Calculate the OLS estimate.
# Mathematically this is the normal-equation solution
#     alpha_ols = (X.T @ X)^(-1) @ X.T @ y
# but explicitly forming and inverting X.T @ X squares the condition number
# of X, which costs precision for a high-degree polynomial design matrix.
# np.linalg.lstsq solves the same least-squares problem directly from X and
# is the numerically safer way to obtain the coefficients.
# rcond=None selects the machine-precision-based cutoff for small singular
# values. Element [0] of the returned tuple is the solution vector.
alpha_ols = np.linalg.lstsq(X_normalized, y, rcond=None)[0]

# Step 4: Ridge regression on the same design matrix.
# fit_intercept=False: no separate intercept is fitted, so every column of
# the design matrix (including any constant column) is penalized alike.
lambda_value = 1000  # L2 penalty weight, passed to scikit-learn as `alpha`
ridge = Ridge(alpha=lambda_value, fit_intercept=False).fit(X_normalized, y)
alpha_ridge = ridge.coef_  # one coefficient per design-matrix column

# Report both coefficient vectors, OLS first and ridge second.
for label, estimates in (
    ("OLS Estimates:", alpha_ols),
    ("Ridge Regularized Estimates:", alpha_ridge),
):
    print(label, estimates)


OLS Estimates: [-1.56695647e+11 -3.25181230e+11  2.45655436e+12  2.23340366e+12
 -5.38355439e+12 -4.18695902e+12  4.45319565e+12  2.81662448e+12
 -1.08390846e+12 -4.87167397e+11  9.23576105e+09]
Ridge Regularized Estimates: [ 1.80910686e+11  5.33511900e+10  6.74488219e+10  1.79037934e+10
  2.39838957e+10  5.21448066e+09  7.90005888e+09  1.03964778e+09
  2.18929582e+09 -1.43587882e+08  2.71850265e+08]
