In [8]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

In [2]:
# Load the data set, separate the regression target from the predictors and
# hold out a test split. Requires pandas to be imported as `pd` in the
# notebook's import cell.

# Tunable inputs for this cell, named so they are not buried in the calls below.
DATA_PATH = "ocean_data.csv"
NON_NUMERIC_COLUMNS = ['Wea', 'Cloud_Typ', 'Cloud_Amt', 'Visibility']
TARGET_COLUMN = 'T_degC'
TEST_SIZE = 0.25
SPLIT_SEED = 307

data = pd.read_csv(DATA_PATH)

# Drop non-numerical columns
df = data.drop(columns=NON_NUMERIC_COLUMNS)

target_variable = df[TARGET_COLUMN]

# Only the features are imputed later on; a missing target would make the
# regressors fail at fit time, so surface that here with a clear message.
assert target_variable.notna().all(), (
    f"{TARGET_COLUMN} contains missing values; drop or impute them before splitting"
)

numerical_features = df.drop(columns=[TARGET_COLUMN])

# Fixed random_state keeps the train/test partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    numerical_features,
    target_variable,
    test_size=TEST_SIZE,
    random_state=SPLIT_SEED,
)

In [3]:
# Preprocessing chain, applied in this order:
#   1. fill missing feature values with the column mean,
#   2. expand to all degree-2 polynomial terms (no constant column, since the
#      linear models fit their own intercept),
#   3. standardize every resulting column.
preprocessing_steps = [
    ('impute', SimpleImputer(strategy='mean')),
    ('poly_features', PolynomialFeatures(degree=2, include_bias=False)),
    ('scaler', StandardScaler()),
]
pipe = Pipeline(steps=preprocessing_steps)

In [4]:
# Fit the preprocessing pipeline on the training split only, then reuse the
# fitted pipeline on the test split so no test-set statistics leak into it.
# NOTE: this rebinds X_train / X_test to the transformed arrays, so the cell is
# not idempotent -- re-run the train/test split cell before re-running this one.
X_train = pipe.fit_transform(X=X_train)
X_test = pipe.transform(X=X_test)

In [6]:
# Ridge regression: pick alpha by 10-fold cross-validation over 30
# log-spaced candidates between 1e-6 and 1e6, then report parts (b)-(e).
alphas = np.logspace(-6, 6, 30)
ridge_cv = RidgeCV(alphas=alphas, cv=10)
ridge_cv.fit(X_train, y_train)

# (b) Optimal value for alpha
# NOTE(review): the recorded run selected 1e-06, the smallest candidate in the
# grid; consider extending the grid downward to confirm it is a true optimum.
optimal_alpha = ridge_cv.alpha_
print("Optimal value for alpha:", optimal_alpha)

# (c) Test MSE
y_pred_test = ridge_cv.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred_test)
print("Test MSE:", test_mse)

# (d) Value of the coefficient that is largest in magnitude (excluding the intercept)
# Index with argmax(|coef|) so the reported value keeps its sign; taking
# max(|coef|) would report only the magnitude. coef_ never includes the
# intercept (that lives in intercept_).
ridge_coefficients = np.ravel(ridge_cv.coef_)
largest_coefficient = ridge_coefficients[np.argmax(np.abs(ridge_coefficients))]
print("Value of the largest coefficient (excluding intercept):", largest_coefficient)

# (e) Number of coefficients exactly equal to 0
num_zero_coefficients = np.sum(ridge_cv.coef_ == 0)
print("Number of coefficients exactly equal to 0:", num_zero_coefficients)

Optimal value for alpha: 1e-06
Test MSE: 1.7543290443307327
Value of the largest coefficient (excluding intercept): 147.56405022443593
Number of coefficients exactly equal to 0: 0


In [10]:
# Lasso regression: alpha is chosen by 10-fold cross-validation. No alpha
# grid is passed, so LassoCV uses its default candidates. Reports (b)-(e).
lasso_cv = LassoCV(cv=10)
lasso_cv.fit(X_train, y_train)

# (b) Optimal value for alpha
optimal_alpha = lasso_cv.alpha_
print("Optimal value for alpha:", optimal_alpha)

# (c) Test MSE
y_pred_test = lasso_cv.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred_test)
print("Test MSE:", test_mse)

# (d) Value of the coefficient that is largest in magnitude (excluding the intercept)
# Index with argmax(|coef|) so the reported value keeps its sign; taking
# max(|coef|) would report only the magnitude. coef_ never includes the
# intercept (that lives in intercept_).
lasso_coefficients = np.ravel(lasso_cv.coef_)
largest_coefficient = lasso_coefficients[np.argmax(np.abs(lasso_coefficients))]
print("Value of the largest coefficient (excluding intercept):", largest_coefficient)

# (e) Number of coefficients exactly equal to 0
num_zero_coefficients = np.sum(lasso_cv.coef_ == 0)
print("Number of coefficients exactly equal to 0:", num_zero_coefficients)

Optimal value for alpha: 0.009882719034103205
Test MSE: 1.9763166496302538
Value of the largest coefficient (excluding intercept): 2.408803252450286
Number of coefficients exactly equal to 0: 18
