# Part 1: Regression Task (California Housing)

Task 1: Load and Split Dataset

In [10]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error

In [None]:

# Load California Housing dataset and split 80/20
X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Task 2: Complete all the Task

â€¢ Regression Task (California Housing):

Step 1: Baseline Model (No Regularization) Build a Linear Regression model without
any regularization.

In [14]:
# Baseline Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

print("Baseline Linear Regression")
print("Coefficients:", lin_reg.coef_)
print("Intercept:", lin_reg.intercept_)

# Predictions
y_train_pred = lin_reg.predict(X_train)
y_test_pred = lin_reg.predict(X_test)

# MSE on train and test
print("Train MSE:", mean_squared_error(y_train, y_train_pred))
print("Test MSE:", mean_squared_error(y_test, y_test_pred))


Baseline Linear Regression
Coefficients: [-2.8246201e+01  7.8129196e-01  1.6246902e+03  4.4437439e+01
 -2.8925183e+02 -6.3279575e+01  2.8548874e+02 -5.7364273e+00]
Intercept: 142569.69
Train MSE: 11132529664.0
Test MSE: 11496187904.0


Step 2: Hyperparameter Tuning Use GridSearchCV or RandomizedSearchCV to tune
hyperparameters for Ridge and Lasso regression models.

In [15]:
# Alpha grid for Ridge and Lasso
alpha_grid = np.logspace(-3, 0, 13)  # 0.001 to 1

ridge = Ridge(random_state=42)
lasso = Lasso(random_state=42, max_iter=10000)

ridge_cv = GridSearchCV(
    ridge,
    {"alpha": alpha_grid},
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
)

lasso_cv = GridSearchCV(
    lasso,
    {"alpha": alpha_grid},
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
)

ridge_cv.fit(X_train, y_train)
lasso_cv.fit(X_train, y_train)

print("\nHyperparameter Tuning Results")
print("Best Ridge alpha:", ridge_cv.best_params_["alpha"])
print("Best Ridge CV MSE:", -ridge_cv.best_score_)
print("Best Lasso alpha:", lasso_cv.best_params_["alpha"])
print("Best Lasso CV MSE:", -lasso_cv.best_score_)


  return f(*arrays, *other_args, **kwargs)



Hyperparameter Tuning Results
Best Ridge alpha: 0.31622776601683794
Best Ridge CV MSE: 4834600652.8
Best Lasso alpha: 0.01778279410038923
Best Lasso CV MSE: 4834599833.6


  model = cd_fast.enet_coordinate_descent(


Step 3: Regularization Experiments (L1 vs L2) Train L1 (Lasso) and L2 (Ridge) regres-
sion models using the optimal hyperparameters.

In [16]:
# Best estimators from GridSearchCV
best_ridge = ridge_cv.best_estimator_
best_lasso = lasso_cv.best_estimator_

# Predictions with best Ridge and Lasso
ridge_train_pred = best_ridge.predict(X_train)
ridge_test_pred = best_ridge.predict(X_test)

lasso_train_pred = best_lasso.predict(X_train)
lasso_test_pred = best_lasso.predict(X_test)

print("\nRidge (L2) with best alpha")
print("Coefficients:", best_ridge.coef_)
print("Train MSE:", mean_squared_error(y_train, ridge_train_pred))
print("Test MSE:", mean_squared_error(y_test, ridge_test_pred))

print("\nLasso (L1) with best alpha")
print("Coefficients:", best_lasso.coef_)
print("Train MSE:", mean_squared_error(y_train, lasso_train_pred))
print("Test MSE:", mean_squared_error(y_test, lasso_test_pred))

print("\nNumber of zero coefficients in Lasso:", np.sum(best_lasso.coef_ == 0))
print("Number of zero coefficients in Ridge:", np.sum(best_ridge.coef_ == 0))



Ridge (L2) with best alpha
Coefficients: [-4.2216805e+04 -4.1831590e+04  1.1740933e+03 -9.0424948e+00
  1.2510568e+02 -3.7414753e+01  3.8440994e+01  4.0550566e+04]
Train MSE: 4800264704.0
Test MSE: 4965660160.0

Lasso (L1) with best alpha
Coefficients: [-4.2219723e+04 -4.1834277e+04  1.1740461e+03 -9.0422068e+00
  1.2510985e+02 -3.7414795e+01  3.8434246e+01  4.0550348e+04]
Train MSE: 4800263680.0
Test MSE: 4965664768.0

Number of zero coefficients in Lasso: 0
Number of zero coefficients in Ridge: 0


# Part 2: Classification Task (Breast Cancer)


Task 1: Load and Split Dataset

In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

Task 2: Complete all the Task

Step 1: Baseline Model (No Regularization) Build a Logistic Regression model without
specifying any regularization (default settings).

In [None]:
# Initialize and train the model
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)

# Print results
print("Coefficients:\n", model.coef_)
print("Train accuracy:", accuracy_score(y_train, model.predict(X_train)))
print("Test accuracy:", accuracy_score(y_test, model.predict(X_test)))

Coefficients:
 [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]
Train accuracy: 0.9582417582417583
Test accuracy: 0.956140350877193


Step 2: Hyperparameter Tuning Use GridSearchCV or RandomizedSearchCV to tune
hyperparameters for logistic regression models with regularization.

In [None]:
# Define parameters
params = {
    'penalty': ['l1', 'l2'],
    'C': np.logspace(-3, 3, 7),
    'solver': ['liblinear']
}

# Run Grid Search
clf = GridSearchCV(LogisticRegression(max_iter=10000), params, cv=5, n_jobs=-1)
clf.fit(X_train, y_train)

# Print results
print("Best params:", clf.best_params_)
print("Best CV score:", clf.best_score_)
print("Test score:", clf.score(X_test, y_test))

Best params: {'C': np.float64(100.0), 'penalty': 'l1', 'solver': 'liblinear'}
Best CV score: 0.9670329670329672
Test score: 0.9824561403508771


Step 3: Regularization Experiments (L1 vs L2)

In [None]:
# Get best C value
C_val = clf.best_params_["C"]

# ----- L1 Logistic Regression -----
l1_model = LogisticRegression(penalty="l1", C=C_val,
                              solver="liblinear", max_iter=10000)
l1_model.fit(X_train, y_train)

print("\nL1 Coefficients:\n", l1_model.coef_)
print("L1 zeros:", (l1_model.coef_ == 0).sum())
print("L1 train acc:", accuracy_score(y_train, l1_model.predict(X_train)))
print("L1 test acc:", accuracy_score(y_test, l1_model.predict(X_test)))


# ----- L2 Logistic Regression -----
l2_model = LogisticRegression(penalty="l2", C=C_val,
                              solver="liblinear", max_iter=10000)
l2_model.fit(X_train, y_train)

print("\nL2 Coefficients:\n", l2_model.coef_)
print("L2 zeros:", (l2_model.coef_ == 0).sum())
print("L2 train acc:", accuracy_score(y_train, l2_model.predict(X_train)))
print("L2 test acc:", accuracy_score(y_test, l2_model.predict(X_test)))


L1 Coefficients:
 [[ 7.44314892e-01 -1.08510446e-01  1.01670306e-01 -2.52688060e-03
   0.00000000e+00  4.70385813e+01 -1.16940789e+01 -1.36328419e+02
   1.97216556e+01  0.00000000e+00  0.00000000e+00  1.73272997e+00
   0.00000000e+00 -1.97707474e-01  0.00000000e+00  0.00000000e+00
   5.00968412e+01  0.00000000e+00  1.87306760e+01  0.00000000e+00
   2.24263633e-01 -4.38890187e-01  5.52266214e-02 -2.10565412e-02
  -2.16607738e+01  8.02112566e+00 -1.46478294e+01 -2.53343820e+01
  -2.53329679e+01  0.00000000e+00]]
L1 zeros: 9
L1 train acc: 0.989010989010989
L1 test acc: 0.9824561403508771

L2 Coefficients:
 [[ 5.40272741  0.26573732 -0.52651203 -0.02095479 -2.29922151 -0.2169419
  -3.56980218 -5.0110607  -2.26418385  0.36762747 -0.58036525  3.84169255
  -0.63875637 -0.10714305 -0.40591246  3.57237002  4.36577514 -0.26636925
   0.39354765  0.62630698 -0.20718582 -0.68999884  0.17661962 -0.01813087
  -4.67716843 -0.0140846  -4.46493229 -7.61271125 -6.83571871  0.57463274]]
L2 zeros: 0
L2 tr