Title: Regularization Techniques

Task 1: Ridge Regression on House Prices<br>
Apply Ridge regularization to the Linear Regression model.

In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Load California Housing dataset
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

# Split into train and test sets (80%-20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Ridge Regression model with alpha (regularization strength)
ridge = Ridge(alpha=1.0)  # You can tune alpha

# Train the model
ridge.fit(X_train, y_train)

# Predict on test set
y_pred = ridge.predict(X_test)

# Evaluate performance
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print(f"Ridge Regression Performance:")
print(f" RMSE: {rmse:.4f}")
print(f" MAE: {mae:.4f}")


Ridge Regression Performance:
 RMSE: 0.7455
 MAE: 0.5332


Task 2: Lasso Regression for Feature Selection<br>
Use Lasso to select important features in the housing data.

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load dataset
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Lasso model with alpha (regularization strength)
lasso = Lasso(alpha=0.1)  # Tune alpha as needed

# Train model
lasso.fit(X_train, y_train)

# Get coefficients
coef = pd.Series(lasso.coef_, index=X.columns)

# Print coefficients (features with zero coef are effectively excluded)
print("Lasso selected features and their coefficients:")
print(coef)

# Show features selected (non-zero coefficients)
selected_features = coef[coef != 0].index.tolist()
print("\nFeatures selected by Lasso:")
print(selected_features)


Lasso selected features and their coefficients:
MedInc        0.392693
HouseAge      0.015081
AveRooms     -0.000000
AveBedrms     0.000000
Population    0.000016
AveOccup     -0.003149
Latitude     -0.114291
Longitude    -0.099308
dtype: float64

Features selected by Lasso:
['MedInc', 'HouseAge', 'Population', 'AveOccup', 'Latitude', 'Longitude']


Task 3: ElasticNet for Customer Churn<br>
Implement ElasticNet to balance between Ridge and Lasso.

In [4]:
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Create synthetic classification data (simulating churn)
X, y = make_classification(n_samples=1000, n_features=20,
                           n_informative=10, n_redundant=5,
                           n_classes=2, weights=[0.7, 0.3],
                           random_state=42)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize SGDClassifier with elasticnet penalty and updated loss name
model = SGDClassifier(loss='log_loss', penalty='elasticnet', l1_ratio=0.5, max_iter=1000, random_state=42)

# Train model
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Evaluate performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.765

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.78      0.83       148
           1       0.54      0.71      0.61        52

    accuracy                           0.77       200
   macro avg       0.71      0.75      0.72       200
weighted avg       0.79      0.77      0.77       200

