## Lab Assignment No. 6

In [36]:
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import numpy as np

In [37]:
# Fetch the scikit-learn diabetes dataset and keep its feature matrix
data = load_diabetes()
X = data.data

# Binarise the target for classification: 1 when strictly above the median, else 0
target_median = np.median(data.target)
y = (data.target > target_median).astype(int)


In [38]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [39]:
# Baseline classifier: default LogisticRegression fitted on the features as loaded
# (no scaling step applied yet)
model = LogisticRegression().fit(X_train, y_train)


In [40]:
# Score the baseline (un-normalized) model on the held-out split and report
# the value returned by `model.score` as its accuracy.
accuracy_without_normalization = model.score(X_test, y_test)
print("Accuracy without normalization:", accuracy_without_normalization)


Accuracy without normalization: 0.7415730337078652


In [41]:
# Standardize the features. The scaler is fitted on the training rows only,
# then the same fitted transform is applied to both splits so that no
# statistics from the test rows influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_normalized = scaler.transform(X_train)
X_test_normalized = scaler.transform(X_test)


In [42]:
# Same classifier as the baseline, this time fitted on the standardized features
model_normalized = LogisticRegression().fit(X_train_normalized, y_train)

# Evaluate on the standardized test split so train and test share one scaling
accuracy_with_normalization = model_normalized.score(X_test_normalized, y_test)
print("Accuracy with normalization:", accuracy_with_normalization)

Accuracy with normalization: 0.7303370786516854


##### Ridge Regression (L2-Regularization)

In [44]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
import numpy as np

# Cross-validated RMSE helper shared by the Ridge and Lasso experiments
def rmse_cv(model, features=None, target=None, cv=5):
    """Return the per-fold RMSE of ``model`` under cross-validation.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``cross_val_score``.
    features : array-like, optional
        Feature matrix to evaluate on. Defaults to the notebook-level ``X``
        so existing ``rmse_cv(model)`` calls keep working.
    target : array-like, optional
        Target values to evaluate against. Defaults to the notebook-level
        ``y``. NOTE(review): that ``y`` is the 0/1 label built by
        thresholding ``data.target`` at its median; pass
        ``target=data.target`` to measure RMSE on the continuous target.
    cv : int, default 5
        Forwarded to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    numpy.ndarray
        One RMSE value per cross-validation fold.
    """
    # Fall back to the notebook globals only when the caller did not supply data
    if features is None:
        features = X
    if target is None:
        target = y

    neg_mse_per_fold = cross_val_score(
        model, features, target, scoring="neg_mean_squared_error", cv=cv
    )
    # The scorer reports negated MSE, so flip the sign before taking the root
    return np.sqrt(-neg_mse_per_fold)

# Ridge regression (L2 penalty) with regularization strength alpha = 0.1
ridgeModel = Ridge(alpha=0.1)

# Per-fold RMSE from the cross-validation helper; report its mean and spread.
# NOTE(review): rmse_cv scores against the notebook-level `y`, which was
# binarised to 0/1 for the classifier above, so this RMSE is on class labels,
# not on the continuous disease-progression target. Confirm this is intended.
rmse = rmse_cv(ridgeModel)
print("RMSE estimate: {}, std: {}".format(np.mean(rmse), np.std(rmse)))

RMSE estimate: 0.40713972351163585, std: 0.025082656181039484


##### Lasso Regression (L1-Regularization)

In [45]:
from sklearn.linear_model import Lasso

# Lasso regression (L1 penalty) with alpha = 0.1, scored with the same
# cross-validated RMSE helper used for Ridge
lassoModel = Lasso(alpha=0.1)
rmse = rmse_cv(lassoModel)
print("RMSE estimate: {}, std: {}".format(np.mean(rmse), np.std(rmse)))

RMSE estimate: 0.5004371979302287, std: 0.0005346510117592709


In [47]:
from sklearn.metrics import mean_squared_error

# Regularization strengths to sweep, weakest to strongest
alphas = [0.001, 0.01, 0.1, 1, 10]

# Held-out RMSE keyed by alpha: one mapping for L1 (Lasso), one for L2 (Ridge)
rmse_values_l1, rmse_values_l2 = {}, {}

# Both models are fitted on the training split and scored on the test split
y_true = y_test

for alpha in alphas:
    # L1 penalty (Lasso)
    lasso = Lasso(alpha=alpha).fit(X_train, y_train)
    y_pred = lasso.predict(X_test)
    rmse_l1 = np.sqrt(mean_squared_error(y_true, y_pred))
    rmse_values_l1[alpha] = rmse_l1

    # L2 penalty (Ridge)
    ridge = Ridge(alpha=alpha).fit(X_train, y_train)
    y_pred = ridge.predict(X_test)
    rmse_l2 = np.sqrt(mean_squared_error(y_true, y_pred))
    rmse_values_l2[alpha] = rmse_l2

In [48]:
# Compare RMSE values for L1 (Lasso) and L2 (Ridge) regularization.
# The loop variables are deliberately not named `alpha` / `rmse`: `rmse` already
# holds the array of cross-validation fold scores from the Ridge/Lasso cells,
# and rebinding it to a single float here would silently change what it means.
rmse_reports = (
    ("RMSE Values for L1 Regularization (Lasso):", rmse_values_l1),
    ("\nRMSE Values for L2 Regularization (Ridge):", rmse_values_l2),
)

for heading, rmse_by_alpha in rmse_reports:
    print(heading)
    for alpha_value, rmse_value in rmse_by_alpha.items():
        print(f"Alpha = {alpha_value}: RMSE = {rmse_value}")

RMSE Values for L1 Regularization (Lasso):
Alpha = 0.001: RMSE = 0.4020877530383256
Alpha = 0.01: RMSE = 0.4888472615385296
Alpha = 0.1: RMSE = 0.5014495182138564
Alpha = 1: RMSE = 0.5014495182138564
Alpha = 10: RMSE = 0.5014495182138564

RMSE Values for L2 Regularization (Ridge):
Alpha = 0.001: RMSE = 0.40113303965518204
Alpha = 0.01: RMSE = 0.40252164962660203
Alpha = 0.1: RMSE = 0.4045435669958406
Alpha = 1: RMSE = 0.4192792972764397
Alpha = 10: RMSE = 0.4722425050177734
