In [70]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV

In [71]:
# Load the diabetes regression dataset directly as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [72]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [73]:
# Ordinary least squares baseline: fit on the training split, score on the test split.
reg = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = reg.predict(X_test)

# Bare last expression so the notebook displays the test R^2.
r2_score(y_true=y_test, y_pred=y_pred)

0.4399387660024645

In [74]:
# Ridge regression (alpha=0.1): fit on the training split, score on the test split.
reg = Ridge(alpha=0.1).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = reg.predict(X_test)

# Bare last expression so the notebook displays the test R^2.
r2_score(y_true=y_test, y_pred=y_pred)

0.4519973816947852

In [75]:
# Lasso regression (alpha=0.01): fit on the training split, score on the test split.
reg = Lasso(alpha=0.01).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = reg.predict(X_test)

# Bare last expression so the notebook displays the test R^2.
r2_score(y_true=y_test, y_pred=y_pred)

0.4411227990495632

In [76]:
# ElasticNet (alpha=0.005, l1_ratio=0.9): fit on the training split, score on the test split.
reg = ElasticNet(alpha=0.005, l1_ratio=0.9).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = reg.predict(X_test)

# Bare last expression so the notebook displays the test R^2.
r2_score(y_true=y_test, y_pred=y_pred)

0.4531493801165679

In [None]:
# Simple k-fold cross-validation of the plain linear regression baseline.
# cross_val_score (and GridSearchCV / numpy) are imported in the notebook's
# top import cell so that all dependencies are declared in one place.

# 5-fold CV over the full dataset; each fold's R^2 is returned as one array entry.
scores = cross_val_score(LinearRegression(), X, y, cv=5, scoring='r2')
print(f"Linear Regression CV scores: {scores}")
# Report the mean together with a +/- two-standard-deviation spread across folds.
print(f"Average CV score: {scores.mean():.4f} (+/- {scores.std() * 2:.4f})")

In [None]:
# Hyperparameter tuning for ElasticNet using cross-validated grid search.
# GridSearchCV is imported in the notebook's top import cell, so it is not
# re-imported here.

# Candidate values to try: 5 alphas x 5 l1_ratios = 25 combinations.
param_grid = {
    'alpha': [0.001, 0.005, 0.01, 0.05, 0.1],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
}

# Search on the training split only, so the test split is untouched until
# the final evaluation below.
elastic_cv = GridSearchCV(ElasticNet(), param_grid, cv=5, scoring='r2')
elastic_cv.fit(X_train, y_train)

print(f"Best parameters: {elastic_cv.best_params_}")
print(f"Best CV score: {elastic_cv.best_score_:.4f}")

# Predict with the best configuration found by the search
# (GridSearchCV refits it by default) and score it on the held-out test split.
y_pred = elastic_cv.predict(X_test)
print(f"Test R2 score: {r2_score(y_test, y_pred):.4f}")

In [None]:
# Compare all four regressors under the same 5-fold cross-validation protocol.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(alpha=0.1),
    'Lasso': Lasso(alpha=0.01),
    'ElasticNet': ElasticNet(alpha=0.005, l1_ratio=0.9),
}

# One line per model: mean R^2 across folds and a +/- two-standard-deviation spread.
for name, model in models.items():
    scores = cross_val_score(estimator=model, X=X, y=y, scoring='r2', cv=5)
    print(f"{name}: {scores.mean():.4f} (+/- {scores.std() * 2:.4f})")

In [None]:
# Plain cross-validation vs. GridSearchCV, side by side.
#
# Summary:
#   - Cross-validation = a fair way to test ONE model with ONE fixed setting
#     (here it is evaluated 5 times, once per fold).
#   - GridSearchCV     = uses cross-validation internally for EVERY parameter
#     combination and then reports the best-scoring one.
#
# Real-life analogy:
#   - CV           = one chef's dish tasted by 5 different judges.
#   - GridSearchCV = 9 chefs' dishes, each tasted by 5 judges, then the best
#     chef is chosen.

# --- Plain cross-validation: one model, one setting ---
model = ElasticNet(alpha=0.01, l1_ratio=0.5)
# scoring='r2' is spelled out to match the other cells; it is the same metric
# a regressor's default scorer reports.
scores = cross_val_score(model, X, y, cv=5, scoring='r2')
print(f"Ek specific model ka score: {scores.mean():.4f}")

# --- GridSearchCV: many candidate models, find the best one ---
param_grid = {
    'alpha': [0.001, 0.01, 0.1],      # 3 values
    'l1_ratio': [0.3, 0.5, 0.7]       # 3 values
}
# Total combinations = 3 x 3 = 9 candidate models.
# Each candidate is cross-validated on 5 folds:
# 9 candidates x 5 folds = 45 fits during the search.
grid = GridSearchCV(ElasticNet(), param_grid, cv=5, scoring='r2')
grid.fit(X, y)

print(f"\nBest parameters mile: {grid.best_params_}")
print(f"Best score: {grid.best_score_:.4f}")