In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SelectKBest, f_classif

# 🔹 Step 1: Load dataset
df = pd.read_csv("diabetes_prediction_dataset.csv")

# 🔹 Step 2: Encode categorical columns if any
df_encoded = pd.get_dummies(df, drop_first=True)

# Features & Target
X = df_encoded.drop("diabetes", axis=1).head(10)
y = df_encoded["diabetes"]




# 🔹 Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_reduced, y, test_size=0.3, random_state=42
)

# 🔹 Step 5: Polynomial degree (keep small to avoid explosion)
poly_degree = 3

# 🔹 Step 6: Define models with stronger regularization
models = {
    "Polynomial Linear": LinearRegression(),
    "Polynomial Ridge (α=100)": Ridge(alpha=100.0),
    "Polynomial Lasso (α=1)": Lasso(alpha=0.001),
    "Polynomial ElasticNet (α=1, l1=0.7)": ElasticNet(alpha=0.01, l1_ratio=.8)
}

# 🔹 Step 7: Train & Evaluate
results = {}
for name, model in models.items():
    pipe = make_pipeline(
        PolynomialFeatures(degree=poly_degree, include_bias=False),
        StandardScaler(),
        model
    )
    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    results[name] = mse

# ✅ Show Test MSE Results
results_table = pd.DataFrame(list(results.items()), columns=["Model", "Test MSE"])
print(results_table.to_string(index=False))


                              Model  Test MSE
                  Polynomial Linear  0.035063
           Polynomial Ridge (α=100)  0.036724
             Polynomial Lasso (α=1)  0.038216
Polynomial ElasticNet (α=1, l1=0.7)  0.042298
