In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, SGDRegressor
from sklearn.metrics import r2_score, mean_squared_error

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

In [None]:
# Load the diabetes dataset bundled with scikit-learn as a (features, target) pair.
X, y = load_diabetes(
    return_X_y=True,
)


In [None]:
# Preview the feature matrix: its dimensions plus the first few rows,
# instead of printing the whole array.
print(X.shape)
print(X[:5])

# With Simple Linear Regression

In [None]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Baseline: ordinary least-squares regression on the unmodified features.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Hold-out metrics: R^2 and RMSE (square root of the mean squared error).
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2score = r2_score(y_test, y_pred)
print(r2score, rmse, sep="\n")

# With Ridge Regularization

In [None]:
# Hyperparameters for the ridge experiment.
# Note: Ridge also takes a `solver` argument, so a gradient-based solver could be
# selected here directly instead of switching estimators.
degree = 2
with_bias = False
ridge_alpha = 70

# Expand the features into polynomial terms, standardise them, then fit an
# L2-penalised linear model on the result.
# Alternative final step: ('ridge', RidgeCV(alphas=[0.1, 1, 10, 100]))
model = Pipeline(steps=[
    ('poly_feats', PolynomialFeatures(degree=degree, include_bias=with_bias)),
    ('standard_scaling', StandardScaler()),
    ('ridge', Ridge(alpha=ridge_alpha)),
]).fit(X_train, y_train)

# Score the fitted pipeline on the held-out split.
r_y_pred = model.predict(X_test)
r_rmse = np.sqrt(mean_squared_error(y_test, r_y_pred))
r_r2score = r2_score(y_test, r_y_pred)

print(f"r_r2score : {r_r2score} ")
print(f"r_rmse : {r_rmse} ")



In [None]:
#print(y_train)

In [None]:
# Line plot of the held-out targets against r_y_pred, ordered by test-sample position.
fig, ax = plt.subplots(figsize=(20, 5))

# Fall back to y_test itself when it has no `.values` attribute (as with a plain NumPy array).
actual = getattr(y_test, "values", y_test)
ax.plot(actual, label="Actual", marker="o")
ax.plot(r_y_pred, label="Predicted", marker="x")

ax.set(
    xlabel="Sample Index",
    ylabel="Target Value",
    title="Actual vs Predicted Values (Ridge Regression)",
)
ax.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Scatter of predicted against actual targets on the held-out split.
fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(y_test, r_y_pred, alpha=0.6)

# Dashed red diagonal (y = x) across the observed target range:
# a point lying on it is a perfect prediction.
target_range = [y_test.min(), y_test.max()]
ax.plot(target_range, target_range, "r--", lw=2)

ax.set_xlabel("Actual Values")
ax.set_ylabel("Predicted Values")
ax.set_title("Ridge Regression Predictions vs Actual")
plt.show()


# Polynomial Features with Ridge Regularization inside SGDRegressor for Large Datasets

In [None]:
# SGD-trained counterpart of the ridge pipeline: the same polynomial-expansion and
# scaling steps, but the L2-penalised linear model is fitted by stochastic gradient descent.
# ridge_alpha and degree interact, so they have to be tuned together.
# Note: Ridge itself also accepts a `solver` argument if a gradient-based fit is wanted there.
#
# NOTE: this cell rebinds model, r_y_pred, r_r2score and r_rmse from the ridge cell.
# Re-run the ridge cell before re-running the "Ridge Regression" plots.

ridge_alpha = 0.2
degree = 2
# No explicit bias column: StandardScaler would centre that constant column to all
# zeros, and SGDRegressor fits its own intercept by default. This also matches the
# setting used in the ridge pipeline.
with_bias = False

model = Pipeline([
    ('poly_feats', PolynomialFeatures(degree=degree, include_bias=with_bias)),
    ('standard_scaling', StandardScaler()),
    ("sgd", SGDRegressor(
        penalty="l2",        # ridge-like (squared L2) regularization
        alpha=ridge_alpha,   # regularization strength
        max_iter=1000,       # upper bound on passes over the training data
        tol=1e-3,            # tolerance for the stopping criterion
        random_state=42,     # fixed seed so the fit is repeatable
    )),
])

model.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split.
r_y_pred = model.predict(X_test)

r_r2score = r2_score(y_test, r_y_pred)
r_rmse = np.sqrt(mean_squared_error(y_test, r_y_pred))

print(f"r_r2score : {r_r2score} ")
print(f"r_rmse : {r_rmse} ")

# SGD VS RIDGE

**SGDRegressor with Polynomial Features**

1. SGDRegressor uses stochastic gradient descent to optimize a linear model, which can include polynomial features.
2. It is efficient for large-scale datasets with many samples.
3. Regularization can be applied (L2, L1, or Elastic Net), but hyperparameter tuning (e.g., learning rate, number of iterations) is critical.
4. SGD may converge more slowly or be more sensitive to parameter settings and feature scaling.
5. Often used for online or incremental learning scenarios or when computation speed and memory efficiency are priorities.

**Ridge Regression with Polynomial Features**

1. Ridge regression explicitly adds L2 regularization to the least squares cost function to prevent overfitting on polynomial features.
2. It usually provides more stable and interpretable solutions for polynomial regression tasks.
3. Ridge tends to achieve better and more consistent generalization performance because its convex objective is minimized directly, either in closed form or via efficient solvers.
4. Better suited for small to medium datasets where full batch optimization is feasible.
5. Often preferred when control over regularization strength and model interpretability is important.