# Regression model tools

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_absolute_error

# If we use L2 regularization (ridge regression), we end up with a model with smaller coefficients. 
# In other words, L2 regularization shrinks all the coefficients but rarely turns them into zero.
def calculate_mae_with_ridge(X_train, y_train, X_val, y_val, X_test, y_test, degree, alpha=1.0):
    """
    Fit a polynomial Ridge (L2-regularized) regression and score it with MAE.

    The pipeline expands the inputs with ``PolynomialFeatures`` (no bias
    column) and fits ``Ridge`` on the training split only. No scaling step is
    applied, so the features are used exactly as given.

    Parameters:
    X_train, y_train: training features and targets used to fit the pipeline
    X_val, y_val: validation features and targets (scored only)
    X_test, y_test: test features and targets (scored only)
    degree: int, degree passed to PolynomialFeatures
    alpha: float, regularization strength (larger values mean more regularization)

    Returns:
    (mae_train, mae_val, mae_test, model) where ``model`` is the fitted Pipeline.
    """
    pipeline_steps = [
        ('poly', PolynomialFeatures(degree=degree, include_bias=False)),
        ('ridge', Ridge(alpha=alpha)),  # Ridge instead of plain LinearRegression
    ]
    model = Pipeline(pipeline_steps)
    model.fit(X_train, y_train)

    # Predict every split first, then score, in train / validation / test order.
    predictions = [model.predict(features) for features in (X_train, X_val, X_test)]
    mae_train, mae_val, mae_test = [
        mean_absolute_error(targets, predicted)
        for targets, predicted in zip((y_train, y_val, y_test), predictions)
    ]

    return mae_train, mae_val, mae_test, model


# If we use L1 regularization (lasso regression), we end up with a model with fewer non-zero coefficients.
# In other words, L1 regularization drives some of the coefficients to exactly zero.
def calculate_mae_with_lasso(X_train, y_train, X_val, y_val, X_test, y_test, degree, alpha=1.0):
    """
    Fit a polynomial Lasso (L1-regularized) regression and score it with MAE.

    The pipeline standardizes the inputs, expands them with
    ``PolynomialFeatures`` (no bias column) and fits ``Lasso`` with
    ``max_iter=10000`` on the training split only.

    Parameters:
    X_train, y_train: training features and targets used to fit the pipeline
    X_val, y_val: validation features and targets (scored only)
    X_test, y_test: test features and targets (scored only)
    degree: int, degree passed to PolynomialFeatures
    alpha: float, regularization strength (larger values mean more regularization)

    Returns:
    (mae_train, mae_val, mae_test, model) where ``model`` is the fitted Pipeline.
    """
    # Standardize the inputs to help Lasso converge.
    scaling_step = ('scaler', StandardScaler())
    # Expand X into polynomial features up to `degree`.
    expansion_step = ('poly', PolynomialFeatures(degree=degree, include_bias=False))
    # Raised iteration cap to help the solver converge.
    regression_step = ('lasso', Lasso(alpha=alpha, max_iter=10000))

    model = Pipeline([scaling_step, expansion_step, regression_step])
    model.fit(X_train, y_train)

    # Predict every split first, then score, in train / validation / test order.
    predicted_train, predicted_val, predicted_test = (
        model.predict(X_train),
        model.predict(X_val),
        model.predict(X_test),
    )
    errors = (
        mean_absolute_error(y_train, predicted_train),
        mean_absolute_error(y_val, predicted_val),
        mean_absolute_error(y_test, predicted_test),
    )

    return (*errors, model)


def find_best_regresion_model(X_train, y_train, X_val, y_val, X_test, y_test, degrees, alphas, calculate_mae_with_regression):
    """
    Grid-search polynomial degree and alpha, keeping the lowest validation MAE.

    Every (degree, alpha) combination is passed to
    ``calculate_mae_with_regression``; the candidate with the strictly
    smallest validation MAE wins (ties keep the earliest candidate). The
    test MAE is only reported, never used for selection. A summary of the
    winner is printed to stdout.

    Parameters:
    X_train, y_train, X_val, y_val, X_test, y_test: data splits, forwarded
        untouched to ``calculate_mae_with_regression``
    degrees: iterable of polynomial degrees to try
    alphas: iterable of regularization strengths to try for each degree
    calculate_mae_with_regression: callable with the signature
        ``(X_train, y_train, X_val, y_val, X_test, y_test, degree, alpha)``
        returning ``(mae_train, mae_val, mae_test, model)``

    Returns:
    (best_model, best_degree, best_alpha, best_train_mae, best_mae_val, best_test_mae)

    Raises:
    ValueError: if no candidate was selected, i.e. ``degrees`` or ``alphas``
        is empty or no candidate produced a finite validation MAE.
    """
    # Materialize once so a one-shot iterator of alphas is reused for every
    # degree instead of being exhausted after the first one.
    alphas = list(alphas)

    found = False
    best_degree = None
    best_alpha = None
    best_train_mae = float('inf')
    best_mae_val = float('inf')
    best_test_mae = float('inf')
    best_model = None

    for degree in degrees:
        for alpha in alphas:
            mae_train, mae_val, mae_test, model = calculate_mae_with_regression(
                X_train, y_train, X_val, y_val, X_test, y_test, degree, alpha
            )

            # Model selection is driven by the validation error only.
            if mae_val < best_mae_val:
                found = True
                best_model = model
                best_train_mae = mae_train
                best_mae_val = mae_val
                best_test_mae = mae_test
                best_degree = degree
                best_alpha = alpha

    if not found:
        # Fail with a clear message instead of a TypeError from formatting None below.
        raise ValueError(
            "No model was selected: `degrees` or `alphas` is empty, "
            "or no candidate produced a finite validation MAE."
        )

    print("Best Model Found:")
    print(f"Degree: {best_degree}")
    print(f"Alpha: {best_alpha:.4f}")
    print(f"Train MAE: {best_train_mae:.2f}")
    print(f"Validation MAE: {best_mae_val:.2f}")
    print(f"Test MAE: {best_test_mae:.2f}")

    return best_model, best_degree, best_alpha, best_train_mae, best_mae_val, best_test_mae

def get_model_function(model):
    """
    Render a fitted polynomial-regression pipeline as a readable formula.

    The pipeline must contain a fitted ``'poly'`` step (PolynomialFeatures)
    and end with a fitted linear estimator exposing ``coef_`` and
    ``intercept_`` (for example the ``'ridge'`` or ``'lasso'`` step built in
    this file). Input features are named ``x0, x1, ...``; when a scaler
    precedes ``'poly'`` those names refer to the scaled features.

    Parameters:
    model: fitted Pipeline as described above

    Returns:
    str of the form ``"<intercept> + <coef>*<term> + ..."`` with every number
    formatted to three decimals.
    """
    poly = model.named_steps['poly']
    # Take the feature count from the fitted transformer rather than from a
    # module-level training matrix, so the function works for any pipeline.
    input_names = [f"x{i}" for i in range(poly.n_features_in_)]
    feature_names = poly.get_feature_names_out(input_features=input_names)

    # The regressor is the last pipeline step, whatever it was named.
    regressor = model.steps[-1][1]
    coefs = regressor.coef_
    intercept = regressor.intercept_

    terms = [f"{coef:.3f}*{name}" for coef, name in zip(coefs, feature_names)]
    return " + ".join([f"{intercept:.3f}", *terms])

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# Load the diabetes dataset and split it 60% / 20% / 20% into train,
# validation and test sets (40% is held out first, then halved).
X, y = load_diabetes(return_X_y=True)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


# Ridge (L2) search: L2 regularization yields a model with smaller coefficients.
# Try polynomial degrees 1 through 10 against each alpha below.
degrees = range(1, 11)
alphas = [0.00001, 0.001, 0.01, 0.1, 1, 10, 100]  # Candidate alpha values to try
print("Finding best Ridge regression model (L2 Norm)...")
# Only the winning pipeline is kept; the returned degree, alpha and MAE values are discarded.
best_model, _, _, _, _, _ = find_best_regresion_model(X_train, y_train, X_val, y_val, X_test, y_test, degrees, alphas, calculate_mae_with_ridge)
# Inspect the fitted Ridge step of the winning pipeline.
coeficients = best_model.named_steps['ridge'].coef_
intercept = best_model.named_steps['ridge'].intercept_
print("Coefficients:", coeficients)
print("Intercept:", intercept)
print("Model formula:", get_model_function(best_model))

# Lasso (L1) search, currently disabled: L1 regularization yields a model with fewer non-zero coefficients.
# degrees = range(1, 6)  # With L1, high degrees can be problematic
# alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10]  # Alpha needs smaller values than with L2
# print("Finding best Lasso regression model (L1 Norm)...")
# find_best_regresion_model(X_train, y_train, X_val, y_val, X_test, y_test,degrees, alphas, calculate_mae_with_lasso)




Finding best Ridge regression model (L2 Norm)...
Best Model Found:
Degree: 2
Alpha: 0.0010
Train MAE: 40.93
Validation MAE: 37.74
Test MAE: 44.47
Coefficients: [ 7.43325137e+01 -2.51582579e+02  5.45836600e+02  3.59550755e+02
 -7.95721260e+02  5.89908704e+02 -4.71208576e+01  2.16861788e+01
  6.60899536e+02  3.69726964e+01  1.13409726e+03  1.74778684e+03
 -4.22837370e+02  9.50042527e+02 -2.18753685e+02 -6.87804634e+02
  3.11279913e+01  5.01180086e+02  1.05996678e+03  6.82979947e+02
 -1.51917693e+00  4.00072587e+02  7.30949446e+02  4.90842257e+02
  5.00878322e+01  7.28529790e+02 -6.01115466e+02  4.35892073e+02
  1.19617989e+02  6.42904593e+02  1.70794842e+03 -5.10952040e+02
 -8.14088704e+01 -6.69023830e+02  4.42486468e+02  1.95327635e+02
  8.92354724e+02 -2.38424931e+02  4.01347881e+02  1.33227593e+02
  2.54300368e+02 -3.41219081e+02  3.64541448e+02 -1.10002663e+03
  4.93685071e+02  5.81690610e+01  5.06832355e+02 -8.10410090e+02
  5.44815595e+02  6.71508148e+02 -5.66891015e+02 -1.58811997