In [37]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.preprocessing import PolynomialFeatures

In [38]:
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target


In [39]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [40]:
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)
y_pred_linear = linear_reg.predict(X_valid)

In [41]:
poly_reg_bmi = PolynomialFeatures(degree=2, include_bias=False)
X_train_bmi_poly = poly_reg_bmi.fit_transform(X_train[:, [2]])
X_valid_bmi_poly = poly_reg_bmi.transform(X_valid[:, [2]])
linear_reg_bmi_poly = LinearRegression()
linear_reg_bmi_poly.fit(X_train_bmi_poly, y_train)
y_pred_bmi_poly = linear_reg_bmi_poly.predict(X_valid_bmi_poly)

In [42]:
poly_reg_all = PolynomialFeatures(degree=2, include_bias=False)
X_train_all_poly = poly_reg_all.fit_transform(X_train)
X_valid_all_poly = poly_reg_all.transform(X_valid)
linear_reg_all_poly = LinearRegression()
linear_reg_all_poly.fit(X_train_all_poly, y_train)
y_pred_all_poly = linear_reg_all_poly.predict(X_valid_all_poly)

In [43]:
def evaluate_model(y_true, y_pred):
    r_squared = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    mae = mean_absolute_error(y_true, y_pred)
    return r_squared, mape, mae

r_squared_linear, mape_linear, mae_linear = evaluate_model(y_valid, y_pred_linear)
r_squared_bmi_poly, mape_bmi_poly, mae_bmi_poly = evaluate_model(y_valid, y_pred_bmi_poly)
r_squared_all_poly, mape_all_poly, mae_all_poly = evaluate_model(y_valid, y_pred_all_poly)

print("multivariate linea regression")
print(f"rsquared {r_squared_linear}")
print(f"MAPE {mape_linear}%")
print(f"MAE {mae_linear}")

print("\npolynomial regression on BMI")
print(f"rsquared {r_squared_bmi_poly}")
print(f"MAPE {mape_bmi_poly}%")
print(f"MAE {mae_bmi_poly}")

print("\nmultivariate polynomial regression")
print(f"rsqured {r_squared_all_poly}")
print(f"MAPE {mape_all_poly}%")
print(f"MAE {mae_all_poly}")

multivariate linea regression
rsquared 0.5112619269090262
MAPE 34.61633710712247%
MAE 38.216681372349036

polynomial regression on BMI
rsquared 0.296223055272985
MAPE 41.90243458933215%
MAE 48.27302777867063

multivariate polynomial regression
rsqured 0.36717480117280155
MAPE 38.08962481749319%
MAE 42.47137889140918


In [44]:
num_features_linear = X_train.shape[1]
params_linear = num_features_linear + 1 
degree = 2 
poly_reg_bmi = PolynomialFeatures(degree=degree, include_bias=False)
X_train_bmi_poly = poly_reg_bmi.fit_transform(X_train[:, [2]])
params_bmi_poly = X_train_bmi_poly.shape[1]

degree = 2 
poly_reg_all = PolynomialFeatures(degree=degree, include_bias=False)
X_train_all_poly = poly_reg_all.fit_transform(X_train)
params_all_poly = X_train_all_poly.shape[1]

print("number of parameters")
print(f"mutivariate linear regression {params_linear} ")
print(f"polynomial regression on BMI {params_bmi_poly} ")
print(f"multivariatepolynomial regression {params_all_poly}")


number of parameters
mutivariate linear regression 11 
polynomial regression on BMI 2 
multivariatepolynomial regression 65


>For simplicity the polynomial regression on bmi with low number of parameter will be a good choice.

>If overall model performance is important then multivariate polynomial regression with higher num of parameters will be a good choice.

>The multivariate linear regression provides a balanced approach .

>The choice of deployment is based on the specific requirements.