In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error

# Load the Diabetes dataset as a (features, target) pair
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

# Train-validation-test split: 30% of the rows are held out first, then that
# hold-out is halved into validation and test parts.
# X_test / y_test are not used anywhere else in this section.
X_train, X_temp, y_train, y_temp = train_test_split(
    diabetes_X, diabetes_y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)


def _validation_scores(predictions):
    """Return (R-squared, MAE, MAPE) of *predictions* measured against y_val."""
    return (
        r2_score(y_val, predictions),
        mean_absolute_error(y_val, predictions),
        mean_absolute_percentage_error(y_val, predictions),
    )


# Model 1: multivariate linear regression on the original features
linear_reg = LinearRegression().fit(X_train, y_train)
linear_reg_predictions = linear_reg.predict(X_val)
linear_reg_r2, linear_reg_mae, linear_reg_mape = _validation_scores(
    linear_reg_predictions
)

# Model 2: 2nd-degree polynomial regression on the BMI feature alone
# (column index 2, kept 2-D by indexing with a list). The expansion is fitted
# on the training rows only and then re-applied to the validation rows.
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_train_bmi_poly = poly_features.fit_transform(X_train[:, [2]])
X_val_bmi_poly = poly_features.transform(X_val[:, [2]])
poly_reg_bmi = LinearRegression().fit(X_train_bmi_poly, y_train)
poly_reg_bmi_predictions = poly_reg_bmi.predict(X_val_bmi_poly)
poly_reg_bmi_r2, poly_reg_bmi_mae, poly_reg_bmi_mape = _validation_scores(
    poly_reg_bmi_predictions
)

# Model 3: 2nd-degree polynomial regression on all features
poly_features_all = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly_features_all.fit_transform(X_train)
X_val_poly = poly_features_all.transform(X_val)
poly_reg_all = LinearRegression().fit(X_train_poly, y_train)
poly_reg_all_predictions = poly_reg_all.predict(X_val_poly)
poly_reg_all_r2, poly_reg_all_mae, poly_reg_all_mape = _validation_scores(
    poly_reg_all_predictions
)

# Report validation R-squared, MAPE and MAE for each model, formatted with
# 2 decimal places and one value per line
print(f"Multivariate Linear Regression: \nR-squared: {linear_reg_r2:.2f}\nMAPE: {linear_reg_mape:.2f}\nMAE: {linear_reg_mae:.2f}")
print(f"\nPolynomial Regression (2nd degree) on BMI: \nR-squared: {poly_reg_bmi_r2:.2f}\nMAPE: {poly_reg_bmi_mape:.2f}\nMAE: {poly_reg_bmi_mae:.2f}")
print(f"\nMultivariate Polynomial Regression (2nd degree): \nR-squared: {poly_reg_all_r2:.2f}\nMAPE: {poly_reg_all_mape:.2f}\nMAE: {poly_reg_all_mae:.2f}")

# Report the number of fitted coefficients per model. Only coef_ is counted
# here; the intercept is stored separately by LinearRegression and is not
# part of these numbers.
print("\nNumber of parameters in Multivariate Linear Regression:", len(linear_reg.coef_))
print("Number of parameters in Polynomial Regression (2nd degree) on BMI:", len(poly_reg_bmi.coef_))
print("Number of parameters in Multivariate Polynomial Regression (2nd degree):", len(poly_reg_all.coef_))



Multivariate Linear Regression: 
R-squared: 0.51
MAPE: 0.35
MAE: 38.22

Polynomial Regression (2nd degree) on BMI: 
R-squared: 0.30
MAPE: 0.42
MAE: 48.27

Multivariate Polynomial Regression (2nd degree): 
R-squared: 0.37
MAPE: 0.38
MAE: 42.47

Number of parameters in Multivariate Linear Regression: 10
Number of parameters in Polynomial Regression (2nd degree) on BMI: 2
Number of parameters in Multivariate Polynomial Regression (2nd degree): 65


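On the validation split of Scikit-Learn's Diabetes dataset, the multivariate linear regression explains 51% of the variance with 10 coefficients and outperforms both polynomial models on every reported metric (R-squared, MAPE and MAE). The second-degree polynomial regression on BMI alone has 2 coefficients and explains 30% of the variance. The multivariate second-degree polynomial regression has 65 coefficients yet explains only 37% of the variance; because it scores worse on unseen data than the simpler linear model despite having far more terms, it appears to overfit the training set. (The parameter counts above are coefficient counts and exclude each model's intercept.) Despite its simplicity, the multivariate linear regression strikes the best balance between accuracy and complexity, making it the best choice for deployment: it offers reasonable predictive power without overfitting. Its performance should still be confirmed on the held-out test set, which was not used in this comparison.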