In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, make_scorer
import warnings
# NOTE(review): with no category/module argument this suppresses every warning
# raised later in the notebook, including deprecation (FutureWarning) and
# solver-convergence warnings. Consider narrowing the filter.
warnings.filterwarnings("ignore")

In [3]:
# Read the spreadsheet into a DataFrame:
data = pd.read_excel('OriginalData.xlsx')

# Name the response column and the predictor columns:
target = 'rating'
features = ['fat', 'fiber', 'sugars']

# Split the frame into response (y) and predictors (X):
y = data[target]
X = data[features]

# Fit the scaler on the predictors, then apply it to the same rows.
# The fitted `scaler` is kept so the identical transform can be reused later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Define evaluation metrics
def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values (list, ndarray or pandas Series).
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Notes
    -----
    The division is not guarded: an entry of ``y_true`` equal to zero yields
    ``inf`` (or ``nan`` when the matching prediction is also zero).
    """
    # Work on plain float arrays so that (a) Python lists are accepted and
    # (b) two pandas Series are compared position-by-position instead of being
    # aligned on their index labels, which would silently produce NaN.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Setting up scoring methods:
def _rmse(y_true, y_pred):
    """Root mean squared error for a single target.

    Computed as sqrt(MSE) rather than through ``mean_squared_error``'s
    ``squared=False`` keyword, which newer scikit-learn releases no longer accept.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


# RMSE, MAE and MAPE are all errors (lower is better). Passing
# greater_is_better=False makes each scorer return the negated metric, so
# anything that maximises a score (GridSearchCV, cross_validate ranking) picks
# the model with the smallest error. Negate the reported scores to read them
# as ordinary positive errors.
scoring = {
    'RMSE': make_scorer(_rmse, greater_is_better=False),
    'MAE': make_scorer(mean_absolute_error, greater_is_better=False),
    'MAPE': make_scorer(mape, greater_is_better=False)
}

# Plain least squares plus the two regularised variants:
ols, ridge, lasso = LinearRegression(), Ridge(), Lasso()

# Both searches scan the same 50 log-spaced alphas (1e-4 .. 1e4) with 10-fold CV,
# ranking candidates by negated mean squared error.
param_grid = {'alpha': np.logspace(-4, 4, 50)}
search_options = dict(cv=10, scoring='neg_mean_squared_error')
ridge_cv = GridSearchCV(ridge, param_grid, **search_options)
lasso_cv = GridSearchCV(lasso, param_grid, **search_options)

# NOTE(review): X_scaled was standardized using all rows before this point, so
# each CV fold sees scaling statistics that include its own validation rows.
for estimator in (ols, ridge_cv, lasso_cv):
    estimator.fit(X_scaled, y)

# NOTE(review): predictions are made on the same rows the models were fitted on,
# so metrics computed from them are in-sample.
y_pred_ols = ols.predict(X_scaled)
y_pred_ridge = ridge_cv.best_estimator_.predict(X_scaled)
y_pred_lasso = lasso_cv.best_estimator_.predict(X_scaled)

# Calculating metrics for each model:
def calculate_metrics(y_true, y_pred):
    """Compute RMSE, MAE and MAPE for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values (single target).
    y_pred : array-like
        Predicted target values, in the same order as ``y_true``.

    Returns
    -------
    tuple
        ``(rmse, mae, mape_value)`` in that order; ``mape_value`` is whatever
        ``mape(y_true, y_pred)`` returns.
    """
    # sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
    # keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the old
    # call raises TypeError on current releases. For a single target the two
    # forms give the same value.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    mape_value = mape(y_true, y_pred)
    return rmse, mae, mape_value

# Pair each model label with its predictions (insertion order fixes the row order):
model_predictions = {
    'OLS': y_pred_ols,
    'Ridge': y_pred_ridge,
    'Lasso': y_pred_lasso,
}

# One (rmse, mae, mape) tuple per model, then transpose into per-metric columns:
metric_rows = [calculate_metrics(y, preds) for preds in model_predictions.values()]
rmse_col, mae_col, mape_col = (list(column) for column in zip(*metric_rows))

results = {
    'Model': list(model_predictions),
    'RMSE': rmse_col,
    'MAE': mae_col,
    'MAPE': mape_col,
}

# Display the results:
results_df = pd.DataFrame(results)
display(results_df)

Unnamed: 0,Model,RMSE,MAE,MAPE
0,OLS,5.097585,4.183151,10.430002
1,Ridge,5.121186,4.201541,10.518289
2,Lasso,5.133491,4.193542,10.499429
