In [4]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

In [2]:
df = pd.read_csv("/Users/safiaread/Downloads/calories.csv")
df['Gender'] = df['Gender'].map({'male': 0, 'female': 1})
df.drop(['User_ID'], axis=1, inplace=True)

In [3]:
X = df.drop(columns = ["Calories"])
y = df['Calories']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  
X_test_scaled = scaler.transform(X_test)  

X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

In [9]:
df.describe()

Unnamed: 0,User_ID,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
count,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0
mean,14977360.0,42.7898,174.465133,74.966867,15.5306,95.518533,40.025453,89.539533
std,2872851.0,16.980264,14.258114,15.035657,8.319203,9.583328,0.77923,62.456978
min,10001160.0,20.0,123.0,36.0,1.0,67.0,37.1,1.0
25%,12474190.0,28.0,164.0,63.0,8.0,88.0,39.6,35.0
50%,14997280.0,39.0,175.0,74.0,16.0,96.0,40.2,79.0
75%,17449280.0,56.0,185.0,87.0,23.0,103.0,40.6,138.0
max,19999650.0,79.0,222.0,132.0,30.0,128.0,41.5,314.0


In [12]:
import numpy as np
from sklearn.linear_model import Ridge,RidgeCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, median_absolute_error, mean_squared_log_error



ridge_cv = RidgeCV(alphas=np.logspace(-10, 10, 19), cv=10) # Use 10-fold cross validation
ridge_cv.fit(X_train_scaled, y_train)

best_lambda = ridge_cv.alpha_

ridge_model = Ridge(alpha=best_lambda)
ridge_model.fit(X_train_scaled, y_train)

y_pred_ridge = ridge_model.predict(X_test_scaled)

# Calculate the test MSE
test_mse_ridge = mean_squared_error(y_test, y_pred_ridge)
mae = mean_absolute_error(y_test, y_pred_ridge)
r2 = r2_score(y_test, y_pred_ridge)
medae = median_absolute_error(y_test, y_pred_ridge)
print(f"Test MSE with Ridge Regression (lambda chosen by CV): {test_mse_ridge}")
print(f"MAE: {mae}")
print(f"Median Absolute Error: {medae}")
print(f"R²: {r2}")


Test MSE with Ridge Regression (lambda chosen by CV): 132.0019414005324
MAE: 8.441186197335307
Median Absolute Error: 6.444163873428884
R²: 0.9672921799498538


In [16]:
import numpy as np
from sklearn.linear_model import Lasso, LassoCV


lasso_cv = LassoCV(alphas=np.logspace(-10, 10, 19), cv=10) # Use 10-fold cross validation
lasso_cv.fit(X_train_scaled, y_train)

best_lambda = lasso_cv.alpha_

lasso_model = Lasso(alpha=best_lambda)
lasso_model.fit(X_train_scaled, y_train)

y_pred_lasso = lasso_model.predict(X_test_scaled)

# Calculate the test MSE
test_mse_lasso = mean_squared_error(y_test, y_pred_lasso)
print(f"Test MSE with Lasso Regression (lambda chosen by CV): {test_mse_lasso}")
mae = mean_absolute_error(y_test, y_pred_lasso)
r2 = r2_score(y_test, y_pred_lasso)
medae = median_absolute_error(y_test, y_pred_lasso)
print(f"Test MSE with Ridge Regression (lambda chosen by CV): {test_mse_lasso}")
print(f"MAE: {mae}")
print(f"Median Absolute Error: {medae}")
print(f"R²: {r2}")


Test MSE with Lasso Regression (lambda chosen by CV): 131.99574575130626
Test MSE with Ridge Regression (lambda chosen by CV): 131.99574575130626
MAE: 8.441513553833493
Median Absolute Error: 6.438014020527284
R²: 0.9672937151256082


In [19]:
import numpy as np
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    median_absolute_error,
    mean_squared_log_error
)

# Fit ElasticNetCV with cross-validation and hyperparameter tuning
elastic_cv = ElasticNetCV(
    cv=10,
    l1_ratio=np.linspace(0.8, 1.0, 5),                   # Since it's leaning Lasso
    alphas=np.logspace(-6, -2, 50),                      # Zoom into small alphas
    random_state=42
)
elastic_cv.fit(X_train_scaled, y_train)

# Best parameters
print(f"Best alpha: {elastic_cv.alpha_:.4f}")
print(f"Best l1_ratio: {elastic_cv.l1_ratio_:.2f}")

# Predictions
y_train_pred = elastic_cv.predict(X_train_scaled)
y_test_pred = elastic_cv.predict(X_test_scaled)

# Error metrics function
def print_metrics(y_true, y_pred, label=""):
    print(f"\n{label} Metrics:")
    print(f"MSE: {mean_squared_error(y_true, y_pred):.4f}")
    print(f"RMSE: {mean_squared_error(y_true, y_pred, squared=False):.4f}")
    print(f"MAE: {mean_absolute_error(y_true, y_pred):.4f}")
    print(f"Median AE: {median_absolute_error(y_true, y_pred):.4f}")
    print(f"R²: {r2_score(y_true, y_pred):.4f}")
    
    # MSLE requires all values > 0
    if (y_true > 0).all() and (y_pred > 0).all():
        print(f"MSLE: {mean_squared_log_error(y_true, y_pred):.4f}")
    else:
        print("MSLE: Not computed (non-positive values present)")

# Print metrics
print_metrics(y_train, y_train_pred, "Training")
print_metrics(y_test, y_test_pred, "Testing")

Best alpha: 0.0000
Best l1_ratio: 1.00

Training Metrics:
MSE: 126.9458
RMSE: 11.2670
MAE: 8.3068
Median AE: 6.4403
R²: 0.9672
MSLE: Not computed (non-positive values present)

Testing Metrics:
MSE: 131.9958
RMSE: 11.4889
MAE: 8.4415
Median AE: 6.4380
R²: 0.9673
MSLE: Not computed (non-positive values present)
