Loading Libraries and Data

In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn import model_selection
import seaborn as sns
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pickle

In [None]:
# Read the dataset from a path relative to the notebook's working directory.
data = pd.read_csv("calories.csv")

Data Exploration

In [None]:
# Preview the first rows (pandas shows five by default) to sanity-check the parsed columns.
data.head()

In [None]:
# Summary statistics for the columns pandas selects by default (numeric ones when any exist).
data.describe()

In [None]:
# Print column names, non-null counts and dtypes; a quick first check for missing values.
data.info()

Visualize missing values

In [None]:
# Boolean mask of missing entries, drawn as a heatmap with one cell per (row, column) value.
missing_mask = data.isna()
sns.heatmap(missing_mask)

Data Visualization 

In [None]:
# Number of rows for each Gender value.
sns.countplot(data=data, x='Gender')
plt.title("Count by Category")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Distribution of the Calories column with a KDE curve drawn over the histogram.
calories = data["Calories"]
sns.histplot(calories, kde=True)

In [None]:
# Histogram of the Age column using 30 bins.
plt.hist(data['Age'], bins=30)
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.title("Histogram")
plt.show()

In [None]:
# Spread of Calories within each Gender group.
sns.boxplot(data=data, x="Gender", y="Calories")
plt.title("Boxplot by Category")
plt.show()

In [None]:
# Weight (x-axis) against Height (y-axis); alpha keeps overlapping points readable.
plt.scatter(x=data['Weight'], y=data['Height'], alpha=0.6)
plt.xlabel("W")
plt.ylabel("H")
plt.title("Scatter Plot")
plt.show()

In [None]:
# Pairwise relationships between selected features and the Calories target.
#
# No `hue` is passed on purpose: seaborn treats the hue variable as a grouping
# variable (one colour level per distinct value) and leaves it out of the grid,
# so hue='Calories' would hide the target from the plot and build a legend
# entry for every distinct calorie value.
pair_columns = ['Age', 'Heart_Rate', 'Body_Temp', 'Calories']
sns.pairplot(data[pair_columns])
plt.show()

In [None]:
# Bar height is np.mean of Calories within each Gender group.
sns.barplot(data=data, x='Gender', y='Calories', estimator=np.mean)
plt.title("Average Value by Category")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Encode the Gender labels as integers. Values that are not keys of the
# mapping are left untouched, so re-running this cell is harmless.
gender_codes = {"female": 0, "male": 1}
data["Gender"] = data["Gender"].replace(gender_codes)

In [None]:
# Pairwise correlations between the columns, annotated to two decimals.
corr_matrix = data.corr()

plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, fmt=".2f", cmap="coolwarm", annot=True)
plt.title("Correlation Heatmap")
plt.show()

Data PreProcessing

In [None]:
# Separate the regression target from the feature columns.
# NOTE(review): every column other than Calories is used as a feature; if the
# CSV carries an identifier column it should be dropped here — confirm against the file.
y = data["Calories"]
x = data.drop(columns=["Calories"])

# Split BEFORE scaling so the held-out rows never contribute to the scaler's
# mean/variance (fitting the scaler on the full data leaks test information).
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same transform to
# both splits. `x` itself stays the unscaled feature frame; code that
# cross-validates on `x` should scale inside each fold (e.g. with a Pipeline)
# when the estimator is scale-sensitive.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [None]:

# NOTE(review): none of these classification metrics is used by the visible
# cells of this regression notebook — confirm before removing the import.
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay

from sklearn.model_selection import cross_val_score

# Candidate regressors keyed by display name. Every estimator that accepts a
# seed gets the same one, so repeated runs produce the same fitted models and
# scores (previously only the random forest was seeded).
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(
        n_estimators=500,
        max_depth=6,
        min_samples_split=50,
        min_samples_leaf=20,
        max_features=0.3,
        random_state=42,
    ),
    "Gradient Boosting": GradientBoostingRegressor(random_state=42),
}

# 5-fold cross-validated R² for each candidate, computed on the full x / y.
for name, model in models.items():
    scores = cross_val_score(model, x, y, cv=5, scoring="r2")
    print(f"{name} CV R² scores: {scores}")
    print(f"{name} Mean R²: {scores.mean():.3f}")
    print("-"*50)



In [None]:
# Fit each candidate on the training split and plot its test-set predictions
# against the true values.
for name, model in models.items():
    model.fit(x_train, y_train)
    yr_pred = model.predict(x_test)
    plt.scatter(y_test, yr_pred, s=10)
    # Reference line y = x: both endpoints must use the SAME limits on each
    # axis (the previous version mixed y_test and y_train ranges, so the line
    # was not the identity). The limits span actual and predicted values.
    lims = [min(y_test.min(), yr_pred.min()), max(y_test.max(), yr_pred.max())]
    plt.plot(lims, lims, 'k--')
    plt.xlabel('Actual')
    plt.ylabel('Predicted')
    plt.title(f'Predicted vs Actual for {name}')
    plt.show()

In [None]:
    
# Residuals (actual - predicted) against the predictions for each candidate.
# The fit is repeated here so the cell does not depend on another cell having
# trained the models first.
for name, model in models.items():
    model.fit(x_train, y_train)
    yr_pred = model.predict(x_test)
    resid = y_test - yr_pred
    plt.scatter(yr_pred, resid, s=10)
    # Zero line: points above it are under-predictions, below it over-predictions.
    plt.axhline(0, linestyle='--')
    plt.xlabel('Predicted')
    plt.ylabel('Residual')
    # The title previously ran the words together ("ResidualsLinear Regression").
    plt.title(f'Residuals ({name})')
    plt.show()
    

In [None]:
from sklearn.model_selection import train_test_split, learning_curve, validation_curve

# Train/validation RMSE as a function of training-set size, one figure per model.
for name, model in models.items():
    train_sizes, train_scores, val_scores = learning_curve(
        model,
        x,
        y,
        train_sizes=np.linspace(0.1, 1.0, 5),
        cv=5,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
    )
    # The scorer returns negated RMSE; flip the sign after averaging over axis 1.
    train_rmse = -train_scores.mean(axis=1)
    val_rmse = -val_scores.mean(axis=1)

    plt.plot(train_sizes, train_rmse, marker='o', label='Train RMSE')
    plt.plot(train_sizes, val_rmse, marker='o', label='Val RMSE')
    plt.xlabel('Training examples')
    plt.ylabel('RMSE')
    plt.title(f'Learning Curve ({name})')
    plt.legend()
    plt.show()

In [None]:
import joblib

# Choose the estimator to persist by name instead of relying on whatever object
# the last `for name, model in models.items()` loop left in `model`. This is
# the same estimator the loops leave behind (the final entry of `models`), but
# it no longer depends on which cell ran last.
model = models["Gradient Boosting"]

# NOTE(review): "churn_model.pkl" looks like a name carried over from another
# project; it is kept so existing consumers of the file keep working — confirm
# before renaming.
joblib.dump(model, "churn_model.pkl")
joblib.dump(scaler, "scaler.pkl")

# Reload both artifacts to confirm they round-trip from disk.
model = joblib.load("churn_model.pkl")
scaler = joblib.load("scaler.pkl")