# In [None]:
import pandas as pd
import numpy as np
import warnings
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from tkinter import *
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Suppress warnings: installs a blanket 'ignore' filter, so every warning
# category is silenced from this point on.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported above -- consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# 1. Reading data from CSV
def read_csv(file_path):
    """
    Load a CSV file into a pandas DataFrame.

    Parameters:
    - file_path: str, location of the CSV file (forwarded as-is to
      ``pd.read_csv``).

    Returns:
    - pd.DataFrame, the parsed contents of the file.
    """
    frame = pd.read_csv(file_path)
    return frame

# 2. Getting information and statistics about the dataset
def dataset_info_statistics(data):
    """
    Print a structural summary and basic statistics for the dataset.

    Parameters:
    - data: pandas DataFrame, input data.

    Returns:
    - None (everything is written to standard output).
    """
    print("Dataset Information:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line after the report.
    data.info()
    print("\nBasic Statistics for Numerical Columns:")
    print(data.describe())
    print("\n")

# 3. Check for null values in the dataset
def check_null(data):
    """
    Count and report null values in the dataset.

    Parameters:
    - data: pandas DataFrame, input data.

    Returns:
    - pd.Series, the count of null values for each column (also printed
      under a header line).
    """
    null_counts = data.isnull().sum()
    print("Null Values in the Dataset:")
    # Print the counts along with the header: a caller that ignores the
    # return value would otherwise see the header and nothing else.
    print(null_counts)
    return null_counts

# 4. Check for duplicated rows in the dataset
def check_duplicates(data):
    """
    Check for duplicated rows in the dataset.

    Parameters:
    - data: pandas DataFrame, input data.

    Returns:
    - bool, True if any duplicated rows exist, False otherwise.
    """
    # Series.any() can hand back a numpy boolean; convert so the result is a
    # plain Python bool as documented (and identity checks like `is True` work).
    return bool(data.duplicated().any())

# 5. Getting basic analysis for numerical and categorical columns
def plot_graph(data):
    """
    Plot one figure per column: histograms for numerical columns and
    count plots for object-dtype (categorical) columns.

    Parameters:
    - data: Pandas DataFrame, input data.

    Returns:
    - None (figures are shown with ``plt.show()``).
    """
    numerical_columns = data.select_dtypes(include=np.number).columns
    for column in numerical_columns:
        plt.figure(figsize=(5, 3))
        sns.histplot(data[column], kde=True)
        plt.title(f"Histogram for {column}")
        plt.xlabel(column)
        plt.ylabel("Frequency")
        plt.show()

    categorical_columns = data.select_dtypes(include='object').columns
    for column in categorical_columns:
        plt.figure(figsize=(5, 3))
        # Pass the column as `x=` explicitly: countplot's only positional
        # parameter is `data` in current seaborn, so a positional Series is
        # no longer interpreted as the variable to count.
        sns.countplot(x=data[column])
        plt.title(f'Countplot for {column}')
        plt.xlabel(column)
        plt.ylabel('Count')
        plt.xticks(rotation=45)
        plt.show()

# 6. Separate features and target
def separate_features_target(data, target_column):
    """
    Split a DataFrame into its feature columns and its target column.

    Parameters:
    - data: pandas DataFrame, input data.
    - target_column: str, name of the column holding the target variable.

    Returns:
    - X: pandas DataFrame, every column except the target.
    - y: pandas Series, the target column.
    """
    # `columns=` already says which axis to drop from; no `axis` needed.
    features = data.drop(columns=[target_column])
    target = data[target_column]
    return features, target

# 7. Train-test split
def perform_train_test_split(X, y, test_size=0.20, random_state=42):
    """
    Split features and target into training and testing subsets.

    Parameters:
    - X: pandas DataFrame, features.
    - y: pandas Series, target variable.
    - test_size: float, optional, share of the rows placed in the test split (default is 0.20).
    - random_state: int or None, optional, seed for the shuffle (default is 42).

    Returns:
    - X_train: pandas DataFrame, features for training.
    - X_test: pandas DataFrame, features for testing.
    - y_train: pandas Series, target variable for training.
    - y_test: pandas Series, target variable for testing.
    """
    splits = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )
    # Hand the four pieces back as a tuple: (X_train, X_test, y_train, y_test).
    return tuple(splits)

# UserGoal class
class UserGoal:
    """
    Running total of calories burned by one user, measured against a target.

    NOTE: this file defines a second ``UserGoal`` further down (in the GUI
    section); being later, that definition is the one bound to the name once
    the whole module has run.

    Attributes:
    - user_id: identifier of the user, as passed in.
    - target_calories: the calorie goal.
    - current_calories: calories accumulated so far (starts at 0).
    """

    def __init__(self, user_id, target_calories):
        self.user_id = user_id
        self.target_calories = target_calories
        self.current_calories = 0

    def update_calories_burned(self, burned_calories):
        """
        Add ``burned_calories`` to the running total and print the progress.

        Returns:
        - the new running total, so callers can use it directly (this matches
          the later ``UserGoal`` definition in this file).
        """
        self.current_calories += burned_calories
        print(f"User {self.user_id} burned {burned_calories} calories. Total: {self.current_calories}/{self.target_calories}")
        return self.current_calories

# New class: UserGoalWithAchievements
class UserGoalWithAchievements(UserGoal):
    """
    ``UserGoal`` that also records an achievement once the target is reached.

    NOTE: this file defines a second ``UserGoalWithAchievements`` further down
    (in the GUI section); being later, that definition is the one bound to the
    name once the whole module has run.

    Attributes:
    - achievements: list of achievement messages earned so far.
    """

    def __init__(self, user_id, target_calories):
        super().__init__(user_id, target_calories)
        self.achievements = []

    def update_calories_burned(self, burned_calories):
        """
        Add ``burned_calories`` to the total, then check for achievements.

        Returns:
        - the new running total.
        """
        super().update_calories_burned(burned_calories)
        self.check_achievements()
        return self.current_calories

    def check_achievements(self):
        """
        Announce (print) the target-reached message while the total is at or
        above the target, recording it in ``achievements`` only once.
        """
        if self.current_calories < self.target_calories:
            return
        achievement_message = f"User {self.user_id} has reached their calorie target of {self.target_calories} calories!"
        # Record each distinct achievement a single time; without this guard
        # every update after the goal is met appends another identical entry.
        if achievement_message not in self.achievements:
            self.achievements.append(achievement_message)
        print(achievement_message)

# Main Execution: load the two CSVs, join them, train a baseline model and
# report its scores. These statements sit at module level, so they run
# whenever this file is executed or imported.
calories = read_csv('calories.csv')
exercise = read_csv('exercise.csv')

# Join the two tables on their shared 'User_ID' column (no `how=` is given,
# so pandas' default join type applies).
data = pd.merge(calories, exercise, on='User_ID')

dataset_info_statistics(data)
# The Series returned by check_null is not used here.
check_null(data)

# Uncomment to visualize data
# plot_graph(data)

# 'Calories' is the prediction target; 'User_ID' is dropped from the features.
X, y = separate_features_target(data, 'Calories')
X = X.drop(columns=['User_ID'])
X_train, X_test, y_train, y_test = perform_train_test_split(X, y, test_size=0.20, random_state=42)

# Column-wise preprocessing: 'Gender' goes through OrdinalEncoder, the six
# listed numeric columns through StandardScaler; any other column is passed
# through unchanged (remainder='passthrough').
preprocessor = ColumnTransformer(transformers=[
    ('ordinal', OrdinalEncoder(), ['Gender']),
    ('num', StandardScaler(), ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']),
], remainder='passthrough')

# Define pipeline with Linear Regression (preprocessing followed by the model)
pipeline = Pipeline([("preprocessor", preprocessor),
                     ("model", LinearRegression())])

# Fit the model on the training split and predict the held-out test split
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Evaluate model: hold-out R2 and MAE, plus the mean R2 of a shuffled 5-fold
# cross-validation run over the full X / y (not only the training split).
print("R2 Score:", r2_score(y_test, y_pred))
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
cv_results = cross_val_score(pipeline, X, y, cv=kfold, scoring='r2')
print("Cross-Validation Mean R2 Score:", cv_results.mean())
print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))

# Model scorer function
def model_scorer(model_name, model):
    """
    Fit ``model`` behind the shared preprocessing and score it.

    Uses the module-level ``X``, ``y`` and ``preprocessor``. The model is
    evaluated on a fixed 80/20 hold-out split and with shuffled 5-fold
    cross-validation over the full data (both seeded with 42).

    Parameters:
    - model_name: str, label stored as the first element of the result.
    - model: estimator to evaluate; it is fitted in place on the training split.

    Returns:
    - list: [model_name, hold-out R2, hold-out MAE, mean cross-validated R2].
    """
    # Local import so this block is self-contained (sklearn is already a
    # dependency of this file).
    from sklearn.base import clone

    # Pipeline.fit fits its steps in place. Work on a fresh copy of the
    # preprocessor so scoring a candidate never refits the transformer object
    # that the already-fitted module-level `pipeline` holds.
    scoring_pipeline = Pipeline([
        ('preprocessor', clone(preprocessor)),
        ('model', model),
    ])

    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.20, random_state=42)
    scoring_pipeline.fit(X_tr, y_tr)
    predictions = scoring_pipeline.predict(X_te)

    holdout_r2 = r2_score(y_te, predictions)
    holdout_mae = mean_absolute_error(y_te, predictions)

    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = cross_val_score(scoring_pipeline, X, y, cv=folds, scoring='r2')

    return [model_name, holdout_r2, holdout_mae, cv_scores.mean()]

# Candidate regressors to compare, keyed by display name.
model_dict = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(),
    'XGBoost': XGBRegressor(),
}

# One result row per candidate, in the order model_scorer builds it:
# [name, hold-out R2, hold-out MAE, mean cross-validated R2].
model_output = [
    model_scorer(label, estimator) for label, estimator in model_dict.items()
]
print(model_output)

# Save the model: pickle the module-level `pipeline` to 'pipeline.pkl'.
# NOTE(review): `pipeline` here is the LinearRegression pipeline fitted
# earlier at module level; the pipelines built inside model_scorer are local
# to that function, so none of the compared candidates is what gets saved --
# confirm this is the intended model to persist.
with open('pipeline.pkl', 'wb') as f:
    pickle.dump(pipeline, f)

# Load the model back from disk (round-trip check of the saved file)
with open('pipeline.pkl', 'rb') as f:
    pipeline_saved = pickle.load(f)

# Sample input for prediction: a single-row frame with the same column names
# the pipeline was trained on (scalars broadcast to one row via index=[0]).
sample = pd.DataFrame({
    'Gender': 'male',
    'Age': 68,
    'Height': 190.0,
    'Weight': 94.0,
    'Duration': 29.0,
    'Heart_Rate': 105.0,
    'Body_Temp': 40.8,
}, index=[0])

# `result` is whatever predict returns for the one-row frame; it is printed as-is.
result = pipeline_saved.predict(sample)
print("Predicted Calories Burnt:", result)

# GUI for predictions
def show_entry():
    """
    Predict calories from the GUI inputs and show the result in a label.

    Loads the pickled pipeline from 'pipeline.pkl', reads the gender from
    ``clicked`` and six numeric values from ``e2`` .. ``e7``, builds a
    one-row DataFrame, and writes the prediction into ``label_result``.

    NOTE(review): ``clicked``, ``e2``-``e7`` and ``label_result`` are read as
    globals, but nothing in the visible file assigns them at module level
    (``CalorieApp`` keeps its widgets on ``self``) and nothing visible calls
    this function. As written it looks like it would raise NameError when
    called; ``CalorieApp.predict_calories`` performs the same steps.
    Confirm it is unused before removing it.

    Raises:
    - ValueError: if any numeric field cannot be parsed by ``float()``.
    """
    with open('pipeline.pkl', 'rb') as f:
        pipeline = pickle.load(f)

    # Gender comes from the dropdown variable; the remaining fields are
    # free-text entries converted to float.
    p1 = str(clicked.get())
    p2 = float(e2.get())
    p3 = float(e3.get())
    p4 = float(e4.get())
    p5 = float(e5.get())
    p6 = float(e6.get())
    p7 = float(e7.get())

    # One-row frame using the feature column names the pipeline was trained on.
    sample = pd.DataFrame({
        'Gender': [p1],
        'Age': [p2],
        'Height': [p3],
        'Weight': [p4],
        'Duration': [p5],
        'Heart_Rate': [p6],
        'Body_Temp': [p7],
    })

    result = pipeline.predict(sample)
    label_result.config(text=f"Predicted Calories Burnt: {result[0]}")

# Create GUI
# UserGoal class
class UserGoal:
    """
    Tracks calories burned by a single user against a calorie target.

    Attributes:
    - user_id: identifier of the user, as passed in.
    - target_calories: the calorie goal.
    - current_calories: calories accumulated so far (starts at 0).
    """

    def __init__(self, user_id, target_calories):
        self.user_id, self.target_calories = user_id, target_calories
        self.current_calories = 0

    def update_calories_burned(self, burned_calories):
        """Add ``burned_calories`` to the running total and return the new total."""
        new_total = self.current_calories + burned_calories
        self.current_calories = new_total
        return new_total

# New class: UserGoalWithAchievements
class UserGoalWithAchievements(UserGoal):
    """
    ``UserGoal`` that also records an achievement once the target is reached.

    ``update_calories_burned`` is inherited unchanged from ``UserGoal`` (it
    adds to the running total and returns it); callers invoke
    ``check_achievements`` afterwards to find out whether the goal is met.

    Attributes:
    - achievements: list of achievement messages earned so far.
    """

    def __init__(self, user_id, target_calories):
        super().__init__(user_id, target_calories)
        self.achievements = []

    def check_achievements(self):
        """
        Report whether the calorie target has been reached.

        Returns:
        - str, the achievement message while the running total is at or above
          the target (returned on every call so the caller can keep showing it).
        - None, while the target has not been reached.
        """
        if self.current_calories < self.target_calories:
            return None
        achievement_message = f"User {self.user_id} has reached their calorie target of {self.target_calories} calories!"
        # Record each distinct achievement a single time; without this guard
        # every check after the goal is met appends another identical entry.
        if achievement_message not in self.achievements:
            self.achievements.append(achievement_message)
        return achievement_message

# Calorie Prediction App
class CalorieApp:
    """
    Tkinter window that predicts calories burnt and tracks progress to a goal.

    The window holds a gender dropdown, six numeric entries that feed the
    saved prediction pipeline, a target-calories entry, a burned-calories
    entry, and two labels (prediction result, goal / achievement status).

    Entry widgets are pre-filled with their hint text, so a field the user
    has not overwritten still contains that hint. Every button handler
    therefore validates its inputs and reports problems in the relevant label
    instead of letting ``float()`` raise inside the Tk callback.
    """

    def __init__(self, root):
        """Build all widgets inside ``root`` (a Tk root window)."""
        self.root = root
        self.root.geometry("600x600")
        self.root.title("Calorie Prediction and Goal Tracker")
        self.root.config(bg="#f7f9fc")

        # Set by set_user_goal(); None until the user has chosen a target.
        self.user_goal = None

        # Title
        title_label = Label(self.root, text="Calorie Prediction App", font=("Helvetica", 20, "bold"), bg="#f7f9fc", fg="#333")
        title_label.pack(pady=10)

        # Dropdown for gender selection
        self.clicked = StringVar()
        self.clicked.set("male")
        drop = OptionMenu(self.root, self.clicked, "male", "female")
        drop.pack(pady=10)

        # Input fields for user data
        self.e2 = self.create_entry("Enter Age (years)")
        self.e3 = self.create_entry("Enter Height (cm)")
        self.e4 = self.create_entry("Enter Weight (kg)")
        self.e5 = self.create_entry("Enter Duration (minutes)")
        self.e6 = self.create_entry("Enter Heart Rate")
        self.e7 = self.create_entry("Enter Body Temperature (°C)")

        # Input for target calories
        self.e_target = self.create_entry("Set Target Calories")

        # Button for prediction
        btn_predict = Button(self.root, text="Predict", command=self.predict_calories, bg="#4CAF50", fg="white", font=("Helvetica", 12, "bold"))
        btn_predict.pack(pady=10)

        # Button for setting user goal
        btn_set_goal = Button(self.root, text="Set Goal", command=self.set_user_goal, bg="#2196F3", fg="white", font=("Helvetica", 12, "bold"))
        btn_set_goal.pack(pady=10)

        # Input for burned calories
        self.e_burned = self.create_entry("Enter Burned Calories")

        # Button for updating burned calories
        btn_update_burned = Button(self.root, text="Update Burned Calories", command=self.update_burned_calories, bg="#FF9800", fg="white", font=("Helvetica", 12, "bold"))
        btn_update_burned.pack(pady=10)

        # Result label
        self.label_result = Label(self.root, text="", bg="#f7f9fc", fg="#333", font=("Helvetica", 12))
        self.label_result.pack(pady=10)

        # Achievement label
        self.label_achievement = Label(self.root, text="", bg="#f7f9fc", fg="#333", font=("Helvetica", 12))
        self.label_achievement.pack(pady=10)

    def create_entry(self, placeholder):
        """Create, pack and return an Entry pre-filled with ``placeholder``."""
        entry = Entry(self.root, width=30, font=("Helvetica", 12))
        entry.pack(pady=5)
        entry.insert(0, placeholder)
        return entry

    @staticmethod
    def _read_number(entry, field_name):
        """
        Return the text of ``entry`` parsed as a float.

        Raises:
        - ValueError: with a message naming ``field_name`` when the text is
          not a number (for example when it still holds the hint text).
        """
        raw = entry.get().strip()
        try:
            return float(raw)
        except ValueError:
            raise ValueError(f"{field_name} must be a number (got {raw!r})") from None

    def predict_calories(self):
        """
        Predict calories burnt from the form values and show the result.

        Any invalid numeric field, or a failure to load 'pipeline.pkl', is
        reported in the result label rather than raised.
        """
        # Feature column name -> entry widget, in the column order used for
        # the frame passed to the pipeline.
        numeric_inputs = (
            ('Age', self.e2),
            ('Height', self.e3),
            ('Weight', self.e4),
            ('Duration', self.e5),
            ('Heart_Rate', self.e6),
            ('Body_Temp', self.e7),
        )

        # Validate the inputs first so bad input never touches the model file.
        try:
            row = {
                column: [self._read_number(entry, column.replace('_', ' '))]
                for column, entry in numeric_inputs
            }
        except ValueError as err:
            self.label_result.config(text=str(err))
            return

        sample = pd.DataFrame({'Gender': [self.clicked.get()], **row})

        # Load the pipeline.
        # NOTE: unpickling runs code stored in the file -- only ever load a
        # 'pipeline.pkl' that comes from a trusted source.
        try:
            with open('pipeline.pkl', 'rb') as f:
                pipeline = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError) as err:
            self.label_result.config(text=f"Could not load the model: {err}")
            return

        result = pipeline.predict(sample)
        self.label_result.config(text=f"Predicted Calories Burnt: {result[0]:.2f}")

    def set_user_goal(self):
        """
        Start tracking a new goal using the value in the target entry.

        A non-numeric or non-positive target is reported in the achievement
        label and leaves the current goal (if any) untouched.
        """
        try:
            target_calories = self._read_number(self.e_target, "Target calories")
        except ValueError as err:
            self.label_achievement.config(text=str(err))
            return
        if target_calories <= 0:
            # A zero or negative target would count as reached immediately.
            self.label_achievement.config(text="Target calories must be greater than zero")
            return

        user_id = "User1"  # You can modify this to take user input if needed
        self.user_goal = UserGoalWithAchievements(user_id, target_calories)
        self.label_achievement.config(text=f"Target Calories Set: {target_calories}")

    def update_burned_calories(self):
        """
        Add the burned-calories entry to the current goal and show progress.

        Shows the achievement message once the target is reached, otherwise
        the running total; asks for a target first when none has been set.
        """
        if self.user_goal is None:
            self.label_achievement.config(text="Please set a target first!")
            return

        try:
            burned_calories = self._read_number(self.e_burned, "Burned calories")
        except ValueError as err:
            self.label_achievement.config(text=str(err))
            return

        current_calories = self.user_goal.update_calories_burned(burned_calories)
        achievement_message = self.user_goal.check_achievements()

        if achievement_message:
            self.label_achievement.config(text=achievement_message)
        else:
            self.label_achievement.config(text=f"Current Calories: {current_calories}/{self.user_goal.target_calories}")

# Main Execution: open the GUI only when this file is run as a script.
# Note that the data loading, training and pickling statements earlier in the
# file are NOT under this guard, so they execute on import as well.
if __name__ == "__main__":
    root = Tk()
    app = CalorieApp(root)
    # Blocks here, handling GUI events, until the window is closed.
    root.mainloop()