# In [None]:
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import time

import warnings
warnings.filterwarnings('ignore')

# The same title is used for the browser tab and the on-page heading.
APP_TITLE = "Personal Fitness Tracker"

# Browser-tab metadata.
st.set_page_config(page_title=APP_TITLE, page_icon="💪")

# Main-page heading followed by a one-line description of the app.
st.title(APP_TITLE)
st.write(
    "Predict your calories burned based on your fitness parameters and analyze the data."
)

# Heading shown at the top of the sidebar.
st.sidebar.header("Enter Your Fitness Parameters")

def user_input_features():
    """Collect the user's fitness parameters from sidebar widgets.

    Returns:
        A one-row ``pandas.DataFrame`` (index ``0``) with the columns
        ``Age``, ``BMI``, ``Duration``, ``Heart_Rate``, ``Body_Temp`` and
        ``Gender_male``. ``Gender_male`` is ``1`` when "Male" is selected
        in the radio widget and ``0`` otherwise.
    """
    sidebar = st.sidebar

    # Dict values are evaluated top to bottom, so the widgets are created
    # (and therefore rendered) in exactly this order.
    row = {
        "Age": sidebar.slider("Age", 10, 100, 30),
        "BMI": sidebar.slider("BMI", 15, 40, 20),
        "Duration": sidebar.slider("Duration (min)", 0, 35, 15),
        "Heart_Rate": sidebar.slider("Heart Rate (bpm)", 60, 130, 80),
        "Body_Temp": sidebar.slider("Body Temperature (°C)", 36.0, 42.0, 38.0),
        "Gender_male": int(sidebar.radio("Gender", ("Male", "Female")) == "Male"),
    }

    # Scalars need an explicit index to form a single-row frame.
    return pd.DataFrame(row, index=[0])

# Read the sidebar widgets into a one-row frame and echo it on the main page.
df = user_input_features()
st.header("Your Input Parameters")
st.write(df)

# File uploaders: calories first, then exercise; both accept CSV only.
calories_file, exercise_file = [
    st.file_uploader(label, type=["csv"])
    for label in ("Upload Calories CSV", "Upload Exercise CSV")
]

if calories_file and exercise_file:
    # Load and preprocess data
    @st.cache_data
    def load_and_preprocess_data(calories_file, exercise_file):
        """Load both CSVs, derive BMI and build the train/test matrices.

        The two tables are inner-joined on ``User_ID`` (which is then
        dropped). The merged data must contain the columns ``Gender``,
        ``Age``, ``Height``, ``Weight``, ``Duration``, ``Heart_Rate``,
        ``Body_Temp`` and ``Calories``.

        Args:
            calories_file: Anything ``pd.read_csv`` accepts (path or
                file-like object) for the calories table.
            exercise_file: Anything ``pd.read_csv`` accepts (path or
                file-like object) for the exercise table.

        Returns:
            A tuple ``(X_train, y_train, X_test, y_test, exercise_df)``:
            an 80/20 split (``random_state=1``) of the one-hot encoded
            model columns, with ``Calories`` as the target, plus the full
            merged frame including the derived ``BMI`` column.

        Raises:
            KeyError: If a required column is missing from the uploaded data.
        """
        calories = pd.read_csv(calories_file)
        exercise = pd.read_csv(exercise_file)
        exercise_df = exercise.merge(calories, on="User_ID").drop(columns="User_ID")

        # BMI = weight / height**2. Height is divided by 100 first
        # (presumably centimetres -> metres; confirm against the dataset).
        height_scaled = exercise_df["Height"] / 100
        exercise_df["BMI"] = (exercise_df["Weight"] / height_scaled ** 2).round(2)

        model_columns = ["Gender", "Age", "BMI", "Duration", "Heart_Rate", "Body_Temp", "Calories"]

        # One-hot encode once, BEFORE splitting. Encoding each split on its
        # own yields different columns whenever a split happens to lack a
        # category, which would make X_train and X_test incompatible.
        encoded = pd.get_dummies(exercise_df[model_columns], drop_first=True)
        train_part, test_part = train_test_split(encoded, test_size=0.2, random_state=1)

        X_train = train_part.drop(columns="Calories")
        y_train = train_part["Calories"]
        X_test = test_part.drop(columns="Calories")
        y_test = test_part["Calories"]
        return X_train, y_train, X_test, y_test, exercise_df

    X_train, y_train, X_test, y_test, exercise_df = load_and_preprocess_data(calories_file, exercise_file)

    # Data Analysis
    st.header("Data Analysis")

    # Bar plot: mean calories per gender
    st.subheader("Calories Burned by Gender")
    gender_calories = exercise_df.groupby("Gender")["Calories"].mean()
    st.bar_chart(gender_calories)

    # NOTE: pyplot keeps every figure it creates registered until it is
    # explicitly closed. This section runs again on every script execution,
    # so each figure is closed right after rendering to avoid accumulating
    # open figures (and their memory).

    # Scatter plot: Calories vs. Duration
    st.subheader("Calories Burned vs. Duration")
    fig, ax = plt.subplots()
    sns.scatterplot(x="Duration", y="Calories", data=exercise_df, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    # Histogram: Distribution of Age
    st.subheader("Distribution of Age")
    fig, ax = plt.subplots()
    sns.histplot(exercise_df["Age"], bins=20, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    # Box plot: BMI distribution
    st.subheader("BMI Distribution")
    fig, ax = plt.subplots()
    sns.boxplot(x=exercise_df["BMI"], ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    # Train the model
    @st.cache_resource
    def train_model(X_train, y_train):
        """Fit the calorie regressor on the training split.

        Args:
            X_train: Feature matrix passed to ``fit``.
            y_train: Target values passed to ``fit``.

        Returns:
            The fitted ``RandomForestRegressor``.
        """
        forest_params = {
            "n_estimators": 1000,
            "max_features": 3,
            "max_depth": 6,
            "random_state": 42,  # fixed seed for reproducible forests
        }
        # ``fit`` returns the estimator itself, so it can be returned directly.
        return RandomForestRegressor(**forest_params).fit(X_train, y_train)

    random_reg = train_model(X_train, y_train)

    # Align prediction data columns with training data. Any training column
    # that is absent from the user input is filled with 0.
    df = df.reindex(columns=X_train.columns, fill_value=0)

    # Make prediction
    if st.button("Predict Calories Burned"):
        with st.spinner("Predicting..."):
            time.sleep(1)  # presumably a cosmetic pause so the spinner is visible
            predicted = random_reg.predict(df)[0]
            st.header("Predicted Calories Burned")
            st.write(f"{round(predicted, 2)} *kilocalories*")

            st.header("Similar Results")
            # Dataset rows whose calories lie within +/-10 of the prediction
            # (``between`` includes both bounds, like the >= / <= pair).
            in_range = exercise_df["Calories"].between(predicted - 10, predicted + 10)
            similar_data = exercise_df[in_range]
            if not similar_data.empty:
                # Show at most five randomly chosen matching rows.
                st.write(similar_data.sample(min(5, len(similar_data))))
            else:
                st.write("No similar results found.")

            st.header("General Information")
            # Fraction of dataset rows strictly below the user's value, per
            # column. The mean of a boolean Series is the share of True values.
            share_below = {
                column: (exercise_df[column] < df[column].iloc[0]).mean()
                for column in ("Age", "Duration", "Heart_Rate", "Body_Temp")
            }

            # Format with ``:.0%`` rather than ``round(x, 2) * 100``: the
            # latter multiplies an already-rounded float and can print
            # artefacts such as ``56.99999999999999%``.
            st.write(f"You are older than {share_below['Age']:.0%} of other people.")
            st.write(f"Your exercise duration is higher than {share_below['Duration']:.0%} of other people.")
            st.write(f"You have a higher heart rate than {share_below['Heart_Rate']:.0%} of other people during exercise.")
            st.write(f"You have a higher body temperature than {share_below['Body_Temp']:.0%} of other people during exercise.")

else:
    st.info("Please upload both Calories and Exercise CSV files to proceed.")