In [1]:
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "plotly_white"
import plotly.express as px

# Load the fitness export (path is relative to the working directory) and
# echo its first five rows as a quick sanity check of the columns.
data = pd.read_csv(filepath_or_buffer="Apple-Fitness-Data.csv")
print(data.head(n=5))

         Date       Time  Step Count  Distance  Energy Burned  \
0  2023-03-21  16:01:23           46   0.02543         14.620   
1  2023-03-21  16:18:37          645   0.40041         14.722   
2  2023-03-21  16:31:38           14   0.00996         14.603   
3  2023-03-21  16:45:37           13   0.00901         14.811   
4  2023-03-21  17:10:30           17   0.00904         15.153   

   Flights Climbed  Walking Double Support Percentage  Walking Speed  
0                3                              0.304          3.060  
1                3                              0.309          3.852  
2                4                              0.278          3.996  
3                3                              0.278          5.040  
4                3                              0.281          5.184  


In [2]:
# Number of missing entries in each column.
print(data.isna().sum())

Date                                 0
Time                                 0
Step Count                           0
Distance                             0
Energy Burned                        0
Flights Climbed                      0
Walking Double Support Percentage    0
Walking Speed                        0
dtype: int64


In [13]:
# Step Count Over Time
# "Time" holds only the time of day, so plotting against it alone would place
# readings from different dates on the same axis positions. Combine it with
# "Date" into a real timestamp and order the rows chronologically first.
step_count_timeline = (
    data.assign(Timestamp=pd.to_datetime(data["Date"] + " " + data["Time"]))
        .sort_values("Timestamp")
)
fig1 = px.line(step_count_timeline, x="Timestamp",
               y="Step Count",
               title="Step Count Over Time")
fig1.show()

In [3]:
# Distance Covered Over Time
# "Time" holds only the time of day, so plotting against it alone would place
# readings from different dates on the same axis positions. Combine it with
# "Date" into a real timestamp and order the rows chronologically first.
distance_timeline = (
    data.assign(Timestamp=pd.to_datetime(data["Date"] + " " + data["Time"]))
        .sort_values("Timestamp")
)
fig2 = px.line(distance_timeline, x="Timestamp",
               y="Distance",
               title="Distance Covered Over Time")
fig2.show()

In [4]:
# Energy Burned Over Time
# "Time" holds only the time of day, so plotting against it alone would place
# readings from different dates on the same axis positions. Combine it with
# "Date" into a real timestamp and order the rows chronologically first.
energy_timeline = (
    data.assign(Timestamp=pd.to_datetime(data["Date"] + " " + data["Time"]))
        .sort_values("Timestamp")
)
fig3 = px.line(energy_timeline, x="Timestamp",
               y="Energy Burned",
               title="Energy Burned Over Time")
fig3.show()

In [5]:
# Mean step count per calendar date, with "Date" kept as an ordinary column.
average_step_count_per_day = data.groupby("Date", as_index=False)["Step Count"].mean()

fig5 = px.bar(
    average_step_count_per_day,
    x="Date",
    y="Step Count",
    title="Average Step Count per Day",
)
# Treat each date as a discrete label rather than a point on a continuous axis.
fig5.update_xaxes(type="category")
fig5.show()

In [6]:
# Calculate Walking Efficiency (distance covered per step)
# A zero step count would make the ratio inf (or NaN for 0/0); mask those rows
# to NaN so the ratio is simply undefined there instead of distorting the chart.
nonzero_steps = data["Step Count"].where(data["Step Count"] != 0)
data["Walking Efficiency"] = data["Distance"] / nonzero_steps

# "Time" holds only the time of day, so build a full timestamp from "Date" and
# "Time" and sort by it to get a single chronological x axis.
efficiency_timeline = (
    data.assign(Timestamp=pd.to_datetime(data["Date"] + " " + data["Time"]))
        .sort_values("Timestamp")
)
fig6 = px.line(efficiency_timeline, x="Timestamp",
               y="Walking Efficiency",
               title="Walking Efficiency Over Time")
fig6.show()

In [7]:
import statsmodels.api as sm

# Create Time Intervals
# Half-open hour bins (right=False): [0, 12) Morning, [12, 18) Afternoon,
# [18, 24) Evening. Times that do not match HH:MM:SS are coerced to NaT and
# therefore end up without an interval.
interval_labels = ["Morning", "Afternoon", "Evening"]
hour_of_day = pd.to_datetime(data["Time"], format='%H:%M:%S', errors='coerce').dt.hour
time_intervals = pd.cut(hour_of_day,
                        bins=[0, 12, 18, 24],
                        labels=interval_labels,
                        right=False)

data["Time Interval"] = time_intervals

# Variations in Step Count and Walking Speed by Time Interval
# category_orders pins the legend to Morning -> Afternoon -> Evening instead of
# whatever order the intervals first appear in the data.
fig7 = px.scatter(data, x="Step Count",
                  y="Walking Speed",
                  color="Time Interval",
                  category_orders={"Time Interval": interval_labels},
                  title="Step Count and Walking Speed Variations by Time Interval")

# Calculate OLS regression line (Walking Speed ~ const + Step Count).
# Build the design matrix once and reuse it for fitting and prediction.
ols_design = sm.add_constant(data["Step Count"])
ols_results = sm.OLS(data["Walking Speed"], ols_design).fit()

# Add regression line to the plot.
# Sort by x so the line is drawn left to right in one pass, and give the trace
# a name and a neutral colour so it is listed in the legend and cannot be
# confused with one of the time-interval groups.
ols_line = pd.DataFrame({
    "Step Count": data["Step Count"],
    "Fitted Walking Speed": ols_results.predict(ols_design),
}).sort_values("Step Count")
fig7.add_trace(go.Scatter(x=ols_line["Step Count"],
                          y=ols_line["Fitted Walking Speed"],
                          mode="lines",
                          name="OLS fit",
                          line=dict(color="black")))

fig7.show()


In [9]:


# Convert relevant columns to numeric (entries that cannot be parsed become NaN)
numeric_columns = ["Step Count", "Distance", "Energy Burned", "Flights Climbed", "Walking Double Support Percentage", "Walking Speed"]
data[numeric_columns] = data[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Create a pivot table: one row per date holding the mean of every metric
daily_avg_metrics_pivot = pd.pivot_table(data, index="Date", values=numeric_columns, aggfunc='mean')

# Reset index to make 'Date' a column
daily_avg_metrics_pivot = daily_avg_metrics_pivot.reset_index()

# Melt the DataFrame into long form: one row per (Date, Metric) pair
daily_avg_metrics_melted = daily_avg_metrics_pivot.melt(id_vars=["Date"],
                                                        value_vars=numeric_columns,
                                                        var_name="Metric",
                                                        value_name="Value")

# px.treemap sums `values` over all rows that share a path node. Passing the
# per-day rows directly would make every tile show the SUM of the daily means
# across all dates, not an average. Collapse to one mean per metric first so
# tile size and hover value are a true daily average. sort=False keeps the
# metrics in their original order so colour assignment is unchanged.
avg_daily_value_per_metric = (
    daily_avg_metrics_melted
    .groupby("Metric", as_index=False, sort=False)["Value"]
    .mean()
)

# Treemap of Daily Averages for Different Metrics Over Several Weeks
fig = px.treemap(avg_daily_value_per_metric,
                 path=["Metric"],
                 values="Value",
                 color="Metric",
                 hover_data=["Value"],
                 title="Daily Averages for Different Metrics")
fig.show()


In [25]:
# Select metrics excluding Step Count
metrics_to_visualize = ["Distance", "Energy Burned", "Flights Climbed",
                        "Walking Double Support Percentage", "Walking Speed"]

# Reshape the per-day averages into long form: one row per (Date, Metric) pair.
# Explicit column names replace melt's default "variable"/"value" so the
# hover labels are meaningful.
daily_avg_metrics_melted = daily_avg_metrics_pivot.melt(id_vars=["Date"],
                                                        value_vars=metrics_to_visualize,
                                                        var_name="Metric",
                                                        value_name="Value")

# px.treemap sums `values` over all rows that share a path node. Passing the
# per-day rows directly would make every tile show the SUM of the daily means
# across all dates, not an average. Collapse to one mean per metric first.
# sort=False keeps the metrics in list order so colour assignment is stable.
avg_daily_value_per_metric = (
    daily_avg_metrics_melted
    .groupby("Metric", as_index=False, sort=False)["Value"]
    .mean()
)

fig = px.treemap(avg_daily_value_per_metric,
                 path=["Metric"],
                 values="Value",
                 color="Metric",
                 hover_data=["Value"],
                 title="Daily Averages for Different Metrics (Excluding Step Count)")
fig.show()