In [2]:
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "plotly_dark"
import plotly.express as plx

In [None]:
userdata = pd.read_csv("Apple-Fitness-Data.csv")
print(userdata.head())

In [None]:
print(userdata.isnull().sum())

In [None]:
stepTime = plx.line(userdata, x="Time",
               y="Step Count",
               title="Step Count Over Time")
stepTime.show()

In [None]:
distance_time_graph = plx.line(userdata, x="Time",
               y="Distance",
               title="Distance Covered Over Time")
distance_time_graph.show()

In [None]:
energyTime = plx.line(userdata, x="Time",
               y="Energy Burned",
               title="Energy Burned Over Time")
energyTime.show()

In [None]:
speedTime = plx.line(userdata, x="Time",
               y="Walking Speed",
               title="Walking Speed Over Time")
speedTime.show()

In [29]:
avgStep_per_day = userdata.groupby("Date")["Step Count"].mean().reset_index()

avgStep_count = plx.bar(avgStep_per_day, x="Date",
              y="Step Count",
              title="Average Step Count per Day")
avgStep_count.update_xaxes(type='category')
avgStep_count.show()

Most average step counted on 22nd March and 30th March to 1st April.

In [28]:
userdata["Walking Efficiency"] = userdata["Distance"] / userdata["Step Count"]

walkingEfficiency = plx.line(userdata, x="Time",
               y="Walking Efficiency",
               title="Walking Efficiency Over Time")
walkingEfficiency.show()

Most walking efficiency was recorded on around 19:07:33 which was almost 0.003 and the lowest one recoreded afterwards, between 19:07:33 to 23:29:50.

In [27]:
time_intervals = pd.cut(pd.to_datetime(userdata["Time"]).dt.hour,
                        bins=[0, 12, 18, 24],
                        labels=["Morning", "Afternoon", "Evening"],
                        right=False)

userdata["Time Interval"] = time_intervals

stepTimeSpeed = plx.scatter(userdata, x="Step Count",
                  y="Walking Speed",
                  color="Time Interval",
                  title="Step Count and Walking Speed Variations by Time Interval",
                  trendline='ols')
stepTimeSpeed.show()

Most number of steps were counted during afternoon. Second highest was at evening. The most speed was recorded during evening and least at afternoon.

In [25]:
daily_avg_metrics = userdata.groupby("Date").mean().reset_index()

daily_avg_metrics_melted = daily_avg_metrics.melt(id_vars=["Date"],
                                                  value_vars=["Step Count", "Distance",
                                                              "Energy Burned", "Flights Climbed",
                                                              "Walking Double Support Percentage",
                                                              "Walking Speed"])

dailyMatrics = plx.treemap(daily_avg_metrics_melted,
                 path=["variable"],
                 values="value",
                 color="variable",
                 hover_data=["value"],
                 title="Daily Averages for Different Metrics")
dailyMatrics.show()


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [26]:
metricsVisualization = ["Distance", "Energy Burned", "Flights Climbed",
                        "Walking Double Support Percentage", "Walking Speed"]

daily_avg_metrics_melted = daily_avg_metrics.melt(id_vars=["Date"], value_vars= metricsVisualization)

matricsExStep = plx.treemap(daily_avg_metrics_melted,
                 path=["variable"],
                 values="value",
                 color="variable",
                 hover_data=["value"],
                 title="Daily Averages for Different Metrics (Excluding Step Count)")
matricsExStep.show()

So we may conclude that the user walks the most during afternoon and evening. His most speed and efficiency was recorded during the evening.
The analysis also shows that the user walks below 150 steps on most of the days but he made it more at the last few days. The analysis suffest that the user need to maintain the consistency.