In [1]:
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px

# Render every Plotly figure in this notebook with the white template.
pio.templates.default = "plotly_white"

# Load the fitness readings from the CSV export and preview the first five rows.
data = pd.read_csv("Apple-Fitness-Data.csv")
print(data.head(5))

         Date       Time  Step Count  Distance  Energy Burned  \
0  2023-03-21  16:01:23           46   0.02543         14.620   
1  2023-03-21  16:18:37          645   0.40041         14.722   
2  2023-03-21  16:31:38           14   0.00996         14.603   
3  2023-03-21  16:45:37           13   0.00901         14.811   
4  2023-03-21  17:10:30           17   0.00904         15.153   

   Flights Climbed  Walking Double Support Percentage  Walking Speed  
0                3                              0.304          3.060  
1                3                              0.309          3.852  
2                4                              0.278          3.996  
3                3                              0.278          5.040  
4                3                              0.281          5.184  


In [2]:
# Count the missing values in each column of the loaded frame.
print(data.isna().sum())

Date                                 0
Time                                 0
Step Count                           0
Distance                             0
Energy Burned                        0
Flights Climbed                      0
Walking Double Support Percentage    0
Walking Speed                        0
dtype: int64


In [3]:
# Step Count Over Time
# "Time" holds only a clock time (e.g. "16:01:23"). If the export covers more
# than one date, plotting against it alone puts readings from different days
# at the same x position. Join it with "Date" so the x-axis is a real timeline.
fig1 = px.line(
    data.assign(
        Timestamp=pd.to_datetime(
            data["Date"] + " " + data["Time"], format="%Y-%m-%d %H:%M:%S"
        )
    ),
    x="Timestamp",
    y="Step Count",
    title="Step Count Over Time",
    labels={"Timestamp": "Time"},  # keep the original axis caption
)
fig1.show()

In [4]:
# Distance Covered Over Time
# "Time" holds only a clock time (e.g. "16:01:23"). If the export covers more
# than one date, plotting against it alone puts readings from different days
# at the same x position. Join it with "Date" so the x-axis is a real timeline.
fig2 = px.line(
    data.assign(
        Timestamp=pd.to_datetime(
            data["Date"] + " " + data["Time"], format="%Y-%m-%d %H:%M:%S"
        )
    ),
    x="Timestamp",
    y="Distance",
    title="Distance Covered Over Time",
    labels={"Timestamp": "Time"},  # keep the original axis caption
)
fig2.show()

In [5]:
# Energy Burned Over Time
# "Time" holds only a clock time (e.g. "16:01:23"). If the export covers more
# than one date, plotting against it alone puts readings from different days
# at the same x position. Join it with "Date" so the x-axis is a real timeline.
fig3 = px.line(
    data.assign(
        Timestamp=pd.to_datetime(
            data["Date"] + " " + data["Time"], format="%Y-%m-%d %H:%M:%S"
        )
    ),
    x="Timestamp",
    y="Energy Burned",
    title="Energy Burned Over Time",
    labels={"Timestamp": "Time"},  # keep the original axis caption
)
fig3.show()

In [6]:
# Walking Speed Over Time
# "Time" holds only a clock time (e.g. "16:01:23"). If the export covers more
# than one date, plotting against it alone puts readings from different days
# at the same x position. Join it with "Date" so the x-axis is a real timeline.
fig4 = px.line(
    data.assign(
        Timestamp=pd.to_datetime(
            data["Date"] + " " + data["Time"], format="%Y-%m-%d %H:%M:%S"
        )
    ),
    x="Timestamp",
    y="Walking Speed",
    title="Walking Speed Over Time",
    labels={"Timestamp": "Time"},  # keep the original axis caption
)
fig4.show()

In [7]:
# Mean step count of the readings recorded on each date.
average_step_count_per_day = (
    data.groupby("Date", as_index=False)["Step Count"].mean()
)

fig5 = px.bar(
    average_step_count_per_day,
    x="Date",
    y="Step Count",
    title="Average Step Count per Day",
)
# Treat the dates as category labels rather than a continuous date axis.
fig5.update_xaxes(type='category')
fig5.show()

In [8]:
# Calculate Walking Efficiency: distance covered per step for each reading.
data["Walking Efficiency"] = data["Distance"] / data["Step Count"]

# "Time" holds only a clock time (e.g. "16:01:23"). If the export covers more
# than one date, plotting against it alone puts readings from different days
# at the same x position. Join it with "Date" so the x-axis is a real timeline.
fig6 = px.line(
    data.assign(
        Timestamp=pd.to_datetime(
            data["Date"] + " " + data["Time"], format="%Y-%m-%d %H:%M:%S"
        )
    ),
    x="Timestamp",
    y="Walking Efficiency",
    title="Walking Efficiency Over Time",
    labels={"Timestamp": "Time"},  # keep the original axis caption
)
fig6.show()

In [44]:

# Step count and walking speed broken down by part of day.
# NOTE(review): `data` is rebound here to five hard-coded sample rows, replacing
# whatever frame was previously stored under that name.
data = pd.DataFrame(
    [
        ("08:30", 5000, 3.5),
        ("14:45", 7000, 4.0),
        ("19:15", 6000, 3.8),
        ("10:00", 8000, 4.2),
        ("21:30", 4000, 3.0),
    ],
    columns=["Time", "Step Count", "Walking Speed"],
)

# Replace each "HH:MM" string with its integer hour.
data["Time"] = pd.to_datetime(data["Time"], format="%H:%M").dt.hour

# Half-open hour ranges: [0, 12) Morning, [12, 18) Afternoon, [18, 24) Evening.
time_intervals = pd.cut(
    data["Time"],
    bins=[0, 12, 18, 24],
    labels=["Morning", "Afternoon", "Evening"],
    right=False,
)
data["Time Interval"] = time_intervals

# Scatter of step count against walking speed, coloured by part of day, with an
# OLS trendline (requires statsmodels to be installed).
fig7 = px.scatter(
    data,
    title="Step Count and Walking Speed Variations by Time Interval",
    x="Step Count",
    y="Walking Speed",
    color="Time Interval",
    trendline="ols",
)
fig7.show()

In [10]:
# List the column labels currently present in `data`.
print(data.columns)

Index(['Time', 'Step Count', 'Walking Speed', 'Time Interval'], dtype='object')


In [11]:
# Per-date averages of the numeric columns, computed only when `data` actually
# has a 'Date' column; otherwise report that the column is missing.
if 'Date' in data.columns:
    # numeric_only=True skips non-numeric columns (such as "Time Interval"),
    # which would otherwise make the mean raise on current pandas versions.
    daily_avg_metrics = data.groupby("Date").mean(numeric_only=True).reset_index()
else:
    print("Column 'Date' not found in the DataFrame.")

Column 'Date' not found in the DataFrame.


In [22]:
# Preview the first rows of `data`.
# NOTE(review): the saved output includes a "Date" column that no visible cell
# adds to `data` — confirm which cell creates it before relying on a re-run.
print(data.head())

   Time  Step Count  Walking Speed Time Interval       Date
0     8        5000            3.5       Morning 2023-01-01
1    14        7000            4.0     Afternoon 2023-01-02
2    19        6000            3.8       Evening 2023-01-03
3    10        8000            4.2       Morning 2023-01-04
4    21        4000            3.0       Evening 2023-01-05


In [24]:
# Show the dtype of every column in `data`.
# NOTE(review): the saved output lists "Date" and "Time" as object, i.e. not
# parsed to datetime/numeric types at that point — confirm before date arithmetic.
print(data.dtypes)

Time              object
Step Count         int64
Walking Speed    float64
Time Interval     object
Date              object
dtype: object


In [40]:
# Combine 'Date' and 'Time' into a single 'DateTime' column.
# Parsing the concatenated "<date> <time>" text without a format makes pandas
# fall back to guessing each element, and errors='coerce' silently turns any
# wrong guess into NaT. Build the timestamp from typed parts instead.
# Assumes 'Time' holds the hour of day as a number — TODO confirm.
date_part = pd.to_datetime(data['Date'], errors='coerce')
hour_part = pd.to_timedelta(pd.to_numeric(data['Time'], errors='coerce'), unit='h')
# Rows whose date or hour could not be interpreted still end up as NaT.
data['DateTime'] = date_part + hour_part


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [30]:
# Per-date mean of every numeric column, with 'Date' kept as a regular column.
daily_avg_metrics = data.groupby("Date", as_index=False).mean(numeric_only=True)

In [34]:
# Reshape to long form: one row per (Date, metric) pair, with the metric name in
# "variable" and its number in "value".
daily_avg_metrics_melted = pd.melt(
    daily_avg_metrics,
    id_vars=["Date"],
    value_vars=["Step Count", "Walking Speed"],
)

# Treemap with one tile per metric, sized and hover-labelled by "value".
# NOTE(review): with several dates per metric the tile value is presumably the
# total across dates rather than a single average — confirm against the title.
fig = px.treemap(
    daily_avg_metrics_melted,
    title="Daily Averages for Different Metrics",
    path=["variable"],
    values="value",
    color="variable",
    hover_data=["value"],
)

fig.show()

In [35]:
# Hard-coded sample of per-date metric values (three dates, six metrics).
data1 = {
    'Date': ['2023-03-21', '2023-03-22', '2023-03-23'],
    'Step Count': [46, 645, 14],
    'Distance': [0.02543, 0.40041, 0.00996],
    'Energy Burned': [14.62, 14.722, 14.603],
    'Flights Climbed': [3, 3, 4],
    'Walking Double Support Percentage': [0.304, 0.309, 0.278],
    'Walking Speed': [3.06, 3.852, 3.996],
}

# Build the frame and parse 'Date' from text into datetime values in one step;
# `data1` itself is left untouched.
daily_avg_metrics = pd.DataFrame(data1).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)



In [36]:
# Reshape to long form: one row per (Date, metric) pair, with the metric name in
# "variable" and its number in "value".
daily_avg_metrics_melted = pd.melt(
    daily_avg_metrics,
    id_vars=["Date"],
    value_vars=[
        "Step Count",
        "Distance",
        "Energy Burned",
        "Flights Climbed",
        "Walking Double Support Percentage",
        "Walking Speed",
    ],
)

In [None]:
# Treemap with one tile per metric: each tile is sized by the metric's "value",
# and hovering a tile shows that value.
# NOTE(review): with several dates per metric the tile value is presumably the
# total across dates rather than a single average — confirm against the title.
fig = px.treemap(
    daily_avg_metrics_melted,
    title="Daily Averages for Different Metrics",
    path=["variable"],
    values="value",
    color="variable",
    hover_data=["value"],
)
fig.show()

In [None]:
# Step Count values are much larger than those of the other metrics, so its
# tile dominates the treemap and hides the variation among the rest.
# Redraw the treemap without Step Count.

# Metrics to keep (everything except Step Count).
metrics_to_visualize = [
    "Distance",
    "Energy Burned",
    "Flights Climbed",
    "Walking Double Support Percentage",
    "Walking Speed",
]

# Long form: one row per (Date, metric) pair for the selected metrics only.
daily_avg_metrics_melted = pd.melt(
    daily_avg_metrics,
    id_vars=["Date"],
    value_vars=metrics_to_visualize,
)

fig = px.treemap(
    daily_avg_metrics_melted,
    title="Daily Averages for Different Metrics (Excluding Step Count)",
    path=["variable"],
    values="value",
    color="variable",
    hover_data=["value"],
)
fig.show()