In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from datetime import datetime, timezone, timedelta

In [2]:
# Parse the Apple Health export and collect the attribute dict of every
# <Record> element that is a direct child of the root element.
tree = ET.parse('apple_health_export 2/export.xml')
root = tree.getroot()

records = [element.attrib for element in root.findall('Record')]

# One row per record; the three timestamp columns are converted to
# timezone-aware UTC datetimes so they can be compared and subtracted later.
df = pd.DataFrame(records)
for column in ("creationDate", "startDate", "endDate"):
    df[column] = pd.to_datetime(df[column], utc=True)

In [4]:
# Keep only sleep-analysis records that start after 2024-10-21 00:00 UTC,
# ordered by start time, and preview the first rows.
cutoff = datetime(2024, 10, 21, tzinfo=timezone.utc)
mask_a = df["type"] == "HKCategoryTypeIdentifierSleepAnalysis"
mask_b = df["startDate"] > cutoff
df_sleep = df.loc[mask_a & mask_b].sort_values("startDate")

df_sleep.head()

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,device
4578185,HKCategoryTypeIdentifierSleepAnalysis,Christian’s Apple Watch,11.0.1,,2024-10-21 06:00:40+00:00,2024-10-21 00:11:29+00:00,2024-10-21 00:16:59+00:00,HKCategoryValueSleepAnalysisAsleepDeep,
4578186,HKCategoryTypeIdentifierSleepAnalysis,Christian’s Apple Watch,11.0.1,,2024-10-21 06:00:40+00:00,2024-10-21 00:16:59+00:00,2024-10-21 00:51:29+00:00,HKCategoryValueSleepAnalysisAsleepCore,
4578187,HKCategoryTypeIdentifierSleepAnalysis,Christian’s Apple Watch,11.0.1,,2024-10-21 06:00:40+00:00,2024-10-21 00:51:29+00:00,2024-10-21 00:52:59+00:00,HKCategoryValueSleepAnalysisAwake,
4578188,HKCategoryTypeIdentifierSleepAnalysis,Christian’s Apple Watch,11.0.1,,2024-10-21 06:00:40+00:00,2024-10-21 00:52:59+00:00,2024-10-21 00:55:59+00:00,HKCategoryValueSleepAnalysisAsleepCore,
4578189,HKCategoryTypeIdentifierSleepAnalysis,Christian’s Apple Watch,11.0.1,,2024-10-21 06:00:40+00:00,2024-10-21 00:55:59+00:00,2024-10-21 01:03:59+00:00,HKCategoryValueSleepAnalysisAsleepREM,


In [4]:
# Rows to keep: sleep-analysis records that start after 2024-10-21 00:00 UTC.
mask_a = df.type=="HKCategoryTypeIdentifierSleepAnalysis"
mask_b = df.startDate > datetime(2024, 10, 21, tzinfo=timezone.utc)

# Collapse the HealthKit sleep values into three coarse states: every
# "Asleep*" stage counts as 'asleep', while "AsleepUnspecified" and "InBed"
# are both reported as 'unspecified'.
mapping = {
    'HKCategoryValueSleepAnalysisAsleepCore': 'asleep',
    'HKCategoryValueSleepAnalysisAwake': 'awake',
    'HKCategoryValueSleepAnalysisAsleepREM': 'asleep',
    'HKCategoryValueSleepAnalysisAsleepDeep': 'asleep',
    'HKCategoryValueSleepAnalysisAsleepUnspecified': 'unspecified',
    'HKCategoryValueSleepAnalysisInBed': 'unspecified'
}

# Per-segment rows in chronological order, with the segment length computed
# exactly once (it used to be computed here and again inside a row-wise apply).
sleep_records = df[mask_a & mask_b].sort_values(by="startDate")
sleep_records = sleep_records.assign(
    duration=sleep_records["endDate"] - sleep_records["startDate"]
)

# One {"type", "duration"} dict per segment. A plain comprehension over the two
# columns replaces DataFrame.apply(axis=1): it avoids building a Series per row
# and always yields a list, including when no rows were selected.
# A value missing from `mapping` still raises KeyError, as before.
sleep_records["tmp"] = [
    {"type": mapping[value], "duration": duration}
    for value, duration in zip(sleep_records["value"], sleep_records["duration"])
]

# Rows sharing the same creationDate are merged into a single row: earliest
# start, latest end, and the list of their segment dicts. The creationDate key
# itself is discarded by reset_index(drop=True).
df_sleep = sleep_records.groupby("creationDate").aggregate(
    ts_start = ("startDate", "min"),
    ts_end = ("endDate", "max"),
    details = ("tmp", list)
).reset_index(drop=True)

def _process_details(row):
    """Total the segment durations of one grouped row, per state.

    `row` is a list of dicts, each with a "type" and a "duration" entry.
    Returns a dict mapping every distinct "type" to the sum of its durations.
    """
    segments = pd.DataFrame(row)
    totals_by_type = segments.groupby("type")["duration"].sum()
    return totals_by_type.to_dict()
# Turn each row's list of segment dicts into per-state totals, one column per
# state. A state that does not occur in a given row is left as a missing value
# there; no zero-fill is applied.
per_row_totals = [_process_details(segments) for segments in df_sleep["details"]]
df_tmp = pd.DataFrame(per_row_totals)

# Attach the totals side by side and discard the raw segment lists.
df_sleep = pd.concat([df_sleep, df_tmp], axis=1).drop(columns=["details"])

# total: span from earliest start to latest end of the row.
# utilization: fraction of that span spent in the 'asleep' state.
df_sleep = df_sleep.assign(total=df_sleep["ts_end"] - df_sleep["ts_start"])
df_sleep = df_sleep.assign(utilization=df_sleep["asleep"] / df_sleep["total"])
df_sleep

Unnamed: 0,ts_start,ts_end,asleep,awake,total,utilization
0,2024-10-21 00:11:29+00:00,2024-10-21 06:00:29+00:00,0 days 05:29:00,0 days 00:20:00,0 days 05:49:00,0.942693
1,2024-10-21 21:38:03+00:00,2024-10-22 05:00:33+00:00,0 days 06:57:00,0 days 00:25:30,0 days 07:22:30,0.942373
2,2024-10-22 21:39:12+00:00,2024-10-23 05:05:12+00:00,0 days 07:00:00,0 days 00:26:00,0 days 07:26:00,0.941704
3,2024-10-23 21:32:13+00:00,2024-10-24 05:02:13+00:00,0 days 07:08:30,0 days 00:21:30,0 days 07:30:00,0.952222
4,2024-10-24 22:47:55+00:00,2024-10-25 05:38:25+00:00,0 days 06:13:00,0 days 00:37:30,0 days 06:50:30,0.908648
5,2024-10-25 21:15:11+00:00,2024-10-26 06:08:41+00:00,0 days 07:53:30,0 days 01:00:00,0 days 08:53:30,0.887535
6,2024-10-26 21:49:54+00:00,2024-10-27 06:09:54+00:00,0 days 07:56:00,0 days 00:24:00,0 days 08:20:00,0.952
7,2024-10-27 21:37:30+00:00,2024-10-28 05:30:30+00:00,0 days 07:29:30,0 days 00:23:30,0 days 07:53:00,0.950317
8,2024-10-28 22:04:54+00:00,2024-10-29 05:22:24+00:00,0 days 06:27:30,0 days 00:50:00,0 days 07:17:30,0.885714
9,2024-10-29 22:55:00+00:00,2024-10-30 06:00:00+00:00,0 days 06:28:30,0 days 00:36:30,0 days 07:05:00,0.914118


In [5]:
# Write df_sleep to sleep.csv in the working directory, without the row index.
df_sleep.to_csv(path_or_buf="sleep.csv", index=False)

In [None]:
import pandas as pd
import plotly.express as px

# Timeline of sleep and eating periods drawn on one horizontal lane.
# Both input frames need 'ts_start' and 'ts_end' columns.
# NOTE(review): `df_eat` is not created in any cell visible in this notebook;
# it has to exist before this cell runs, otherwise the cell fails with a
# NameError on a fresh kernel.

# Tag every period with its activity and stack the two frames. `assign`
# returns new frames, so `df_sleep` and `df_eat` are no longer modified as a
# side effect of plotting and the cell can be re-run safely.
df_plot = pd.concat(
    [df_sleep.assign(Activity='Sleep'), df_eat.assign(Activity='Eat')],
    ignore_index=True,
)

# Constant y value so all bars land on the same row.
df_plot['All'] = 'All'
# Only show periods that start after 2024-10-12 00:00 UTC.
df_plot = df_plot[df_plot.ts_start > datetime(2024, 10, 12, tzinfo=timezone.utc)]

# Create the timeline plot
fig = px.timeline(
    df_plot,
    x_start='ts_start',
    x_end='ts_end',
    y='All',
    color='Activity',
    hover_data=df_plot.columns,  # Include all columns in hover data
    color_discrete_map={'Sleep': 'blue', 'Eat': 'green'}
)

# The y-axis only carries the constant 'All' label, so hide it.
fig.update_yaxes(visible=False)

# A short figure is enough for a single lane; keep the legend for the colors.
fig.update_layout(height=200, showlegend=True)

# Display the plot
fig.show()

In [None]:
# Display the combined frame that feeds the timeline plot.
df_plot

In [None]:
# Utilization of each df_sleep row against its start time, drawn as a dashed
# line with point markers on a wide, short canvas.
utilization_fig, utilization_ax = plt.subplots(figsize=(15, 3))
utilization_ax.plot(df_sleep["ts_start"], df_sleep["utilization"], '--.')

In [None]:
# Summary statistics of the utilization column.
df_sleep["utilization"].describe()