In [1]:
import pandas as pd
import xml.etree.ElementTree as et
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
from astral import moon
import datetime as dt
import os
# Switch to the project folder so the relative "data/..." paths used below resolve.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
os.chdir(r"C:\Users\Christian\code_projects\apple-heart-rate")

### Preprocessing, memory optimization

In [3]:
# Parse the Apple Health export and flatten its <Record> elements into a typed DataFrame.
# NOTE(review): et.parse builds the whole document tree in memory before anything is read.
tree = et.parse(r"C:\Users\Christian\code_projects\health\apple_health_export\export.xml")
root = tree.getroot()

# Attributes copied from every record; an attribute a record lacks becomes None.
record_fields = [
    'type', 'unit', 'value', 'sourceName', 'sourceVersion',
    'device', 'creationDate', 'startDate', 'endDate',
]

# Only <Record> elements that are direct children of the root are collected.
records = [
    {field: record.attrib.get(field) for field in record_fields}
    for record in root.findall('Record')
]

# Passing the columns explicitly keeps the schema intact even if no record was found.
apple_data = pd.DataFrame(records, columns=record_fields)

# DataFrame.info() writes its report to stdout and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
apple_data.info()

# Low-cardinality text columns are stored as categoricals.
for column in ('type', 'unit', 'sourceName', 'device'):
    apple_data[column] = apple_data[column].astype('category')

# Values that cannot be parsed as numbers become NaN.
apple_data['value'] = pd.to_numeric(apple_data['value'], errors='coerce')

# Parse the timestamps, then drop the timezone so the columns are plain
# (tz-naive) datetimes that keep the recorded wall-clock time.
for column in ('creationDate', 'startDate', 'endDate'):
    apple_data[column] = pd.to_datetime(apple_data[column]).dt.tz_localize(None)

# Second report, for comparison with the one printed before the dtype conversion.
apple_data.info()

# Make sure the output folder exists before writing the cache file.
os.makedirs("data", exist_ok=True)
apple_data.to_feather("data/apple_health.feather")

In [18]:
# Load the typed frame from the feather file written by the preprocessing cell above.
apple_data = pd.read_feather("data/apple_health.feather")


In [4]:
# Keep the record types whose name mentions heart, step or stand (case-insensitive),
# ordered by start time with a fresh 0..n-1 index.
all_metrics = (
    apple_data
    .loc[lambda export: export['type'].str.contains('heart|step|stand', case=False)]
    .copy()
    .sort_values(by='startDate')
    .reset_index(drop=True)
)
# Drop the name bound to the full export frame.
del apple_data

In [5]:
# List the distinct record types that survived the filter.
all_metrics['type'].unique().tolist()

['HKQuantityTypeIdentifierStepCount',
 'HKCategoryTypeIdentifierAppleStandHour',
 'HKQuantityTypeIdentifierWalkingHeartRateAverage',
 'HKQuantityTypeIdentifierHeartRate',
 'HKQuantityTypeIdentifierRestingHeartRate',
 'HKQuantityTypeIdentifierHeartRateVariabilitySDNN',
 'HKCategoryTypeIdentifierHighHeartRateEvent',
 'HKQuantityTypeIdentifierAppleStandTime',
 'HKQuantityTypeIdentifierWalkingStepLength']

## Now that's a lot of heartbeats!

In [6]:
# Isolate the heart-rate samples (original index kept) and chart those starting in 2023 or later.
heart_rate = all_metrics[all_metrics['type'] == "HKQuantityTypeIdentifierHeartRate"].copy()
heart_rate_fig = px.line(
    heart_rate.loc[lambda samples: samples['startDate'] >= pd.to_datetime('2023-01-01')],
    x='startDate',
    y='value',
)
heart_rate_fig

### I just realized that I take my watch off when I sleep. By looking at the gaps "between" the heart rate measurements, I have a pretty decent estimate of my sleep pattern for the past 7 years!

In [7]:
# A gap is measured at the sample that ends it, so each inferred sleep is stamped
# with the time of the first reading after it (the morning after the sleep).
heart_rate['measurement_gap'] = heart_rate['startDate'].diff()
# Any pause of more than four hours between readings is treated as sleep.
heart_rate['inferred_sleep'] = heart_rate['measurement_gap'] > pd.Timedelta(hours=4)

inferred_sleep = (
    heart_rate.loc[heart_rate['inferred_sleep'], ['startDate', 'measurement_gap']]
    .reset_index(drop=True)
    .assign(
        hours_of_sleep=lambda gaps: gaps['measurement_gap'].dt.total_seconds() / 3600,
        # one day before the wake-up timestamp: the date the sleep started
        sleep_date=lambda gaps: gaps['startDate'] - pd.Timedelta(days=1),
    )
    .rename(columns={'startDate': 'woke_up_date'})
    .assign(day_of_week=lambda nights: nights['sleep_date'].dt.day_name())
    # outlier removal: gaps of 15 hours or more are not counted as sleep
    .query('hours_of_sleep < 15')
    .reset_index(drop=True)
)
# Rolling standard deviation over a window of 60 consecutive rows.
inferred_sleep['std_dev'] = inferred_sleep['hours_of_sleep'].rolling(window=60).std()

# Weekday colour scale, kept available in case the charts need it.
color_dict = {
    'Monday': 'blue',
    'Tuesday': 'orange',
    'Wednesday': 'green',
    'Thursday': 'red',
    'Friday': 'purple',
    'Saturday': 'black',
    'Sunday': 'gray',
}
inferred_sleep['day_color'] = inferred_sleep['day_of_week'].map(color_dict)

### Whoa! Obviously there is some noise here from me forgetting to put my watch on or forgetting to take it off (no comment). Still, there is an interesting trend towards consistency in recent years as I have become more in tune with my sleep (aka less cool).

In [8]:
# Scatter of inferred hours of sleep, one trace per weekday, plus the rolling
# standard deviation as a line.
sleep_chart = go.Figure()

# groupby(sort=False) yields the weekdays in order of first appearance and hands
# over each weekday's rows directly, so no boolean mask has to be rebuilt per trace.
for day, nights in inferred_sleep.groupby('day_of_week', sort=False):
    sleep_chart.add_trace(go.Scatter(x=nights['sleep_date'], y=nights['hours_of_sleep'], mode='markers', name=day))

# std_dev is computed over a 60-row rolling window, i.e. roughly two months of
# sleeps, so the legend names the window instead of calling it "monthly".
sleep_chart.add_trace(go.Scatter(x=inferred_sleep['sleep_date'], y=inferred_sleep['std_dev'], mode='lines', line=dict(width=3), name='Rolling Std. Deviation (60 sleeps)'))

# update_layout returns the figure, so as the last expression it also renders the chart.
sleep_chart.update_layout(
    xaxis = dict(title = "Date", range = [pd.to_datetime('2017-12-24'), pd.to_datetime('2024-10-01')]),
    yaxis = dict(title = "Hours of sleep", range = [0, 18], dtick = 2),
    title = "Inferred Sleep From Heart Rate Gaps",
)

In [10]:
# Persist the inferred sleep table to a feather file under the relative data/ folder.
inferred_sleep.to_feather('data/inferred_sleep.feather')

## Moon Phase and Heart Rate
### Bear with me here. This paper discusses the influence the moon phase has on heart rate: https://doi.org/10.1007/s00484-012-0605-z. What, am I supposed to not check that for myself??
### Hoping to see a slight decrease in my resting heart rate during full and new moon phases


In [11]:
# Phase names, one per consecutive 7-day bucket of the value returned by moon.phase.
_MOON_PHASE_NAMES = ("new_moon", "first_quarter", "full_moon", "last_quarter")


def get_moon_phase(date):
    """Return the name of the moon phase for ``date``.

    The value of ``moon.phase(date)`` is truncated to an integer and bucketed
    into four 7-day ranges: 0-6 "new_moon", 7-13 "first_quarter",
    14-20 "full_moon" and 21-27 "last_quarter".

    Parameters
    ----------
    date
        Passed unchanged to ``moon.phase``.

    Returns
    -------
    str or None
        The phase name, or ``None`` when the truncated value falls outside 0-27.
    """
    # Evaluate the phase once per call; it does not change between buckets.
    phase_day = int(moon.phase(date))
    if 0 <= phase_day < 7 * len(_MOON_PHASE_NAMES):
        return _MOON_PHASE_NAMES[phase_day // 7]
    return None



# Label every heart-rate sample with the moon phase of its start timestamp
# (get_moon_phase is called once per row).
heart_rate['moon_phase'] = heart_rate['startDate'].apply(get_moon_phase)


In [12]:
heart_rate['year'] = heart_rate['startDate'].dt.year
# Mean of the readings below 80, for every (moon phase, year) pair.
avg_rates_phase = (
    heart_rate
    .loc[lambda samples: samples['value'] < 80]
    .groupby(['moon_phase', 'year'])['value']
    .mean()
    .reset_index()
)

In [13]:
# Records whose type name contains "resting" (case-insensitive), tagged with the
# moon phase and calendar year of their start timestamp.
resting_heart = (
    all_metrics
    .loc[lambda metrics: metrics['type'].str.contains('resting', case=False)]
    .reset_index(drop=True)
    .assign(
        moon_phase=lambda resting: resting['startDate'].apply(get_moon_phase),
        year=lambda resting: resting['startDate'].dt.year,
    )
)

# Mean value for every (moon phase, year) pair.
avg_resting_per_phase = (
    resting_heart
    .groupby(['moon_phase', 'year'])['value']
    .mean()
    .reset_index()
)


### Hmm, underwhelming results. Was really hoping to see a consistent dip for full and new moon phases.
### However, I wasn't expecting to see such a dip in resting heart rate during 2021! This coincided with the pandemic of course, which is when I began boxing vigorously

In [16]:
# One colour per moon phase, listed in alphabetical order of the phase labels.
moon_phase_palette = [
    "#333399",  # first_quarter
    "#ccccff",  # full_moon
    "#666699",  # last_quarter
    "#000033",  # new_moon
]
# Bind each phase label to its colour explicitly. With only a colour sequence,
# colours are handed out in order of first appearance in the data, so a missing
# or re-ordered phase would silently shift every colour.
moon_phase_colors = dict(zip(
    ["first_quarter", "full_moon", "last_quarter", "new_moon"],
    moon_phase_palette,
))

# Grouped bars: average resting heart rate per year, one bar per moon phase.
# Rows for 2017 are left out of the chart.
resting_moon_fig = px.bar(
    avg_resting_per_phase.query('year != 2017'),
    x="year",
    y="value",
    color='moon_phase',
    title="Average Resting Heart Rate During Moon Phases",
    barmode='group',
    template='plotly',
    color_discrete_sequence=moon_phase_palette,  # fallback for labels not in the map
    color_discrete_map=moon_phase_colors,
)
# The y-axis is clipped to 50-62 BPM so the small differences between bars are visible.
resting_moon_fig.update_yaxes(range=[50, 62], title='Average Resting Heart Rate (BPM)')
resting_moon_fig.update_xaxes(title='Year')
resting_moon_fig.update_legends(title='Moon Phase')
resting_moon_fig

### I just so happened to be monitoring my body weight during this period as well!

In [25]:
# The in-memory export frame (apple_data) is deleted right after all_metrics is
# built, so on a top-to-bottom run it no longer exists here. Read the body-mass
# records straight from the cached feather file instead.
weight = (
    pd.read_feather("data/apple_health.feather")
    .loc[lambda export: export["type"] == "HKQuantityTypeIdentifierBodyMass"]
    .reset_index(drop=True)
)
weight

Unnamed: 0,type,unit,value,sourceName,sourceVersion,device,creationDate,startDate,endDate
0,HKQuantityTypeIdentifierBodyMass,lb,186.4,Health,16.0.3,,2023-07-08 09:50:38,2023-07-08 09:50:00,2023-07-08 09:50:00
1,HKQuantityTypeIdentifierBodyMass,lb,187.1,Health,16.0.3,,2023-07-15 10:09:27,2023-07-15 10:09:00,2023-07-15 10:09:00
2,HKQuantityTypeIdentifierBodyMass,lb,186.8,Health,16.0.3,,2023-07-16 09:55:04,2023-07-16 09:55:00,2023-07-16 09:55:00
3,HKQuantityTypeIdentifierBodyMass,lb,177.2,Health,15.3.1,,2022-03-21 08:38:36,2022-03-21 08:38:00,2022-03-21 08:38:00
4,HKQuantityTypeIdentifierBodyMass,lb,176.3,Health,15.3.1,,2022-03-26 10:23:04,2022-03-26 10:23:00,2022-03-26 10:23:00
...,...,...,...,...,...,...,...,...,...
57,HKQuantityTypeIdentifierBodyMass,lb,173.0,Christian’s Apple Watch,4.1,,2017-12-25 13:31:20,2017-12-25 13:31:20,2017-12-25 13:31:20
58,HKQuantityTypeIdentifierBodyMass,lb,182.0,Christian,11.2.6,,2018-03-19 20:05:07,2018-03-19 20:05:07,2018-03-19 20:05:07
59,HKQuantityTypeIdentifierBodyMass,lb,182.0,Christian,11.2.6,,2018-03-19 20:05:12,2018-03-19 20:05:12,2018-03-19 20:05:12
60,HKQuantityTypeIdentifierBodyMass,lb,183.5,Christian,11.2.6,,2018-03-21 18:44:57,2018-03-21 18:44:57,2018-03-21 18:44:57


In [28]:
# Scatter of body-weight entries over time.
# The unit label is taken from the data rather than hard-coded.
weight_units = "/".join(weight["unit"].astype(str).unique())

weight_plot = go.Figure()
weight_plot.add_trace(go.Scatter(x=weight.startDate, y=weight.value, mode='markers'))
# Title and axis labels so the figure can be read on its own.
weight_plot.update_layout(
    title = "Body Weight Over Time",
    xaxis = dict(title = "Date"),
    yaxis = dict(title = f"Body weight ({weight_units})"),
)
weight_plot