## This notebook is a relatively unstructured exploration of my biometric data. For polished plots, see the plots folder; for the code that produces them, see pretty_plots.ipynb

In [63]:
import pandas as pd
import xml.etree.ElementTree as et
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from astral import moon
import datetime as dt
import os
# Work from the project folder so the relative "data/..." paths used by the
# cells below resolve. NOTE(review): machine-specific absolute path.
os.chdir(r"C:\Users\Christian\code_projects\apple-heart-rate")

### Preprocessing, memory optimization

In [3]:
# Flatten every top-level <Record> element of the Apple Health export into one
# row, shrink the frame's memory footprint, and cache the result as feather.
tree = et.parse(r"C:\Users\Christian\code_projects\health\apple_health_export\export.xml")
root = tree.getroot()

# Attributes copied from each <Record>; one that is absent in the XML becomes None.
record_fields = [
    'type',
    'unit',
    'value',
    'sourceName',
    'sourceVersion',
    'device',
    'creationDate',
    'startDate',
    'endDate',
]
records = [
    {field: record.attrib.get(field) for field in record_fields}
    for record in root.findall('Record')
]

# Passing the column list keeps the schema intact even when no <Record> was
# found, so the conversions below do not fail on a column-less frame.
apple_data = pd.DataFrame(records, columns=record_fields)

# DataFrame.info() prints its report itself and returns None, so it is called
# bare (wrapping it in print() would emit an extra "None"). memory_usage='deep'
# measures the string columns exactly, which makes the before/after comparison
# of this optimisation meaningful.
apple_data.info(memory_usage='deep')

# Columns with few distinct values are stored as categoricals.
for category_column in ('type', 'unit', 'sourceName', 'device'):
    apple_data[category_column] = apple_data[category_column].astype('category')

# Values that cannot be parsed as numbers become NaN instead of raising.
apple_data['value'] = pd.to_numeric(apple_data['value'], errors='coerce')

# Parse the timestamps, then drop any timezone information while keeping the
# local wall-clock time. NOTE(review): assumes every timestamp in the export
# carries the same UTC offset - confirm against the export file.
for date_column in ('creationDate', 'startDate', 'endDate'):
    apple_data[date_column] = pd.to_datetime(apple_data[date_column]).dt.tz_localize(None)

apple_data.info(memory_usage='deep')
apple_data.to_feather("data/apple_health.feather")

In [118]:
# Reload the cached frame written by the preprocessing cell, so later cells can
# start from the feather file instead of re-parsing the XML export.
apple_data = pd.read_feather("data/apple_health.feather")


In [119]:
# Keep only the record types whose name mentions heart, step, stand or mass
# (case-insensitive), ordered chronologically, then release the full frame.
all_metrics = (
    apple_data
    .loc[lambda frame: frame['type'].str.contains('heart|step|stand|mass', case=False)]
    .copy()
    .sort_values(by='startDate')
    .reset_index(drop=True)
)
del apple_data

In [120]:
# Show which record types survived the filter.
all_metrics['type'].unique().tolist()

['HKQuantityTypeIdentifierStepCount',
 'HKCategoryTypeIdentifierAppleStandHour',
 'HKQuantityTypeIdentifierWalkingHeartRateAverage',
 'HKQuantityTypeIdentifierRestingHeartRate',
 'HKQuantityTypeIdentifierHeartRate',
 'HKQuantityTypeIdentifierBodyMass',
 'HKQuantityTypeIdentifierHeartRateVariabilitySDNN',
 'HKCategoryTypeIdentifierHighHeartRateEvent',
 'HKQuantityTypeIdentifierAppleStandTime',
 'HKQuantityTypeIdentifierWalkingStepLength',
 'HKQuantityTypeIdentifierBodyMassIndex']

## Now that's a lot of heartbeats! How can I dive deeper...

In [121]:
# Raw heart-rate samples only. The index is reset so this frame carries a default
# RangeIndex like the other derived frames in this notebook; that also keeps the
# later to_feather() call working on pandas releases that refuse to serialise a
# non-default index.
heart_rate = (
    all_metrics
    .query('type == "HKQuantityTypeIdentifierHeartRate"')
    .copy()
    .reset_index(drop=True)
)

# Plot only the samples from July 2024 onwards.
heart_rate_fig = px.line(
    heart_rate[heart_rate.startDate >= pd.to_datetime('2024-07-01')],
    x='startDate',
    y='value',
)
heart_rate_fig

### I just realized, I take my watch off when I sleep. By looking "between" the heart rate data, I have a pretty decent estimate of my sleep pattern for the past 7 years

In [122]:
# A long pause between consecutive heart-rate samples is treated as sleep. The
# sample that ends the pause is the first one after waking, so its timestamp
# falls on the morning after the night in question.
heart_rate['measurement_gap'] = heart_rate['startDate'].diff()
heart_rate['inferred_sleep'] = heart_rate['measurement_gap'] > pd.Timedelta(hours=4)

inferred_sleep = (
    heart_rate.loc[heart_rate['inferred_sleep'], ['startDate', 'measurement_gap']]
    .copy()
    .reset_index(drop=True)
)
inferred_sleep['hours_of_sleep'] = inferred_sleep['measurement_gap'].dt.total_seconds() / 3600
# Shift the wake-up timestamp back one day to label the evening the sleep began.
inferred_sleep['sleep_date'] = inferred_sleep['startDate'] - pd.Timedelta(days=1)
inferred_sleep = inferred_sleep.rename(columns={'startDate': 'woke_up_date'})

inferred_sleep['day_of_week'] = inferred_sleep['sleep_date'].dt.day_name()

# Gaps of 15 hours or more are discarded as outliers rather than real sleep.
plausible_sleep = inferred_sleep['hours_of_sleep'] < 15
inferred_sleep = inferred_sleep[plausible_sleep].reset_index(drop=True)

# Rolling statistics over the previous 60 rows (inferred sleeps, not calendar days).
rolling_sleep = inferred_sleep['hours_of_sleep'].rolling(window=60)
inferred_sleep['std_dev'] = rolling_sleep.std()
inferred_sleep['avg_sleep'] = rolling_sleep.mean()

# Optional per-weekday colour scale.
color_dict = {
    'Monday': 'blue',
    'Tuesday': 'orange',
    'Wednesday': 'green',
    'Thursday': 'red',
    'Friday': 'purple',
    'Saturday': 'black',
    'Sunday': 'gray',
}
inferred_sleep['day_color'] = inferred_sleep['day_of_week'].map(color_dict)

#### Whoa! Obviously there is some noise here from me forgetting to put my watch on or take it off, but there is an interesting trend toward consistency in recent years as I have become more in tune with my sleep (aka less fun)

#### There's also less of a trend than I expected for sleep length vs day of week. I was especially shocked to see weekend evenings did not result in less sleep

#### Also, there is clearly some skew because I don't fall asleep immediately after taking my watch off

In [123]:
# Scatter of inferred sleep length, one marker trace per weekday, overlaid with
# the rolling standard deviation and rolling mean of the sleep length.
sleep_chart = make_subplots()

for weekday in inferred_sleep['day_of_week'].unique():
    on_weekday = inferred_sleep['day_of_week'] == weekday
    sleep_chart.add_trace(
        go.Scatter(
            x=inferred_sleep.loc[on_weekday, 'sleep_date'],
            y=inferred_sleep.loc[on_weekday, 'hours_of_sleep'],
            mode='markers',
            name=weekday,
        )
    )

sleep_chart.add_trace(
    go.Scatter(
        x=inferred_sleep['sleep_date'],
        y=inferred_sleep['std_dev'],
        mode='lines',
        line={'width': 3},
        name='Rolling Bi-Monthly Std. Deviation',
    )
)
sleep_chart.add_trace(
    go.Scatter(
        x=inferred_sleep['sleep_date'],
        y=inferred_sleep['avg_sleep'],
        mode='lines',
        line={'width': 2, 'color': 'black'},
        name='Rolling Bi-Monthly Mean',
    )
)

# Left as the cell's final expression so the notebook renders the figure.
sleep_chart.update_layout(
    xaxis={
        'title': "Date",
        'range': [pd.to_datetime('2017-12-24'), pd.to_datetime('2024-10-01')],
    },
    yaxis={'title': "Hours of sleep", 'range': [0, 18], 'dtick': 2},
    title="Inferred Sleep From Heart Rate Gaps",
)

In [124]:
# Persist the inferred sleep table to the data folder as a feather file.
inferred_sleep.to_feather('data/inferred_sleep.feather')

## Moon Phase and Heart Rate
### Bear with me here. This paper discusses the influence of moon phase on heart rate: https://doi.org/10.1007/s00484-012-0605-z. What, am I supposed to not see for myself?
### Hoping to see a slight decrease in my resting heart rate during full and new moon phases to corroborate the paper


In [125]:
def get_moon_phase(date):
    """Return the name of the moon-phase quarter that ``date`` falls in.

    ``moon.phase`` is evaluated once and truncated to a whole number; 0-6 maps
    to ``"new_moon"``, 7-13 to ``"first_quarter"``, 14-20 to ``"full_moon"``
    and 21-27 to ``"last_quarter"``.

    Args:
        date: Value forwarded unchanged to ``moon.phase`` (this notebook passes
            the timestamps of a ``startDate`` column).

    Returns:
        One of the four phase names, or ``None`` when the truncated phase
        number lies outside 0-27.
    """
    # Entry i covers truncated phase numbers 7*i through 7*i + 6.
    phase_names = ("new_moon", "first_quarter", "full_moon", "last_quarter")

    # Computed a single time: this function runs once per row of a large
    # frame, so repeating the astronomical calculation per bucket is wasteful.
    lunar_day = int(moon.phase(date))
    if 0 <= lunar_day < 7 * len(phase_names):
        return phase_names[lunar_day // 7]
    return None



# Label every heart-rate sample with the moon phase at its start time.
heart_rate['moon_phase'] = heart_rate['startDate'].map(get_moon_phase)


In [127]:
# Add the calendar year, cache the enriched frame, then average the samples
# below 80 per moon phase and year.
heart_rate['year'] = heart_rate['startDate'].dt.year
heart_rate.to_feather('data/heart_rate.feather')
avg_rates_phase = (
    heart_rate.loc[heart_rate['value'] < 80]
    .groupby(['moon_phase', 'year'])['value']
    .mean()
    .reset_index()
)

In [132]:
# Records whose type name mentions "resting" (case-insensitive), labelled with
# moon phase and year, cached to feather, then averaged per phase and year.
is_resting = all_metrics['type'].str.contains('resting', case=False)
resting_heart = all_metrics[is_resting].copy().reset_index(drop=True)
resting_heart['moon_phase'] = resting_heart['startDate'].map(get_moon_phase)
resting_heart['year'] = resting_heart['startDate'].dt.year
resting_heart.to_feather('data/resting_heart.feather')
avg_resting_per_phase = (
    resting_heart
    .groupby(['moon_phase', 'year'])['value']
    .mean()
    .reset_index()
)


### Hmm, underwhelming results.
### However, I wasn't expecting to see such a dip in resting heart rate during 2021! This coincided with the pandemic of course, which is when I began boxing regularly

In [97]:
# Grouped bar chart: one bar series per moon phase, one group per year.
moon_phase_palette = {
    'first_quarter': '#333399',
    'full_moon': '#ccccff',
    'last_quarter': '#666699',
    'new_moon': '#000033',
}

resting_moon_fig = go.Figure()
for phase, color in moon_phase_palette.items():
    # Rows for this phase with 2017 left out of the chart.
    phase_rows = (
        avg_resting_per_phase
        .query(f'moon_phase == "{phase}"')
        .query('year != 2017')
    )
    phase_bars = go.Bar(
        x=phase_rows['year'],
        y=phase_rows['value'],
        name=phase,
        marker_color=color,
    )
    resting_moon_fig.add_trace(phase_bars)

resting_moon_fig.update_layout(
    title="Average Resting Heart Rate by Moon Phase",
    barmode="group",
    xaxis={'title': "Year"},
    # The y-axis is cropped to 53-63 so the bar-height differences are visible.
    yaxis={
        'title': "Average Resting Heart Rate (BPM)",
        'range': [53, 63],
        'dtick': 2,
    },
    legend={'title': 'Moon Phase'},
)

resting_moon_fig

### I just so happened to be monitoring my body weight during this period as well!

In [133]:
# Body-mass records only, re-indexed from zero.
weight = (
    all_metrics
    .loc[all_metrics['type'] == 'HKQuantityTypeIdentifierBodyMass']
    .copy()
    .reset_index(drop=True)
)

### Let's add it into the heart rate + moon phase plot

In [92]:
# Overlaying both series on one plot proved awkward, so they are stacked in two
# rows instead; a ggplot version may follow for a final polished image.
# NOTE(review): row 1 plots integer years while row 2 plots timestamps, yet
# shared_xaxes=True links the two x-axes - confirm the rendered axes look right.
heart_weight_fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=('Resting Heart Rate by Moon Phase', 'Weight Over Time'),
    shared_xaxes=True,
)

# Top row: reuse the bar traces already built for resting_moon_fig.
for bar_trace in resting_moon_fig.data:
    heart_weight_fig.add_trace(bar_trace, row=1, col=1)

# Bottom row: body weight over time.
weight_trace = go.Scatter(x=weight.startDate, y=weight.value, mode='lines', name='Weight')
heart_weight_fig.add_trace(weight_trace, row=2, col=1)

heart_weight_fig.update_layout(
    title='Resting Heart Rate vs Weight (and Moon Phase)',
    xaxis_title='Year',
    xaxis2_title='Date',
)

# Per-row y-axis title and fixed display range.
for panel_row, axis_label, axis_range in (
    (1, "Resting Heart Rate (BPM)", [53, 63]),
    (2, "Weight (lbs)", [145, 210]),
):
    heart_weight_fig.update_yaxes(
        title_text=axis_label,
        row=panel_row, col=1,
        range=axis_range,
    )

heart_weight_fig
