UNZIP APPLE HEALTH EXPORT

In [None]:
import zipfile
# Extract every member of the Apple Health export archive into the current working directory.
with zipfile.ZipFile('export.zip', mode='r') as archive:
    archive.extractall()

IMPORT LIBRARIES

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
# Apply the custom style sheet 'plot-style.mplstyle' (a relative path, so the file
# must be reachable from the working directory).
# Alternative built-in style: plt.style.use("fivethirtyeight")
plt.style.use('plot-style.mplstyle')

import seaborn as sns



In [None]:
# Load the exported XML document and take its root element.
tree = ET.parse('apple_health_export/Export.xml')
root = tree.getroot()
# Collect the attribute dictionary of every <Record> element found under the root.
record_list = [record.attrib for record in root.iter('Record')]

In [None]:
import sys

In [None]:
record_data = pd.DataFrame(record_list)

# The three timestamp attributes arrive as strings; convert each to a datetime column.
record_data = record_data.assign(
    **{
        name: pd.to_datetime(record_data[name])
        for name in ('creationDate', 'startDate', 'endDate')
    }
)

# Values that cannot be parsed as numbers become NaN. Such records do not measure
# anything, they only mark an occurrence, so they are counted as 1.0 (= one time),
# which makes them easy to aggregate.
record_data['value'] = pd.to_numeric(record_data['value'], errors='coerce').fillna(1.0)

# Shorter observation names: strip the identifier prefixes from the record type.
for prefix in ('HKQuantityTypeIdentifier', 'HKCategoryTypeIdentifier'):
    record_data['type'] = record_data['type'].str.replace(prefix, '')
record_data.tail()

In [None]:
# Add the duration of each measurement (seconds) and the calendar day it started on.
# NOTE: this cell replaces `record_data` and drops its 'value' and 'type' columns,
# so re-running it without first re-running the previous cell will fail.
record_data['measure_time_secs'] = (record_data['endDate'] - record_data['startDate']).dt.total_seconds()
record_data['day'] = record_data['startDate'].dt.date

# Create new values field for each measurement type
rec_types = record_data.sort_values('type').type.unique()

# One column per record type, holding that type's values at the original row index
# (NaN everywhere else). A boolean mask is used instead of a string-built query so
# that type names containing quotes cannot break the expression.
pivoted_rectypes = pd.concat([
    record_data.loc[record_data['type'] == x, 'value'].rename(x) for x in rec_types
], axis=1)

# Widen the table, then drop the long-format columns that the pivot replaced.
record_data = pd.concat([record_data, pivoted_rectypes], axis=1).drop(columns=['value', 'type'])

# Export to file: prefer Parquet, fall back to CSV when Parquet cannot be written
# (for example when no Parquet engine is installed). Only ordinary exceptions are
# caught, so Ctrl-C / kernel interrupts still propagate, and the reason is reported.
try:
    record_data.to_parquet('record_data.parquet', index=False)
except Exception as exc:
    print(f"Parquet export failed ({exc!r}); writing record_data.csv instead.")
    record_data.to_csv('record_data.csv', index=False)

record_data.head(2)

In [None]:
# Display the column index of the widened table (record attributes plus one column per record type).
record_data.columns

# Sleep HRV DATA

Heart rate variability (HRV) measurements are supposedly more accurate during sleep. The cell below therefore uses the sleep data recorded by the Apple Watch to keep only the HRV measurements that were taken during sleep.

In [None]:
# Calendar day of every record as a datetime64 column.
record_data['date'] = pd.to_datetime(record_data['day'], yearfirst=True)

# Only sleep records written by this source are used. The name contains a curly
# apostrophe and a non-breaking space (\xa0), so it has to match exactly.
SLEEP_SOURCE_NAME = 'George’s Apple\xa0Watch'
# Sleep records that start no later than this after the previous records ended are
# treated as part of the same sleep session. Tunable: raise it if one night is split
# into several sessions, lower it if separate sleeps get merged.
MAX_SLEEP_GAP = pd.Timedelta(hours=1)

# Get Sleep Times
sleep_data = record_data[
    (record_data['SleepAnalysis'] == 1) & (record_data['sourceName'] == SLEEP_SOURCE_NAME)
]

# Build sleep sessions from contiguous sleep records instead of taking min/max per
# calendar date. A per-date window is wrong whenever one date contains both the
# morning tail of one night and the evening start of the next: its min start / max
# end then spans the whole day and daytime HRV readings pass as "asleep".
sleep_sorted = sleep_data.sort_values('startDate')
# Latest end time seen among all earlier records (NaT for the first record).
previous_end = sleep_sorted['endDate'].cummax().shift()
# A new session starts when a record begins more than MAX_SLEEP_GAP after everything
# before it has ended; the cumulative sum turns those breaks into session ids.
session_id = ((sleep_sorted['startDate'] - previous_end) > MAX_SLEEP_GAP).cumsum()
when_asleep = (
    sleep_sorted.groupby(session_id)
    .agg(sleep_start=('startDate', 'min'), sleep_end=('endDate', 'max'))
    .reset_index(drop=True)
)

# record_data[['startDate','endDate','day','HeartRateVariabilitySDNN']]
hrv_data = record_data.dropna(subset=['HeartRateVariabilitySDNN'])[
    ['startDate', 'endDate', 'date', 'HeartRateVariabilitySDNN']
]

# Attach to each HRV measurement the most recent session that started strictly
# before it (sessions do not overlap, so this is the only one that can contain it).
# Measurements with no earlier session get NaT and are removed by the filter below.
merged_hrv_sleep = pd.merge_asof(
    hrv_data.sort_values('startDate'),
    when_asleep,
    left_on='startDate',
    right_on='sleep_start',
    direction='backward',
    allow_exact_matches=False,
)

# Keep measurements that also finished strictly before that session ended.
hrv_asleep = merged_hrv_sleep[
    merged_hrv_sleep['endDate'] < merged_hrv_sleep['sleep_end']
]
hrv_asleep

In [None]:
# Persist the sleep-time HRV rows (without the index) so the plotting cells can reload them from disk.
hrv_asleep.to_csv('sleeping_hrv_measurements.csv', index=False)

In [None]:
import plotly.express as px
import pandas as pd

In [None]:
# Reload the saved sleep-time HRV rows; CSV stores dates as text, so 'date' is parsed back to datetime.
hrv_asleep = pd.read_csv('sleeping_hrv_measurements.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [None]:
# Add a four-digit 'year' string column, used to colour the histogram by year.
to_plot = hrv_asleep.assign(year=hrv_asleep['date'].dt.strftime('%Y'))

In [None]:
# Histogram of sleep-time HRV values, one colour per calendar year.
fig = px.histogram(
    to_plot,
    x='HeartRateVariabilitySDNN',
    color='year',
    title='Heart Rate Variability Measurements<br>(When asleep) 2021-2023'
)
# Save the figure as an HTML fragment that loads plotly.js from the CDN.
# The encoding is given explicitly: with the platform default (not UTF-8 everywhere)
# the write could fail or produce a mis-encoded file.
with open('HRV During Sleep 2021-2023.html', 'w', encoding='utf-8') as f:
    f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [None]:
# Kernel density estimate of the sleep-time HRV values. The frame is passed as
# `data=` by keyword because the first positional parameter of kdeplot is not
# `data` in every seaborn release; the trailing semicolon hides the Axes repr.
sns.kdeplot(data=hrv_asleep, x='HeartRateVariabilitySDNN');