## Import packages

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import datetime as dt 
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display

## Load the exported XML data and convert it to a pandas DataFrame

In [2]:
# Parse the exported XML file (path is relative to the notebook's working
# directory) into an ElementTree object.
tree = ET.parse('apple-watch-data.xml') 

In [3]:
# Collect the XML attributes of every <Record> element found anywhere under
# the root; each attribute dict becomes one row of the table built next.
root = tree.getroot()
record_list = [record.attrib for record in root.iter('Record')]

In [4]:
# Build the raw table from the attribute dicts: one row per <Record>.
# Values come straight from XML attributes, so every column holds strings here.
record_data = pd.DataFrame(record_list)

In [5]:
# unique elements in 'type' column of record_data
# record_data.type.unique()

In [6]:
# Human-readable titles for the HealthKit record type identifiers.
type_title_mapping = {
    'HKQuantityTypeIdentifierHeight': 'Height',
    'HKQuantityTypeIdentifierBodyMass': 'Body Mass',
    'HKQuantityTypeIdentifierHeartRate': 'Heart Rate',
    'HKQuantityTypeIdentifierOxygenSaturation': 'Oxygen Saturation',
    'HKQuantityTypeIdentifierRespiratoryRate': 'Respiratory Rate',
    'HKQuantityTypeIdentifierStepCount': 'Step Count',
    'HKQuantityTypeIdentifierDistanceWalkingRunning': 'Walking/Running Distance',
    'HKQuantityTypeIdentifierBasalEnergyBurned': 'Basal Energy Burned',
    'HKQuantityTypeIdentifierActiveEnergyBurned': 'Active Energy Burned',
    'HKQuantityTypeIdentifierFlightsClimbed': 'Flights Climbed',
    'HKQuantityTypeIdentifierAppleExerciseTime': 'Exercise Time',
    'HKQuantityTypeIdentifierRestingHeartRate': 'Resting Heart Rate',
    'HKQuantityTypeIdentifierWalkingHeartRateAverage': 'Average Walking Heart Rate',
    'HKQuantityTypeIdentifierEnvironmentalAudioExposure': 'Environmental Audio Exposure',
    'HKQuantityTypeIdentifierHeadphoneAudioExposure': 'Headphone Audio Exposure',
    'HKQuantityTypeIdentifierWalkingDoubleSupportPercentage': 'Double Support Percentage',
    'HKQuantityTypeIdentifierSixMinuteWalkTestDistance': '6-Minute Walk Distance',
    'HKQuantityTypeIdentifierAppleStandTime': 'Stand Time',
    'HKQuantityTypeIdentifierWalkingSpeed': 'Walking Speed',
    'HKQuantityTypeIdentifierWalkingStepLength': 'Step Length',
    'HKQuantityTypeIdentifierWalkingAsymmetryPercentage': 'Walking Asymmetry',
    'HKQuantityTypeIdentifierStairAscentSpeed': 'Stair Ascent Speed',
    'HKQuantityTypeIdentifierStairDescentSpeed': 'Stair Descent Speed',
    'HKDataTypeSleepDurationGoal': 'Sleep Duration Goal',
    'HKQuantityTypeIdentifierAppleWalkingSteadiness': 'Walking Steadiness',
    'HKQuantityTypeIdentifierHeartRateRecoveryOneMinute': '1-Min Heart Rate Recovery',
    'HKQuantityTypeIdentifierPhysicalEffort': 'Physical Effort',
    'HKCategoryTypeIdentifierSleepAnalysis': 'Sleep Analysis',
    'HKCategoryTypeIdentifierAppleStandHour': 'Stand Hour',
    'HKCategoryTypeIdentifierHighHeartRateEvent': 'High Heart Rate Event',
    'HKCategoryTypeIdentifierAudioExposureEvent': 'Audio Exposure Event',
    'HKCategoryTypeIdentifierHeadphoneAudioExposureEvent': 'Headphone Exposure Event',
    'HKQuantityTypeIdentifierHeartRateVariabilitySDNN': 'Heart Rate Variability (SDNN)'
}
# Translate known identifiers and leave every other value untouched.
# A plain `.map(type_title_mapping)` replaces anything that is not a key with
# NaN: unknown record types would silently vanish from later group-bys, and
# re-running this cell would blank the whole column because the translated
# titles are no longer keys of the mapping.
record_data['type'] = record_data['type'].map(
    lambda identifier: type_title_mapping.get(identifier, identifier)
)

In [7]:
# Sanity check: list the distinct record types after the renaming step
record_data['type'].unique()

array(['Height', 'Body Mass', 'Heart Rate', 'Oxygen Saturation',
       'Respiratory Rate', 'Step Count', 'Walking/Running Distance',
       'Basal Energy Burned', 'Active Energy Burned', 'Flights Climbed',
       'Exercise Time', 'Resting Heart Rate',
       'Average Walking Heart Rate', 'Environmental Audio Exposure',
       'Headphone Audio Exposure', 'Double Support Percentage',
       '6-Minute Walk Distance', 'Stand Time', 'Walking Speed',
       'Step Length', 'Walking Asymmetry', 'Stair Ascent Speed',
       'Stair Descent Speed', 'Sleep Duration Goal', 'Walking Steadiness',
       '1-Min Heart Rate Recovery', 'Physical Effort', 'Sleep Analysis',
       'Stand Hour', 'High Heart Rate Event', 'Audio Exposure Event',
       'Headphone Exposure Event', 'Heart Rate Variability (SDNN)'],
      dtype=object)

In [8]:
# Per record type: earliest start and latest end of any record.
# min/max run on the raw timestamp strings, i.e. they compare lexicographically.
date_ranges = record_data.groupby('type').agg({
    'startDate': 'min',
    'endDate': 'max'
}).reset_index()
# Only the calendar date is kept, so parse just the leading 'YYYY-MM-DD' of each
# timestamp string. Parsing the full strings breaks as soon as the values carry
# different UTC offsets (e.g. '+0100' and '+0200' around a DST change): pandas
# then does not return a datetime64 Series and the `.dt` accessor fails.
# The resulting date is the local wall-clock date, same as before.
date_ranges['startDate'] = pd.to_datetime(date_ranges['startDate'].str.slice(0, 10), format='%Y-%m-%d').dt.date
date_ranges['endDate'] = pd.to_datetime(date_ranges['endDate'].str.slice(0, 10), format='%Y-%m-%d').dt.date
# date_ranges

## Select the last day of data

In [9]:
# Keep the records whose start timestamp falls in the selected window.
# 'startDate' is still a string here, so both bounds are compared
# lexicographically: any timestamp on 2025-06-03 (e.g. '2025-06-03 00:00:01 ...')
# sorts after the bare '2025-06-03' and is therefore excluded.
filtered_data = record_data.loc[
    (record_data['startDate'] >= '2025-06-02')
    & (record_data['startDate'] <= '2025-06-03')
]

In [10]:
# Discard the source/device bookkeeping columns and the end timestamp
df = filtered_data.drop(
    columns=['sourceName', 'sourceVersion', 'device', 'creationDate', 'endDate']
)
df.head()

Unnamed: 0,type,unit,startDate,value
76268,Heart Rate,count/min,2025-06-02 07:11:59 +0200,64
76269,Heart Rate,count/min,2025-06-02 07:17:43 +0200,69
76270,Heart Rate,count/min,2025-06-02 07:20:51 +0200,69
76271,Heart Rate,count/min,2025-06-02 07:22:49 +0200,69
76272,Heart Rate,count/min,2025-06-02 07:32:13 +0200,68


## Pivot table

In [11]:
# Parse the timestamp strings into datetimes, then drop the timezone so the
# column holds naive local wall-clock times
# (e.g. '2025-06-02 07:11:59 +0200' -> 2025-06-02 07:11:59).
df['startDate'] = pd.to_datetime(df['startDate']).dt.tz_localize(None)

In [12]:
# Reshape from long to wide: one row per start timestamp, one column per
# record type. If a type has several records with the same timestamp, only
# the first non-null value is kept. Values are not converted, so they remain
# the strings read from the XML.
grouped = (
    df.groupby(['startDate', 'type'])['value']
    .first()
    .unstack()
    .reset_index()
)

In [13]:
# Put the rows in chronological order.
# NOTE(review): groupby sorts its keys by default, so the frame is presumably
# already ordered by 'startDate' at this point — confirm before removing.
grouped = grouped.sort_values(by='startDate')

In [14]:
# Continue with a copy of the wide table without the 'Stand Hour' and
# 'Exercise Time' columns. `drop` returns a new frame, so `grouped` is left
# intact and this cell can be re-run; the previous in-place drop on an alias of
# `grouped` raised KeyError on a second execution because the columns were
# already gone.
df = grouped.drop(columns=['Stand Hour', 'Exercise Time'])
df.head()

type,startDate,Active Energy Burned,Average Walking Heart Rate,Basal Energy Burned,Double Support Percentage,Environmental Audio Exposure,Flights Climbed,Headphone Audio Exposure,Heart Rate,Heart Rate Variability (SDNN),...,Physical Effort,Resting Heart Rate,Stair Ascent Speed,Stair Descent Speed,Stand Time,Step Count,Step Length,Walking Asymmetry,Walking Speed,Walking/Running Distance
0,2025-06-02 06:00:00,,,,,,,,,,...,,,,,,,,,,
1,2025-06-02 06:58:33,,,,,,,,,,...,,,,,,170.0,,,,0.124119
2,2025-06-02 06:58:37,,,,,,,,,,...,,,,,,37.0,,,,0.029
3,2025-06-02 07:00:00,,,,,,,,,,...,,,,,,,,,,
4,2025-06-02 07:07:26,,,,,66.2278,,,,,...,,,,,,,,,,


In [15]:
# Print the distinct values of every column for a quick look at what each
# record type contains
for col in df.columns:
    distinct = df[col].unique()
    print(f"{col} → {distinct}")

startDate → <DatetimeArray>
['2025-06-02 06:00:00', '2025-06-02 06:58:33', '2025-06-02 06:58:37',
 '2025-06-02 07:00:00', '2025-06-02 07:07:26', '2025-06-02 07:08:42',
 '2025-06-02 07:09:11', '2025-06-02 07:09:17', '2025-06-02 07:09:35',
 '2025-06-02 07:10:00',
 ...
 '2025-06-02 19:48:39', '2025-06-02 19:48:49', '2025-06-02 19:49:13',
 '2025-06-02 19:50:00', '2025-06-02 19:50:29', '2025-06-02 19:50:39',
 '2025-06-02 19:54:01', '2025-06-02 19:54:12', '2025-06-02 19:55:30',
 '2025-06-02 19:55:50']
Length: 2328, dtype: datetime64[ns]
Active Energy Burned → [nan '0.779' '0.042' '18.572' '0.039' '1.405' '36.024' '3.349' '2.606'
 '0.336' '2.377' '0.511' '10.497' '0.508' '2.467' '0.072' '2.378' '0.117'
 '3.208' '1.141' '7.388' '6.54' '3.957' '0.494' '3.647' '4.917' '1.195'
 '5.927' '1.859' '3.955' '1.952' '4.295' '1.302' '3.879' '4.367' '0.381'
 '3.193' '3.122' '4.12' '9.033' '4.256' '5.123' '0.543' '2.687' '9.894'
 '8.787' '3.014' '2.162' '1.906' '0.88' '1.687' '0.356' '15.496' '0.5'
 '1.864