In [17]:
import pandas as pd
import xml.etree.ElementTree as ET
import datetime
import matplotlib.pyplot as plt

In [9]:
# Load the XML file
tree = ET.parse('apple_health_export/exportar.xml')

# Get the root element
root = tree.getroot()
records = []

# Iterate through elements in the XML
for child in root:
    # Check if the element is a Record
    if child.tag == 'Record':
        # Collect attributes of the Record element
        record_data = child.attrib
        # Also collect data from MetadataEntry children if needed
        metadata_entries = {}
        for subchild in child:
            if subchild.tag == 'MetadataEntry':
                key = subchild.attrib.get('key')
                value = subchild.attrib.get('value')
                metadata_entries[key] = value
        # Combine Record attributes and MetadataEntry data
        record_data.update(metadata_entries)
        # Add the combined data to the records list
        records.append(record_data)

# Convert the list of records to a DataFrame
df_records = pd.DataFrame(records)

# shorter observation names: use vectorized replace function
df_records['type'] = df_records['type'].str.replace('HKQuantityTypeIdentifier', '')
df_records['type'] = df_records['type'].str.replace('HKCategoryTypeIdentifier', '')

# Save the DataFrame to a CSV file
df_records.to_csv('apple_health_records.csv', index=False)

print("CSV file has been created.")

CSV file has been created.


In [4]:
tree = ET.parse('apple_health_export/exportar.xml')

# for every health record, extract the attributes into a dictionary (columns). Then create a list (rows).
root = tree.getroot()
record_list = [x.attrib for x in root.iter('Record')]

# create DataFrame from a list (rows) of dictionaries (columns)
data = pd.DataFrame(record_list)

# proper type to dates
for col in ['creationDate', 'startDate', 'endDate']:
    data[col] = pd.to_datetime(data[col])

# value is numeric, NaN if fails 
# comment out this to detailed stages
data['value'] = pd.to_numeric(data['value'], errors='coerce')

# some records do not measure anything, just count occurrences
# filling with 1.0 (= one time) makes it easier to aggregate
data['value'] = data['value'].fillna(1.0) #comment out this to detailed stages

# shorter observation names: use vectorized replace function
data['type'] = data['type'].str.replace('HKQuantityTypeIdentifier', '')
data['type'] = data['type'].str.replace('HKCategoryTypeIdentifier', '')

# save into CSV as this is a universally compatible data format
data.to_csv("apple_export.csv", index=False)

In [6]:
sleep_data = data[data['type'] == "SleepAnalysis"]
sleep_data = sleep_data[sleep_data['sourceName'] == "Apple Watch de Edson"]

display(sleep_data)

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,device
1313427,SleepAnalysis,Apple Watch de Edson,10.4,,2024-04-28 07:03:19-03:00,2024-04-27 23:37:45-03:00,2024-04-27 23:51:15-03:00,1.0,
1313428,SleepAnalysis,Apple Watch de Edson,10.4,,2024-04-28 07:03:19-03:00,2024-04-27 23:51:15-03:00,2024-04-28 00:13:15-03:00,1.0,
1313429,SleepAnalysis,Apple Watch de Edson,10.4,,2024-04-28 07:03:19-03:00,2024-04-28 00:13:15-03:00,2024-04-28 00:21:15-03:00,1.0,
1313430,SleepAnalysis,Apple Watch de Edson,10.4,,2024-04-28 07:03:19-03:00,2024-04-28 00:21:15-03:00,2024-04-28 00:30:45-03:00,1.0,
1313431,SleepAnalysis,Apple Watch de Edson,10.4,,2024-04-28 07:03:19-03:00,2024-04-27 23:37:45-03:00,2024-04-28 00:30:45-03:00,1.0,
...,...,...,...,...,...,...,...,...,...
1318041,SleepAnalysis,Apple Watch de Edson,10.5,,2024-08-15 10:37:32-03:00,2024-08-15 08:16:29-03:00,2024-08-15 08:21:59-03:00,1.0,
1318042,SleepAnalysis,Apple Watch de Edson,10.5,,2024-08-15 10:37:32-03:00,2024-08-15 08:21:59-03:00,2024-08-15 08:52:29-03:00,1.0,
1318043,SleepAnalysis,Apple Watch de Edson,10.5,,2024-08-15 10:37:32-03:00,2024-08-15 08:52:29-03:00,2024-08-15 09:23:59-03:00,1.0,
1318044,SleepAnalysis,Apple Watch de Edson,10.5,,2024-08-15 10:37:32-03:00,2024-08-15 09:23:59-03:00,2024-08-15 09:54:59-03:00,1.0,


In [8]:
# calulate time between date(s)
sleep_data['time_asleep'] = sleep_data['endDate'] - sleep_data['startDate']

# records are grouped by creation date, so lets used that to sum up the values we need here
# total time asleep as a sum of the asleep time
# awake and bed times are max's and min's
# sleep count is the number of times the Apple Watch detected movement
# rem is the number of sleep cycles over 90 minutes (divded by 90 if they were longer than 1 cycle)
sleep_data = sleep_data.groupby('creationDate').agg(total_time_asleep=('time_asleep', 'sum'),
    bed_time=('startDate', 'min'), 
    awake_time=('endDate', 'max'), 
    sleep_counts=('creationDate','count'), 
    rem_cycles=pd.NamedAgg(column='time_asleep', aggfunc=lambda x: (x // datetime.timedelta(minutes=90)).sum()))

# Time in Bed will be different to Apple's reported figure - 
# as Apple uses the time you place your iPhone down as an additional 
# datapoint, which of course, is incorrect if you try to maintain 
# some device separation in the evenings.
# For now - we will just use Apple Watch data here
sleep_data['time_in_bed'] = sleep_data['awake_time'] - sleep_data['bed_time']
sleep_data['restless_time'] = sleep_data['time_in_bed'] - sleep_data['total_time_asleep']

In [9]:
# convert time duration to minutes for easier plotting and comparison
sleep_data['time_in_bed'] = (sleep_data['time_in_bed'].dt.total_seconds()/60)
sleep_data['total_time_asleep'] = (sleep_data['total_time_asleep'].dt.total_seconds()/60)

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

from matplotlib.dates import DateFormatter, MonthLocator

chart1 = sleep_data[['time_in_bed','total_time_asleep']].plot(use_index=True)
plt.show()

In [11]:
data = pd.read_csv('apple_health_records.csv')

data = data[data['sourceName'] == "Apple Watch de Edson"]
display(data)

  data = pd.read_csv('apple_health_records.csv')


Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,HKWasUserEntered,device,...,HKMetadataKeyHeartRateRecoveryActivityDuration,HKMetadataKeyHeartRateRecoveryTestType,HKMetadataKeyHeartRateRecoveryMaxObservedRecoveryHeartRate,HKMetadataKeyHeartRateRecoveryActivityType,HKMetadataKeyMaximumLightIntensity,HKActivityType,HKPhysicalEffortEstimationType,HKIndoorWorkout,HKTimeZone,HKMetadataKeyAudioExposureLevel
4,Height,Apple Watch de Edson,10.4,cm,2024-04-27 23:19:57 -0300,2024-04-27 23:19:57 -0300,2024-04-27 23:19:57 -0300,178.0,,,...,,,,,,,,,,
12,BodyMass,Apple Watch de Edson,10.4,kg,2024-04-27 23:19:57 -0300,2024-04-27 23:19:57 -0300,2024-04-27 23:19:57 -0300,79.0,,,...,,,,,,,,,,
330742,HeartRate,Apple Watch de Edson,10.4,count/min,2024-04-28 08:42:18 -0300,2024-04-28 08:40:52 -0300,2024-04-28 08:40:52 -0300,88.0,,"<<HKDevice: 0x3016faf80>, name:Apple Watch, ma...",...,,,,,,,,,,
330743,HeartRate,Apple Watch de Edson,10.4,count/min,2024-04-28 08:44:45 -0300,2024-04-28 08:43:43 -0300,2024-04-28 08:43:43 -0300,84.4355,,"<<HKDevice: 0x3016faf80>, name:Apple Watch, ma...",...,,,,,,,,,,
330744,HeartRate,Apple Watch de Edson,10.4,count/min,2024-04-28 08:47:23 -0300,2024-04-28 08:43:48 -0300,2024-04-28 08:43:48 -0300,85.0,,"<<HKDevice: 0x3016faf80>, name:Apple Watch, ma...",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1321438,HeartRateVariabilitySDNN,Apple Watch de Edson,10.5,ms,2024-08-15 07:31:56 -0300,2024-08-15 07:30:56 -0300,2024-08-15 07:31:55 -0300,29.7119,,"<<HKDevice: 0x3015745a0>, name:Apple Watch, ma...",...,,,,,,,,,,
1321439,HeartRateVariabilitySDNN,Apple Watch de Edson,10.5,ms,2024-08-15 09:31:57 -0300,2024-08-15 09:30:56 -0300,2024-08-15 09:31:56 -0300,67.7189,,"<<HKDevice: 0x3015745a0>, name:Apple Watch, ma...",...,,,,,,,,,,
1321440,HeartRateVariabilitySDNN,Apple Watch de Edson,10.5,ms,2024-08-15 11:32:49 -0300,2024-08-15 11:31:48 -0300,2024-08-15 11:32:48 -0300,79.3807,,"<<HKDevice: 0x3015745a0>, name:Apple Watch, ma...",...,,,,,,,,,,
1321441,HeartRateVariabilitySDNN,Apple Watch de Edson,10.5,ms,2024-08-15 14:10:12 -0300,2024-08-15 14:09:12 -0300,2024-08-15 14:10:11 -0300,42.9472,,"<<HKDevice: 0x3015745a0>, name:Apple Watch, ma...",...,,,,,,,,,,


In [12]:
data['type'].value_counts()

ActiveEnergyBurned               227248
BasalEnergyBurned                167758
HeartRate                        113211
DistanceWalkingRunning            44665
PhysicalEffort                    39439
StepCount                         25818
RunningSpeed                      13263
RunningPower                      13237
AppleExerciseTime                  7278
RespiratoryRate                    6423
RunningVerticalOscillation         5114
RunningGroundContactTime           4979
SleepAnalysis                      4618
RunningStrideLength                4527
AppleStandTime                     4445
EnvironmentalAudioExposure         4216
DistanceCycling                    4054
AppleStandHour                     2353
OxygenSaturation                   1851
HeartRateVariabilitySDNN           1030
TimeInDaylight                      612
EnvironmentalSoundReduction         508
FlightsClimbed                      174
StairAscentSpeed                    154
StairDescentSpeed                   121


In [15]:
exercise_time = data[data['type'] == "AppleExerciseTime"]
display(exercise_time)


for col in exercise_time.columns:
    print(exercise_time[col].value_counts())
print(exercise_time.columns)

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,HKWasUserEntered,device,...,HKMetadataKeyHeartRateRecoveryActivityDuration,HKMetadataKeyHeartRateRecoveryTestType,HKMetadataKeyHeartRateRecoveryMaxObservedRecoveryHeartRate,HKMetadataKeyHeartRateRecoveryActivityType,HKMetadataKeyMaximumLightIntensity,HKActivityType,HKPhysicalEffortEstimationType,HKIndoorWorkout,HKTimeZone,HKMetadataKeyAudioExposureLevel
1137791,AppleExerciseTime,Apple Watch de Edson,10.4,min,2024-04-28 08:53:05 -0300,2024-04-28 08:51:00 -0300,2024-04-28 08:52:00 -0300,1.0,,"<<HKDevice: 0x3015746e0>, name:Apple Watch, ma...",...,,,,,,,,,,
1137792,AppleExerciseTime,Apple Watch de Edson,10.4,min,2024-04-28 08:55:57 -0300,2024-04-28 08:52:00 -0300,2024-04-28 08:53:00 -0300,1.0,,"<<HKDevice: 0x3015746e0>, name:Apple Watch, ma...",...,,,,,,,,,,
1137793,AppleExerciseTime,Apple Watch de Edson,10.4,min,2024-04-28 08:55:57 -0300,2024-04-28 08:53:00 -0300,2024-04-28 08:54:00 -0300,1.0,,"<<HKDevice: 0x3015746e0>, name:Apple Watch, ma...",...,,,,,,,,,,
1137794,AppleExerciseTime,Apple Watch de Edson,10.4,min,2024-04-28 08:58:43 -0300,2024-04-28 08:55:00 -0300,2024-04-28 08:56:00 -0300,1.0,,"<<HKDevice: 0x3015746e0>, name:Apple Watch, ma...",...,,,,,,,,,,
1137795,AppleExerciseTime,Apple Watch de Edson,10.4,min,2024-04-28 09:02:28 -0300,2024-04-28 08:58:00 -0300,2024-04-28 08:59:00 -0300,1.0,,"<<HKDevice: 0x3015746e0>, name:Apple Watch, ma...",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1145104,AppleExerciseTime,Apple Watch de Edson,10.5,min,2024-08-15 18:45:49 -0300,2024-08-15 18:44:00 -0300,2024-08-15 18:45:00 -0300,1.0,,"<<HKDevice: 0x3015748c0>, name:Apple Watch, ma...",...,,,,,,,,,,
1145105,AppleExerciseTime,Apple Watch de Edson,10.5,min,2024-08-15 18:47:00 -0300,2024-08-15 18:45:00 -0300,2024-08-15 18:46:00 -0300,1.0,,"<<HKDevice: 0x3015748c0>, name:Apple Watch, ma...",...,,,,,,,,,,
1145106,AppleExerciseTime,Apple Watch de Edson,10.5,min,2024-08-15 18:47:59 -0300,2024-08-15 18:46:00 -0300,2024-08-15 18:47:00 -0300,1.0,,"<<HKDevice: 0x3015748c0>, name:Apple Watch, ma...",...,,,,,,,,,,
1145107,AppleExerciseTime,Apple Watch de Edson,10.5,min,2024-08-15 18:48:50 -0300,2024-08-15 18:47:00 -0300,2024-08-15 18:48:00 -0300,1.0,,"<<HKDevice: 0x3015748c0>, name:Apple Watch, ma...",...,,,,,,,,,,


AppleExerciseTime    7278
Name: type, dtype: int64
Apple Watch de Edson    7278
Name: sourceName, dtype: int64
10.5    5899
10.4    1379
Name: sourceVersion, dtype: int64
min    7278
Name: unit, dtype: int64
2024-05-08 10:05:50 -0300    4
2024-04-28 11:45:26 -0300    3
2024-04-28 15:15:03 -0300    3
2024-06-23 17:17:26 -0300    3
2024-06-24 17:45:24 -0300    3
                            ..
2024-06-10 19:24:43 -0300    1
2024-06-10 19:23:42 -0300    1
2024-06-10 19:22:41 -0300    1
2024-06-10 19:21:39 -0300    1
2024-08-15 18:49:52 -0300    1
Name: creationDate, Length: 7195, dtype: int64
2024-04-28 08:51:00 -0300    1
2024-07-14 10:02:00 -0300    1
2024-07-15 18:16:00 -0300    1
2024-07-15 18:15:00 -0300    1
2024-07-15 18:14:00 -0300    1
                            ..
2024-06-10 18:47:00 -0300    1
2024-06-10 18:46:00 -0300    1
2024-06-10 18:45:00 -0300    1
2024-06-10 18:44:00 -0300    1
2024-08-15 18:48:00 -0300    1
Name: startDate, Length: 7278, dtype: int64
2024-04-28 08:52:00

In [16]:
running_speed = data[data['type'] == "RunningSpeed"]
display(running_speed)

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,HKWasUserEntered,device,...,HKMetadataKeyHeartRateRecoveryActivityDuration,HKMetadataKeyHeartRateRecoveryTestType,HKMetadataKeyHeartRateRecoveryMaxObservedRecoveryHeartRate,HKMetadataKeyHeartRateRecoveryActivityType,HKMetadataKeyMaximumLightIntensity,HKActivityType,HKPhysicalEffortEstimationType,HKIndoorWorkout,HKTimeZone,HKMetadataKeyAudioExposureLevel
1254109,RunningSpeed,Apple Watch de Edson,10.4,km/hr,2024-05-04 16:40:23 -0300,2024-05-04 16:40:21 -0300,2024-05-04 16:40:21 -0300,6.01803,,"<<HKDevice: 0x30163e5d0>, name:Apple Watch, ma...",...,,,,,,,,,,
1254110,RunningSpeed,Apple Watch de Edson,10.4,km/hr,2024-05-04 16:40:26 -0300,2024-05-04 16:40:23 -0300,2024-05-04 16:40:23 -0300,8.25646,,"<<HKDevice: 0x30163e5d0>, name:Apple Watch, ma...",...,,,,,,,,,,
1254111,RunningSpeed,Apple Watch de Edson,10.4,km/hr,2024-05-04 16:40:28 -0300,2024-05-04 16:40:26 -0300,2024-05-04 16:40:26 -0300,9.50175,,"<<HKDevice: 0x30163e5d0>, name:Apple Watch, ma...",...,,,,,,,,,,
1254112,RunningSpeed,Apple Watch de Edson,10.4,km/hr,2024-05-04 16:40:31 -0300,2024-05-04 16:40:28 -0300,2024-05-04 16:40:28 -0300,10.47,,"<<HKDevice: 0x30163e5d0>, name:Apple Watch, ma...",...,,,,,,,,,,
1254113,RunningSpeed,Apple Watch de Edson,10.4,km/hr,2024-05-04 16:40:34 -0300,2024-05-04 16:40:31 -0300,2024-05-04 16:40:31 -0300,11.9789,,"<<HKDevice: 0x30163e5d0>, name:Apple Watch, ma...",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1267367,RunningSpeed,Apple Watch de Edson,10.5,km/hr,2024-08-15 17:14:23 -0300,2024-08-15 17:14:21 -0300,2024-08-15 17:14:21 -0300,9.33446,,"<<HKDevice: 0x30163d6d0>, name:Apple Watch, ma...",...,,,,,,,,,,
1267368,RunningSpeed,Apple Watch de Edson,10.5,km/hr,2024-08-15 17:14:26 -0300,2024-08-15 17:14:23 -0300,2024-08-15 17:14:23 -0300,9.33873,,"<<HKDevice: 0x30163d6d0>, name:Apple Watch, ma...",...,,,,,,,,,,
1267369,RunningSpeed,Apple Watch de Edson,10.5,km/hr,2024-08-15 17:14:29 -0300,2024-08-15 17:14:26 -0300,2024-08-15 17:14:26 -0300,9.46678,,"<<HKDevice: 0x30163d6d0>, name:Apple Watch, ma...",...,,,,,,,,,,
1267370,RunningSpeed,Apple Watch de Edson,10.5,km/hr,2024-08-15 17:14:31 -0300,2024-08-15 17:14:28 -0300,2024-08-15 17:14:28 -0300,9.32789,,"<<HKDevice: 0x30163d6d0>, name:Apple Watch, ma...",...,,,,,,,,,,
