In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import datetime as dt

# Parse the exported health XML once; the cells below walk `root` to
# collect the elements they need.
tree = ET.parse('./export.xml')
root = tree.getroot()

# Time range to export, as timezone-aware UTC timestamps. A plain date
# converts to midnight at the start of that day.
start_date, end_date = (
    pd.to_datetime(day, utc=True)
    for day in (dt.date(2023, 1, 1), dt.date(2023, 1, 31))
)

# File names of the CSV exports
records_output_csv, workouts_output_csv = "records_data.csv", "workouts_data.csv"

In [None]:
# Load records data: one row per <Record> element, one column per XML attribute
record_list = [x.attrib for x in root.iter('Record')]
record_data = pd.DataFrame(record_list)

# Clean up records data
# errors='ignore' keeps the cell running when the export has no value for one
# of these attributes (the column then never exists in the frame).
record_data_cleaned = record_data.drop(
    columns=['sourceName', 'sourceVersion', 'device'], errors='ignore'
)
# Strip the identifier prefixes as literal text (regex=False), so the result
# does not depend on the pandas version's default for `regex`.
record_data_cleaned['type'] = (
    record_data_cleaned['type']
    .str.replace('HKQuantityTypeIdentifier', '', regex=False)
    .str.replace('HKCategoryTypeIdentifier', '', regex=False)
)
# Non-numeric values become NaN instead of raising
record_data_cleaned['value'] = pd.to_numeric(record_data_cleaned['value'], errors='coerce')
# utc=True converts every timestamp to UTC, giving one tz-aware dtype even if
# rows carry different UTC offsets, and making the column directly comparable
# with the UTC start_date / end_date bounds.
for col in ["creationDate", "startDate", "endDate"]:
    record_data_cleaned[col] = pd.to_datetime(record_data_cleaned[col], utc=True)

# Export records data to csv
# end_date names a calendar day and is treated as inclusive: keep everything
# before the following midnight rather than only up to 00:00 of end_date.
window_end = end_date.normalize() + pd.Timedelta(days=1)
in_window = (record_data_cleaned["startDate"] >= start_date) & (record_data_cleaned["startDate"] < window_end)
# Sort first, then renumber, so the index column written to the CSV runs
# 0..n-1 in file order; chaining also avoids in-place edits on a selection.
record_data = (
    record_data_cleaned.loc[in_window]
    .sort_values(by=["startDate"])
    .reset_index(drop=True)
)
record_data.to_csv(f"./{records_output_csv}")

In [5]:
# Load workouts data: one row per <Workout> element, one column per XML attribute
workout_list = [x.attrib for x in root.iter('Workout')]
workout_data = pd.DataFrame(workout_list)

# Clean up workouts data
# errors='ignore' keeps the cell running when the export has no value for one
# of these attributes (the column then never exists in the frame).
workout_data_cleaned = workout_data.drop(
    columns=['sourceName', 'sourceVersion', 'device'], errors='ignore'
)
# Strip the activity-type prefix as literal text (regex=False), so the result
# does not depend on the pandas version's default for `regex`.
workout_data_cleaned['workoutActivityType'] = workout_data_cleaned['workoutActivityType'].str.replace(
    'HKWorkoutActivityType', '', regex=False
)
# utc=True converts every timestamp to UTC, giving one tz-aware dtype even if
# rows carry different UTC offsets, and making the column directly comparable
# with the UTC start_date / end_date bounds.
for col in ["creationDate", "startDate", "endDate"]:
    workout_data_cleaned[col] = pd.to_datetime(workout_data_cleaned[col], utc=True)

# Export workouts data to csv
# end_date names a calendar day and is treated as inclusive: keep everything
# before the following midnight rather than only up to 00:00 of end_date.
window_end = end_date.normalize() + pd.Timedelta(days=1)
in_window = (workout_data_cleaned["startDate"] >= start_date) & (workout_data_cleaned["startDate"] < window_end)
# Sort first, then renumber, so the index column written to the CSV runs
# 0..n-1 in file order; chaining also avoids in-place edits on a selection.
workout_data = (
    workout_data_cleaned.loc[in_window]
    .sort_values(by=["startDate"])
    .reset_index(drop=True)
)
workout_data.to_csv(f"./{workouts_output_csv}")