In [None]:
import pandas as pd

from parse import parse
from datetime import datetime

from random import randint, uniform

import os
import gpxpy

In [None]:
# Read export.xml (resolved against the current working directory) into a DataFrame.
export_file = 'export.xml'
df = pd.read_xml(export_file)

In [None]:
# Columns removed from the raw export; every other column of `df` is carried
# over into `parsed` unchanged.
columns_to_discard = [
    'HKCharacteristicTypeIdentifierDateOfBirth',
    'HKCharacteristicTypeIdentifierBiologicalSex',
    'HKCharacteristicTypeIdentifierBloodType',
    'HKCharacteristicTypeIdentifierFitzpatrickSkinType',
    'HKCharacteristicTypeIdentifierCardioFitnessMedicationsUse',
    'HeartRateVariabilityMetadataList',
    'MetadataEntry',
    'totalDistanceUnit',
    'durationUnit',
    'totalEnergyBurnedUnit',
    'WorkoutEvent',
    'WorkoutRoute',
    'activeEnergyBurnedGoal',
    'activeEnergyBurnedUnit',
    'appleMoveTime',
    'appleMoveTimeGoal',
    'appleExerciseTimeGoal',
    'appleStandHoursGoal',
]
parsed = df.drop(labels=columns_to_discard, axis='columns')

In [None]:
# Units of the workout columns (the corresponding *Unit columns were dropped above):
#   duration       -> minutes
#   distance       -> miles
#   energy burned  -> calories
# Show which columns are left in `parsed`.
parsed.columns

In [None]:
# Distinct values of the 'type' column, i.e. which record types this export contains.
parsed['type'].unique()

In [None]:
# Preview the first 10 rows of the trimmed frame.
parsed.head(10)

In [None]:
def fmt_hr_date(date):
    """Return the calendar date ('YYYY-MM-DD') of an export timestamp string.

    Parameters
    ----------
    date : str
        Timestamp formatted as '%Y-%m-%d %H:%M:%S %z',
        e.g. '2021-03-05 19:07:42 -0500'.

    Returns
    -------
    str
        The date portion exactly as written in the input. The UTC offset is
        parsed but discarded, so no timezone conversion takes place.

    Raises
    ------
    ValueError
        If ``date`` does not match the expected format.
    """
    stamp = datetime.strptime(date, '%Y-%m-%d %H:%M:%S %z')
    # date().isoformat() yields 'YYYY-MM-DD' directly, instead of slicing the
    # time portion off str(datetime).
    return stamp.date().isoformat()

# One row per resting-heart-rate record: the reading ('hr') and the calendar
# date it was created on ('dateComponents', the key used by the later merges).
is_resting_hr = parsed['type'] == 'HKQuantityTypeIdentifierRestingHeartRate'
# .copy() makes this an independent frame, so adding a column below neither
# triggers SettingWithCopyWarning nor depends on view-vs-copy behaviour.
resting_hr = parsed.loc[is_resting_hr, ['value', 'creationDate']].copy()
resting_hr['dateComponents'] = resting_hr['creationDate'].apply(fmt_hr_date)
resting_hr = resting_hr[['value', 'dateComponents']].rename(columns={'value': 'hr'})
resting_hr

In [None]:
def fmt_hr_date(date):
    """Return the calendar date ('YYYY-MM-DD') of an export timestamp string.

    NOTE: this redefines the helper of the same name from the resting
    heart-rate cell; keep the two definitions identical.

    Parameters
    ----------
    date : str
        Timestamp formatted as '%Y-%m-%d %H:%M:%S %z',
        e.g. '2021-03-05 19:07:42 -0500'.

    Returns
    -------
    str
        The date portion exactly as written in the input. The UTC offset is
        parsed but discarded, so no timezone conversion takes place.

    Raises
    ------
    ValueError
        If ``date`` does not match the expected format.
    """
    stamp = datetime.strptime(date, '%Y-%m-%d %H:%M:%S %z')
    # date().isoformat() yields 'YYYY-MM-DD' directly, instead of slicing the
    # time portion off str(datetime).
    return stamp.date().isoformat()

# put your own source name
excluded_source = 'Arun’s iPhone'

# Walking/running distance records from every source except the excluded one.
is_distance = parsed['type'] == 'HKQuantityTypeIdentifierDistanceWalkingRunning'
from_other_source = parsed['sourceName'] != excluded_source
# .copy() makes this an independent frame, so the column assignments below do
# not trigger SettingWithCopyWarning.
distance = parsed.loc[is_distance & from_other_source, ['creationDate', 'value']].copy()
distance['dateComponents'] = distance['creationDate'].apply(fmt_hr_date)
distance['value'] = distance['value'].astype(float)

# Total distance per calendar day.
distance = (
    distance
    .groupby(['dateComponents'], as_index=False)
    .agg({'value': 'sum'})
    .rename(columns={'value': 'distance'})
)
distance

In [None]:
# Inner join: keep only the days that have both a resting heart-rate reading
# and a distance total.
hr_dist = pd.merge(resting_hr, distance, on='dateComponents')
hr_dist

In [None]:
# Rows that carry a 'dateComponents' value, reduced to the activity columns and
# left-joined with the per-day heart-rate/distance table (days without a match
# keep NaN in the joined columns).
goal_columns = ['dateComponents', 'activeEnergyBurned', 'appleExerciseTime', 'appleStandHours']
has_date_components = parsed['dateComponents'].notnull()
goals = pd.merge(
    parsed.loc[has_date_components, goal_columns],
    hr_dist,
    how='left',
    on='dateComponents',
)
goals

In [None]:
def fmt_wkt_date(date):
    """Parse an export timestamp string into a naive, minute-resolution datetime.

    ``date`` must look like '%Y-%m-%d %H:%M:%S %z'. The seconds are zeroed and
    the UTC offset is dropped without converting the wall-clock time.
    """
    stamp = datetime.strptime(date, '%Y-%m-%d %H:%M:%S %z')
    return stamp.replace(tzinfo=None, second=0)

# Workout rows only, reduced to the columns used downstream.
workout_columns = ['creationDate', 'startDate', 'workoutActivityType',
                   'duration', 'totalDistance', 'totalEnergyBurned']
# .copy() makes this an independent frame, so adding 'fmtDate' below does not
# trigger SettingWithCopyWarning.
workouts = parsed.loc[parsed['workoutActivityType'].notnull(), workout_columns].copy()
# Naive, minute-resolution timestamp used to pair workouts with GPX routes.
workouts['fmtDate'] = workouts['creationDate'].apply(fmt_wkt_date)
# Intermediate snapshot; the export cell at the end of the notebook writes to
# the same file name again.
workouts.to_csv('workouts.csv')
workouts

In [None]:
points = []
# put your own path to workout-routes
path = "/Users/arundhingra/Downloads/apple_health_export/workout-routes"
# os.listdir returns entries in arbitrary order and also lists non-route files
# (e.g. .DS_Store); keep only .gpx files and sort them so every run processes
# the same routes in the same order.
dir_list = sorted(
    entry for entry in os.listdir(path)
    if entry.lower().endswith('.gpx')
)

def fmt_route_date(route_date):
    """Parse a route timestamp such as '2021-03-05 7:05pm' into a datetime.

    Parameters
    ----------
    route_date : str
        Date, a space, then a 12-hour clock time immediately followed by the
        AM/PM marker ('%Y-%m-%d %I:%M%p'). Month, day, hour and minute may be
        written with or without a leading zero.

    Returns
    -------
    datetime.datetime
        Naive datetime with minute resolution.

    Raises
    ------
    ValueError
        If ``route_date`` does not match the expected layout.
    """
    # strptime already accepts unpadded fields, so the value does not need to
    # be split apart and zero-padded first; a malformed name now fails with a
    # descriptive ValueError rather than a TypeError on a None parse result.
    return datetime.strptime(route_date, '%Y-%m-%d %I:%M%p')

# Record (start datetime, latitude, longitude) of the first track point of
# every route file.
for filename in dir_list:
    # Open the file from the same directory that was listed (`path`), and close
    # the handle as soon as gpxpy has parsed it.
    with open(os.path.join(path, filename)) as gpx_file:
        gpx = gpxpy.parse(gpx_file)
    first_track = gpx.tracks[0]
    first_point = first_track.segments[0].points[0]
    # The track name carries a 6-character prefix ahead of the timestamp.
    route_started = first_track.name[6:]
    points.append((fmt_route_date(route_started), first_point.latitude, first_point.longitude))


points

In [None]:
from datetime import timedelta
# Snap each workout timestamp onto the timestamp of a GPX route recorded within
# +/- 2 hours of it, so the later merge on 'fmtDate' lines the two up exactly.
# A route can be claimed by at most one workout; workouts with no route nearby
# keep their original timestamp.
margin = timedelta(hours=2)
unclaimed_route_dates = [point[0] for point in points]
new_workout_dates = []
for workout_date in workouts['fmtDate']:
    # First unclaimed route (in list order) inside the window; the scan stops
    # at the first hit.
    match = next(
        (
            route_date for route_date in unclaimed_route_dates
            if route_date - margin < workout_date < route_date + margin
        ),
        None,
    )
    if match is None:
        new_workout_dates.append(workout_date)
    else:
        new_workout_dates.append(pd.Timestamp(match))
        unclaimed_route_dates.remove(match)

workouts['fmtDate'] = new_workout_dates

In [None]:
# Midnight-normalised copy of 'fmtDate' (time of day set to 00:00:00), computed
# with the vectorised datetime accessor rather than a per-row lambda.
# pd.to_datetime keeps this working when the column is empty or object-typed.
workouts['onlyDate'] = pd.to_datetime(workouts['fmtDate']).dt.normalize()

In [None]:
# I had to hardcode non-cardio related coordinates for my project;
# until Apple allows us to use location data for normal workouts, this is what we're stuck with.
# Each constant is a (latitude, longitude) pair.
varsity = (38.99089726384877, -76.9343323725462)
deep_creek = (39.514183, -79.309469)
rehoboth = (38.733435, -75.078909)
echo = (38.919791, -76.972322)
room = (38.99197562859428, -76.93441835548659)
outside_varsity = (38.99295439860751, -76.93546173743849)
home = (38.922336476725015, -77.37557836807055)

# NOTE(review): `together` is not referenced anywhere else in the visible notebook.
together = []
# Fallback locations for workouts that have no GPS route. The fill cell further
# down walks this list in order, taking one entry for each row whose longitude
# is missing, so the ORDER of the entries matters.
loc = [
    varsity,varsity,varsity,varsity,varsity,varsity,
    varsity,varsity,rehoboth,varsity,varsity,varsity,
    varsity,varsity,varsity,varsity,varsity,varsity,
    varsity,varsity,varsity,varsity,varsity,varsity,
    varsity,varsity,varsity,varsity,varsity,varsity,
    varsity,deep_creek,deep_creek,deep_creek,varsity,varsity,
    varsity,varsity,varsity,varsity,room,varsity,
    varsity,echo,echo,room,varsity,varsity,
    varsity,varsity,varsity,varsity,varsity,varsity,
    varsity,room,varsity,varsity,varsity,varsity,
    varsity,varsity,varsity,varsity,varsity,varsity,
    varsity,varsity,home,varsity,varsity,varsity,
    home,home,home,home,home,home,
    varsity,home,home,home,home,home,
    home,home,home,home,varsity,varsity,
    varsity,varsity,room,room,outside_varsity,room,
    room,varsity,room,room,room,varsity,
    varsity,room,varsity,varsity,varsity,room,
    outside_varsity,varsity,room
]

In [None]:
# Route start points as a frame, left-joined onto the workouts by timestamp;
# workouts without a route end up with NaN latitude/longitude.
route_columns = ['fmtDate', 'latitude', 'longitude']
tracked = pd.DataFrame(points, columns=route_columns)
joined = pd.merge(workouts, tracked, how='left', on='fmtDate')
joined

In [None]:
from math import isnan

# Workouts without a matched route have NaN coordinates after the left merge.
# Fill them, in row order, from the hand-maintained `loc` list: the first
# missing row gets loc[0], the second loc[1], and so on.
missing_coords = joined['longitude'].isna()
n_missing = int(missing_coords.sum())
if n_missing > len(loc):
    # Same exception type the element-by-element loop would have hit, but with
    # a message that says what to fix.
    raise IndexError(
        'loc holds {} fallback coordinates but {} workouts have no route'.format(
            len(loc), n_missing
        )
    )
if n_missing:
    fallback = loc[:n_missing]
    # .loc writes straight into `joined`, avoiding chained assignment through
    # intermediate Series.
    joined.loc[missing_coords, 'latitude'] = [lat for lat, _ in fallback]
    joined.loc[missing_coords, 'longitude'] = [lon for _, lon in fallback]

# Strip the 'HKWorkoutActivityType' prefix so only the activity name remains.
joined['workoutActivityType'] = joined['workoutActivityType'].str.replace(
    'HKWorkoutActivityType', '', regex=False
)

In [None]:
import random

def jitter(coords, max_offset=.01, rng=random):
    """Return ``coords`` displaced by a small random offset on each axis.

    Each coordinate is shifted independently by a value drawn uniformly from
    [-max_offset, +max_offset]. This is the same distribution as drawing a
    magnitude in [0, max_offset] and then picking its sign with a fair coin.

    Parameters
    ----------
    coords : sequence of two floats
        (latitude, longitude).
    max_offset : float, optional
        Largest absolute shift applied to either coordinate. Defaults to 0.01.
    rng : object with a ``uniform(a, b)`` method, optional
        Source of randomness. Defaults to the ``random`` module; pass a seeded
        ``random.Random`` (or call ``random.seed`` beforehand) to make the
        output reproducible.

    Returns
    -------
    tuple of float
        (jittered latitude, jittered longitude).
    """
    lat, long = coords[0], coords[1]
    return (
        lat + rng.uniform(-max_offset, max_offset),
        long + rng.uniform(-max_offset, max_offset),
    )

# Jitter every (latitude, longitude) pair, then write the two components back
# to their columns. Re-running this cell jitters the already-jittered values.
jittered = [jitter(pair) for pair in zip(joined['latitude'], joined['longitude'])]
joined['latitude'] = [new_lat for new_lat, _ in jittered]
joined['longitude'] = [new_long for _, new_long in jittered]
joined

In [None]:
# Write the two result tables next to the notebook (workouts first, then goals).
for csv_name, frame in (('workouts.csv', joined), ('goals.csv', goals)):
    frame.to_csv(csv_name)

In [None]:
# Number of workouts whose activity type is 'Running'.
is_running = joined['workoutActivityType'] == 'Running'
int(is_running.sum())