# iPhone Health Data: XML to CSV

### Imports

In [2]:
import xml.etree.ElementTree as ET
import pandas as pd


## Easy Transfer XML -> Pandas DF -> CSV

- Larger files take longer to process, but loading them into pandas makes sorting and selecting columns easier.
- Rough timing: about 1 minute per 1 GB of export data.

In [4]:
# Parse from the path rather than from a text-mode file handle: the XML parser
# then reads raw bytes and honours the document's own encoding declaration, so
# the result does not depend on the platform's default text encoding.
data = ET.parse('data/export.xml')
root = data.getroot()

# One row per child element of the root: the element's tag plus all of its XML
# attributes as columns. The first two children are skipped.
# NOTE(review): presumably those two are export metadata rather than records —
# confirm against the export file.
records_df = pd.DataFrame([{'tag': child.tag, **child.attrib} for child in root[2:]])

# Persist the full table; the "Read & Explore Data from CSV" section loads
# 'data/health_data.csv', so it must exist after a fresh Restart & Run All.
records_df.to_csv('data/health_data.csv', index=False)

## Separate CSV just for workout data

In [None]:
# Attributes kept for each workout row, in output column order.
workout_cols = [
    'workoutActivityType',
    'startDate',
    'endDate',
    'duration',
    'durationUnit',
    'device',
    'sourceName',
    'sourceVersion',
    'creationDate',
]

# Keep only the rows whose tag is 'Workout', restricted to the columns above,
# and renumber them from 0 before writing the CSV.
is_workout = records_df['tag'] == 'Workout'
workout_df = records_df.loc[is_workout, workout_cols].reset_index(drop=True)
workout_df.to_csv('data/workouts.csv', index=False)

## Separate CSV just for activity data

In [15]:
# Rows tagged 'ActivitySummary', keeping every column from 'dateComponents'
# through the last column of the frame, renumbered from 0.
is_activity = records_df['tag'] == 'ActivitySummary'
activity_df = (
    records_df
    .loc[is_activity, 'dateComponents':]
    .reset_index(drop=True)
)
activity_df.to_csv('data/activity.csv', index=False)

---

# Read & Explore Data from CSV

In [None]:
# Load the complete health table from its CSV file.
health_csv_path = 'data/health_data.csv'
all_health_df = pd.read_csv(health_csv_path)


In [12]:
# Count rows per value of the 'type' column and display the five most frequent.
type_counts = all_health_df['type'].value_counts()
type_counts.head()

type
HKQuantityTypeIdentifierActiveEnergyBurned        879244
HKQuantityTypeIdentifierHeartRate                 517280
HKQuantityTypeIdentifierBasalEnergyBurned         353209
HKQuantityTypeIdentifierDistanceWalkingRunning    191020
HKQuantityTypeIdentifierStepCount                 135602
Name: count, dtype: int64

In [14]:
# Record types to keep in the reduced export.
selected_types = [
    'HKQuantityTypeIdentifierActiveEnergyBurned',
    'HKQuantityTypeIdentifierBasalEnergyBurned',
    'HKQuantityTypeIdentifierDistanceWalkingRunning',
    'HKQuantityTypeIdentifierFlightsClimbed',
]

# Columns to keep, in output order.
selected_columns = ['type', 'value', 'unit', 'startDate', 'endDate']

# Filter rows and pick columns in a single .loc call. Columns are chosen by
# name only, so the result does not depend on where 'device' happens to sit in
# the CSV's column order (a positional slice up to 'device' would raise a
# KeyError if any selected column came after it).
is_selected_type = all_health_df['type'].isin(selected_types)
selected_df = all_health_df.loc[is_selected_type, selected_columns]
selected_df.to_csv('data/selected_health_data.csv', index=False)