In [2]:
import xml.etree.ElementTree as ET
import pandas as pd
import datetime as dt

# create element tree object 
tree = ET.parse('export.xml') 

# for every health record, extract the attributes into a dictionary (columns). Then create a list (rows).
root = tree.getroot()
record_list = [x.attrib for x in root.iter('Record')]

# create DataFrame from a list (rows) of dictionaries (columns)
data = pd.DataFrame(record_list)

# proper type to dates
for col in ['creationDate', 'startDate', 'endDate']:
    data[col] = pd.to_datetime(data[col])

# value is numeric, NaN if fails
data['value'] = pd.to_numeric(data['value'], errors='coerce')

# some records do not measure anything, just count occurences
# filling with 1.0 (= one time) makes it easier to aggregate
data['value'] = data['value'].fillna(1.0)

# shorter observation names: use vectorized replace function
data['type'] = data['type'].str.replace('HKQuantityTypeIdentifier', '')
data['type'] = data['type'].str.replace('HKCategoryTypeIdentifier', '')

Sources de données
- Données Santé Apple: done 
- Données Apple: pas reçu 
- Données Uber: done 
- Données Instagram: non reçu 
- Données Fb: non reçu 

In [3]:
data

Unnamed: 0,type,sourceName,sourceVersion,device,unit,creationDate,startDate,endDate,value
0,StepCount,Claire,13.0,"<<HKDevice: 0x282531540>, name:iPhone, manufac...",count,2019-09-21 23:23:37+01:00,2019-09-21 23:12:34+01:00,2019-09-21 23:15:07+01:00,88.0
1,StepCount,Claire,13.0,"<<HKDevice: 0x282531540>, name:iPhone, manufac...",count,2019-09-21 23:53:13+01:00,2019-09-21 23:42:10+01:00,2019-09-21 23:42:13+01:00,8.0
2,StepCount,iPhone de Claire,12.1.3,"<<HKDevice: 0x282530aa0>, name:iPhone, manufac...",count,2019-02-02 20:06:09+01:00,2019-02-02 19:55:46+01:00,2019-02-02 20:05:18+01:00,450.0
3,StepCount,iPhone de Claire,12.1.3,"<<HKDevice: 0x282530aa0>, name:iPhone, manufac...",count,2019-02-02 20:23:30+01:00,2019-02-02 20:05:18+01:00,2019-02-02 20:05:58+01:00,65.0
4,StepCount,iPhone de Claire (2),,,count,2015-05-07 23:07:36+01:00,2015-05-07 21:37:56+01:00,2015-05-07 21:38:04+01:00,26.0
...,...,...,...,...,...,...,...,...,...
182434,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:50:33+01:00,2021-12-14 07:54:15+01:00,1.0
182435,SleepAnalysis,Claire,14.4.2,,,2021-12-14 07:57:28+01:00,2021-12-14 07:54:53+01:00,2021-12-14 07:56:34+01:00,1.0
182436,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:54:53+01:00,2021-12-14 07:56:34+01:00,1.0
182437,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:59:14+01:00,2021-12-14 08:15:02+01:00,1.0


In [4]:
data.groupby(['type']).mean()

Unnamed: 0_level_0,value
type,Unnamed: 1_level_1
ActiveEnergyBurned,0.506205
DistanceWalkingRunning,0.114822
FlightsClimbed,1.393526
HKDataTypeSleepDurationGoal,8.5
HeadphoneAudioExposure,61.969173
SleepAnalysis,1.0
StepCount,187.592751
WalkingAsymmetryPercentage,0.023039
WalkingDoubleSupportPercentage,0.293766
WalkingSpeed,4.411554


In [5]:
sleep_data = data.loc[data['type'] == 'SleepAnalysis']

In [6]:
sleep_data

Unnamed: 0,type,sourceName,sourceVersion,device,unit,creationDate,startDate,endDate,value
181320,SleepAnalysis,Horloge,50,"<<HKDevice: 0x2825dd770>, name:iPhone, manufac...",,2017-03-05 09:45:12+01:00,2017-03-05 00:45:00+01:00,2017-03-05 02:51:52+01:00,1.0
181321,SleepAnalysis,Horloge,50,"<<HKDevice: 0x2825dd770>, name:iPhone, manufac...",,2017-03-05 09:45:12+01:00,2017-03-05 02:52:52+01:00,2017-03-05 09:45:11+01:00,1.0
181322,SleepAnalysis,Horloge,50,"<<HKDevice: 0x2825dd770>, name:iPhone, manufac...",,2017-03-12 10:45:21+01:00,2017-03-12 03:13:40+01:00,2017-03-12 03:14:44+01:00,1.0
181323,SleepAnalysis,Horloge,50,"<<HKDevice: 0x2825dd770>, name:iPhone, manufac...",,2017-03-12 10:45:21+01:00,2017-03-12 03:15:16+01:00,2017-03-12 03:17:00+01:00,1.0
181324,SleepAnalysis,Horloge,50,"<<HKDevice: 0x2825dd770>, name:iPhone, manufac...",,2017-03-12 10:45:21+01:00,2017-03-12 03:17:00+01:00,2017-03-12 03:48:16+01:00,1.0
...,...,...,...,...,...,...,...,...,...
182434,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:50:33+01:00,2021-12-14 07:54:15+01:00,1.0
182435,SleepAnalysis,Claire,14.4.2,,,2021-12-14 07:57:28+01:00,2021-12-14 07:54:53+01:00,2021-12-14 07:56:34+01:00,1.0
182436,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:54:53+01:00,2021-12-14 07:56:34+01:00,1.0
182437,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:59:14+01:00,2021-12-14 08:15:02+01:00,1.0


In [7]:
sleep_data['duration'] = sleep_data['endDate']-sleep_data['startDate']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sleep_data['duration'] = sleep_data['endDate']-sleep_data['startDate']


In [85]:
sleep_data.to_csv('sleep_data.csv')

In [12]:
sleep_data['sourceName'].unique()

array(['Horloge', 'Claire '], dtype=object)

In [13]:
sleep_data_claire = sleep_data.loc[sleep_data['sourceName'] == 'Claire ']

In [14]:
sleep_data_claire

Unnamed: 0,type,sourceName,sourceVersion,device,unit,creationDate,startDate,endDate,value,duration
181432,SleepAnalysis,Claire,14.2,,,2020-11-26 08:24:48+01:00,2020-11-25 23:45:00+01:00,2020-11-26 08:15:21+01:00,1.0,0 days 08:30:21
181433,SleepAnalysis,Claire,14.2,,,2020-11-26 08:24:48+01:00,2020-11-26 08:17:15+01:00,2020-11-26 08:20:09+01:00,1.0,0 days 00:02:54
181434,SleepAnalysis,Claire,14.2,,,2020-11-26 08:24:48+01:00,2020-11-26 08:20:25+01:00,2020-11-26 08:20:32+01:00,1.0,0 days 00:00:07
181435,SleepAnalysis,Claire,14.2,,,2020-11-26 08:24:48+01:00,2020-11-26 08:22:18+01:00,2020-11-26 08:24:48+01:00,1.0,0 days 00:02:30
181436,SleepAnalysis,Claire,14.2,,,2020-11-27 08:00:16+01:00,2020-11-27 00:18:59+01:00,2020-11-27 08:00:16+01:00,1.0,0 days 07:41:17
...,...,...,...,...,...,...,...,...,...,...
182434,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:50:33+01:00,2021-12-14 07:54:15+01:00,1.0,0 days 00:03:42
182435,SleepAnalysis,Claire,14.4.2,,,2021-12-14 07:57:28+01:00,2021-12-14 07:54:53+01:00,2021-12-14 07:56:34+01:00,1.0,0 days 00:01:41
182436,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:54:53+01:00,2021-12-14 07:56:34+01:00,1.0,0 days 00:01:41
182437,SleepAnalysis,Claire,14.4.2,,,2021-12-14 08:15:02+01:00,2021-12-14 07:59:14+01:00,2021-12-14 08:15:02+01:00,1.0,0 days 00:15:48


In [86]:
sleep_data_claire_2 = sleep_data_claire.loc[sleep_data_claire['duration'] > dt.timedelta(hours = 3)]

In [87]:
sleep_data_claire_2.to_csv('sleep_data.csv')

In [88]:
sleep_data_claire_2['duration'].mean()

Timedelta('0 days 06:52:08.545171339')

In [20]:
sleep_2 = sleep_data_claire.sort_values('duration', ascending=True, ignore_index=True)

In [21]:
print(sleep_2)

               type sourceName sourceVersion device unit  \
0     SleepAnalysis    Claire         14.4.2    NaN  NaN   
1     SleepAnalysis    Claire         14.4.2    NaN  NaN   
2     SleepAnalysis    Claire         14.4.2    NaN  NaN   
3     SleepAnalysis    Claire           14.4    NaN  NaN   
4     SleepAnalysis    Claire         14.4.2    NaN  NaN   
...             ...        ...           ...    ...  ...   
1002  SleepAnalysis    Claire         14.4.2    NaN  NaN   
1003  SleepAnalysis    Claire           14.2    NaN  NaN   
1004  SleepAnalysis    Claire         14.4.1    NaN  NaN   
1005  SleepAnalysis    Claire           14.2    NaN  NaN   
1006  SleepAnalysis    Claire         14.4.2    NaN  NaN   

                  creationDate                 startDate  \
0    2021-05-22 07:59:32+01:00 2021-05-22 07:59:20+01:00   
1    2021-10-21 07:15:03+01:00 2021-10-21 07:01:40+01:00   
2    2021-06-11 07:18:25+01:00 2021-06-11 07:09:36+01:00   
3    2021-02-11 09:03:17+01:00 2021-02-

In [20]:
health_data = data.loc[data['type'] != 'SleepAnalysis']

In [42]:
health_data['CreationDatebis'] = health_data['creationDate'].dt.date
health_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  health_data['CreationDatebis'] = health_data['creationDate'].dt.date


Unnamed: 0,type,sourceName,sourceVersion,device,unit,creationDate,startDate,endDate,value,CreationDatebis
0,StepCount,Claire,13.0,"<<HKDevice: 0x282531540>, name:iPhone, manufac...",count,2019-09-21 23:23:37+01:00,2019-09-21 23:12:34+01:00,2019-09-21 23:15:07+01:00,88.0,2019-09-21
1,StepCount,Claire,13.0,"<<HKDevice: 0x282531540>, name:iPhone, manufac...",count,2019-09-21 23:53:13+01:00,2019-09-21 23:42:10+01:00,2019-09-21 23:42:13+01:00,8.0,2019-09-21
2,StepCount,iPhone de Claire,12.1.3,"<<HKDevice: 0x282530aa0>, name:iPhone, manufac...",count,2019-02-02 20:06:09+01:00,2019-02-02 19:55:46+01:00,2019-02-02 20:05:18+01:00,450.0,2019-02-02
3,StepCount,iPhone de Claire,12.1.3,"<<HKDevice: 0x282530aa0>, name:iPhone, manufac...",count,2019-02-02 20:23:30+01:00,2019-02-02 20:05:18+01:00,2019-02-02 20:05:58+01:00,65.0,2019-02-02
4,StepCount,iPhone de Claire (2),,,count,2015-05-07 23:07:36+01:00,2015-05-07 21:37:56+01:00,2015-05-07 21:38:04+01:00,26.0,2015-05-07
...,...,...,...,...,...,...,...,...,...,...
181315,WalkingAsymmetryPercentage,Claire,14.4.2,"<<HKDevice: 0x2825d41e0>, name:iPhone, manufac...",%,2021-12-15 19:11:10+01:00,2021-12-15 18:22:56+01:00,2021-12-15 18:25:05+01:00,0.0,2021-12-15
181316,WalkingAsymmetryPercentage,Claire,14.4.2,"<<HKDevice: 0x2825d41e0>, name:iPhone, manufac...",%,2021-12-15 19:11:10+01:00,2021-12-15 18:58:05+01:00,2021-12-15 18:59:04+01:00,0.0,2021-12-15
181317,WalkingAsymmetryPercentage,Claire,14.4.2,"<<HKDevice: 0x2825d41e0>, name:iPhone, manufac...",%,2021-12-15 21:45:05+01:00,2021-12-15 19:17:45+01:00,2021-12-15 19:18:05+01:00,0.0,2021-12-15
181318,WalkingAsymmetryPercentage,Claire,14.4.2,"<<HKDevice: 0x2825d41e0>, name:iPhone, manufac...",%,2021-12-15 21:45:05+01:00,2021-12-15 19:18:20+01:00,2021-12-15 19:18:44+01:00,0.0,2021-12-15


In [67]:
health_data
health_data.to_csv('health_data.csv') 

In [43]:
# health_data_steps are data related to step count
health_data_steps = health_data.loc[health_data['type'] == 'StepCount']
# health_data_distance are data related to Distance Walking and Running
health_data_distance = health_data.loc[health_data['type'] == 'DistanceWalkingRunning']

In [89]:
import math
health_data_distance_grouped = health_data_distance.groupby(['CreationDatebis']).sum()
health_data_steps_grouped = health_data_steps.groupby(['CreationDatebis']).sum()

meandistance = str(math.ceil(health_data_distance_grouped.mean()[0]))
meansteps = str(math.ceil(health_data_steps_grouped.mean()[0]))
print ('Daily average steps:',meansteps)
print('Daily average distance:', meandistance)

Daily average steps: 4966
Daily average distance: 4


In [96]:
health_data_distance_2019 = health_data_distance.loc[health_data_distance['CreationDatebis']==2019]
display(health_data_distance_2019)

Unnamed: 0,type,sourceName,sourceVersion,device,unit,creationDate,startDate,endDate,value,CreationDatebis


In [93]:
import plotly.express as px
fig = px.bar(New_df, x='CreationDatebis', y='value')
fig.show()

In [77]:
health_data_steps_grouped

Unnamed: 0_level_0,value
CreationDatebis,Unnamed: 1_level_1
2015-05-07,26.0
2015-06-07,620.0
2015-06-08,501.0
2015-06-09,6785.0
2015-06-10,4179.0
...,...
2021-12-11,3024.0
2021-12-12,450.0
2021-12-13,437.0
2021-12-14,3967.0


In [79]:
health_data_distance

Unnamed: 0,type,sourceName,sourceVersion,device,unit,creationDate,startDate,endDate,value,CreationDatebis
56673,DistanceWalkingRunning,Claire,13.0,"<<HKDevice: 0x2825dc0a0>, name:iPhone, manufac...",km,2019-09-21 23:23:37+01:00,2019-09-21 23:12:34+01:00,2019-09-21 23:15:07+01:00,0.06509,2019-09-21
56674,DistanceWalkingRunning,Claire,13.0,"<<HKDevice: 0x2825dc0a0>, name:iPhone, manufac...",km,2019-09-21 23:53:13+01:00,2019-09-21 23:42:10+01:00,2019-09-21 23:42:13+01:00,0.00600,2019-09-21
56675,DistanceWalkingRunning,iPhone de Claire,12.1.3,"<<HKDevice: 0x2825dd0e0>, name:iPhone, manufac...",km,2019-02-02 20:06:09+01:00,2019-02-02 19:55:46+01:00,2019-02-02 20:05:18+01:00,0.32956,2019-02-02
56676,DistanceWalkingRunning,iPhone de Claire,12.1.3,"<<HKDevice: 0x2825dd0e0>, name:iPhone, manufac...",km,2019-02-02 20:23:29+01:00,2019-02-02 20:05:18+01:00,2019-02-02 20:05:48+01:00,0.04504,2019-02-02
56677,DistanceWalkingRunning,iPhone de Claire (2),,,km,2015-05-07 23:07:36+01:00,2015-05-07 21:37:56+01:00,2015-05-07 21:38:04+01:00,0.01362,2015-05-07
...,...,...,...,...,...,...,...,...,...,...
123440,DistanceWalkingRunning,Claire,14.4.2,"<<HKDevice: 0x2825dbc00>, name:iPhone, manufac...",km,2021-12-15 18:55:37+01:00,2021-12-15 18:44:11+01:00,2021-12-15 18:50:43+01:00,0.12267,2021-12-15
123441,DistanceWalkingRunning,Claire,14.4.2,"<<HKDevice: 0x2825dbc00>, name:iPhone, manufac...",km,2021-12-15 19:09:02+01:00,2021-12-15 18:57:59+01:00,2021-12-15 19:07:55+01:00,0.25321,2021-12-15
123442,DistanceWalkingRunning,Claire,14.4.2,"<<HKDevice: 0x2825dbc00>, name:iPhone, manufac...",km,2021-12-15 19:31:37+01:00,2021-12-15 19:13:39+01:00,2021-12-15 19:20:25+01:00,0.30467,2021-12-15
123443,DistanceWalkingRunning,Claire,14.4.2,"<<HKDevice: 0x2825dbc00>, name:iPhone, manufac...",km,2021-12-15 20:02:07+01:00,2021-12-15 19:47:43+01:00,2021-12-15 19:47:46+01:00,0.00900,2021-12-15
