# Apple Health Extractor

This code will parse your Apple Health export data, create multiple CSV and do some simple data checks and data analysis. 

Enjoy! 

--------

## Setup and Usage NOTE

* Export your data from Apple Health App on your phone. 
* Unzip export.zip into this directory and rename to data. 
* Inside your directory there should be a directory and file here: /data/export.xml
* Run inside project or in the command line.

In [1]:
# %run -i 'apple-health-data-parser' 'export.xml' 
%run -i 'apple-health-data-parser' 'export.xml' 

Reading data from export.xml . . . done
Unexpected node of type ExportDate.

Tags:
ActivitySummary: 686
ExportDate: 1
Me: 1
Record: 1142965
Workout: 106

Fields:
HKCharacteristicTypeIdentifierBiologicalSex: 1
HKCharacteristicTypeIdentifierBloodType: 1
HKCharacteristicTypeIdentifierDateOfBirth: 1
HKCharacteristicTypeIdentifierFitzpatrickSkinType: 1
activeEnergyBurned: 686
activeEnergyBurnedGoal: 686
activeEnergyBurnedUnit: 686
appleExerciseTime: 686
appleExerciseTimeGoal: 686
appleStandHours: 686
appleStandHoursGoal: 686
creationDate: 1143071
dateComponents: 686
device: 1125552
duration: 106
durationUnit: 106
endDate: 1143071
sourceName: 1143071
sourceVersion: 1138201
startDate: 1143071
totalDistance: 106
totalDistanceUnit: 106
totalEnergyBurned: 106
totalEnergyBurnedUnit: 106
type: 1142965
unit: 1133858
value: 1142954
workoutActivityType: 106

Record types:
ActiveEnergyBurned: 525528
AppleExerciseTime: 11599
AppleStandHour: 9073
AppleStandTime: 4813
BasalEnergyBurned: 100290
BodyFatPer

-----

# Apple Health Data Check and Simple Data Analysis

In [1]:
import numpy as np
import pandas as pd
import glob
from datetime import date, datetime, timedelta as td
import pytz
import numpy as np
import pandas as pd

----

# Weight

In [2]:
weight = pd.read_csv("BodyMass.csv")

In [55]:
weight.tail()

Unnamed: 0,sourceName,sourceVersion,device,type,unit,creationDate,startDate,endDate,value
176,Mi Fit,201907081918,,BodyMass,kg,2020-07-02 07:52:37 +0530,2020-07-02 07:52:31 +0530,2020-07-02 07:52:31 +0530,88.8
177,Mi Fit,201907081918,,BodyMass,kg,2020-07-04 09:09:36 +0530,2020-07-04 09:09:25 +0530,2020-07-04 09:09:25 +0530,90.9
178,Mi Fit,201907081918,,BodyMass,kg,2020-07-05 09:03:03 +0530,2020-07-04 09:10:52 +0530,2020-07-04 09:10:52 +0530,89.4
179,Mi Fit,201907081918,,BodyMass,kg,2020-07-05 09:03:03 +0530,2020-07-05 09:02:55 +0530,2020-07-05 09:02:55 +0530,88.9
180,Mi Fit,201907081918,,BodyMass,kg,2020-07-06 08:33:11 +0530,2020-07-06 08:33:05 +0530,2020-07-06 08:33:05 +0530,88.3


In [56]:
weight.describe()

Unnamed: 0,device,value
count,0.0,181.0
mean,,88.637569
std,,0.806861
min,,84.2
25%,,88.3
50%,,88.6
75%,,89.1
max,,90.9


----

## Steps

In [114]:
steps = pd.read_csv("StepCount.csv")

In [115]:
len(steps)

174943

In [116]:
steps.columns

Index(['sourceName', 'sourceVersion', 'device', 'type', 'unit', 'creationDate',
       'startDate', 'endDate', 'value'],
      dtype='object')

In [117]:
steps.describe()

Unnamed: 0,value
count,174943.0
mean,82.619207
std,214.041698
min,1.0
25%,17.0
50%,40.0
75%,90.0
max,43109.0


In [118]:
# TRIAL CODE TO GROUPBY
# a = len(steps.index)
# for i in range(0,a):
#     steps['creationDate'][i] =steps['creationDate'][i].replace('-',':')[0:19].replace(" ", ":")
#     steps['startDate'][i] =steps['startDate'][i].replace('-',':')[0:19].replace(" ", ":")
#     steps['endDate'][i] =steps['endDate'][i].replace('-',':')[0:19].replace(" ", ":")
#     print(i)
# print(steps['creationDate'])
# steps['creationDate'][0] =steps['creationDate'][0].replace('-',':')[0:19].replace(" ", ":") 
# print(steps['creationDate'][0])


In [119]:
# functions to convert UTC to Kolkata time zone and extract date/time elements
convert_tz = lambda x: x.to_pydatetime().replace(tzinfo=pytz.utc).astimezone(pytz.timezone('Asia/Kolkata'))
get_year = lambda x: convert_tz(x).year
get_month = lambda x: '{}-{:02}'.format(convert_tz(x).year, convert_tz(x).month) #inefficient
get_date = lambda x: '{}-{:02}-{:02}'.format(convert_tz(x).year, convert_tz(x).month, convert_tz(x).day) #inefficient
get_day = lambda x: convert_tz(x).day
get_hour = lambda x: convert_tz(x).hour
get_minute = lambda x: convert_tz(x).minute
get_day_of_week = lambda x: convert_tz(x).weekday()

In [120]:
# parse out date and time elements as Kolkata time
steps['startDate'] = pd.to_datetime(steps['startDate'])
steps['year'] = steps['startDate'].map(get_year)
steps['month'] = steps['startDate'].map(get_month)
steps['date'] = steps['startDate'].map(get_date)
steps['day'] = steps['startDate'].map(get_day)
steps['hour'] = steps['startDate'].map(get_hour)
steps['minute'] = steps['startDate'].map(get_minute)
steps['dow'] = steps['startDate'].map(get_day_of_week)

In [125]:
steps_by_date = steps.groupby(['date'])['value'].sum().reset_index(name='Steps')
steps_by_date.head()

Unnamed: 0,date,Steps
0,2015-12-21,4355
1,2015-12-22,4389
2,2015-12-23,6566
3,2015-12-24,5180
4,2015-12-25,4498


In [126]:
steps_by_date['date'] = pd.to_datetime(steps_by_date['date'])
steps_by_date['dow'] = steps_by_date['date'].dt.weekday
steps_by_date['weekNo'] = steps_by_date['dow']
ref_for_week = steps_by_date['dow'][0]
p = 0
for i in range(0,len(steps_by_date)):
    if steps_by_date['dow'][i] == ref_for_week:
        p = p+1
    steps_by_date['weekNo'][i] = p
steps_by_date.to_csv("steps_per_day_shashank.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [129]:
InsightLength = 12
week_num = 221
# len(steps_by_date)
i = 7*(week_num - 2)
# steps_by_date = steps_by_date.to_numpy
steps_by_date[i:i+7*InsightLength]['Steps']

# np.unique(steps_by_date)

1533     9586
1534    10094
1535    17906
1536    13233
1537    23340
        ...  
1612     9640
1613    23457
1614    14834
1615    20820
1616    14632
Name: Steps, Length: 84, dtype: int64

In [37]:
# grouping data by week and storing in table
steps_by_week = steps_by_date.groupby(['weekNo'])['Steps'].sum().reset_index(name='Steps')

In [14]:
steps_by_week['stdDev'] = steps_by_date.groupby(['weekNo'])['Steps'].std()
steps_by_week

Unnamed: 0,weekNo,Steps,stdDev
0,1,30513,
1,2,42378,1430.254290
2,3,42809,2658.213310
3,4,31654,2795.473595
4,5,37155,1848.365765
...,...,...,...
233,234,99240,4927.263497
234,235,129670,6948.434342
235,236,181365,8530.072034
236,237,148001,6071.716855


In [142]:
steps_by_week.to_csv("steps_per_week_shashank.csv", index=False)
steps_by_week.to_numpy()
steps_week_np = steps_by_week.to_numpy()
[steps_week_np[i][0] for i in range(0,len(steps_week_np))]

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185

In [115]:
def setofInsightMonthly(steps_week,threeWeek = False,twoWeek = False):
#   THIS FUNCTION FINDS and STORES THE INSIGHTS ON THE BASIS OF A 4WEEK/28DAY PERIOD 
#   ALSO FINDS ON THE BASIS OF 3 and 2 weeks
    steps_week_np = steps_week.to_numpy()
    steps_12week = np.flip(steps_week_np[len(steps_week_np)-13:len(steps_week_np)-2],axis = 0) #flipping the last to the first for easier access to indices 
    sliding_insight_four_week = {'mean':np.zeros(len(steps_12week) - 4),'stdDev':np.zeros(len(steps_12week) - 4)} #hardcoded sliding possibilities according to a month
    sliding_insight_three_week = {'mean':np.zeros(len(steps_12week) - 3),'stdDev':np.zeros(len(steps_12week) - 3)}
    sliding_insight_two_week = {'mean':np.zeros(len(steps_12week) - 2),'stdDev':np.zeros(len(steps_12week) - 2)}
#     finding mean of Grouped weekly data
    sliding_insight_four_week['mean'] = [np.mean(steps_12week[i:i+4,1]) for i in range(0,len(steps_12week)-4)]
    if threeWeek:
        sliding_insight_three_week['mean'] = [np.mean(steps_12week[i:i+3,1]) for i in range(0,len(steps_12week)-3)]
    if twoWeek:
        sliding_insight_two_week['mean'] = [(np.mean(steps_12week[i:i+2,1])) for i in range(0,len(steps_12week)-2)]
    return sliding_insight_four_week,sliding_insight_three_week,sliding_insight_two_week

A,B,C = setofInsightMonthly(steps_by_day,True,True)
print(A)
print(B)
print(C)

{'mean': [134636.25, 126032.75, 131760.25, 146805.5, 151290.0, 147402.5, 131213.25], 'stdDev': array([0., 0., 0., 0., 0., 0., 0.])}
{'mean': [136758.33333333334, 119060.0, 124820.33333333333, 142600.33333333334, 152984.0, 152736.33333333334, 145676.66666666666, 121810.66666666667], 'stdDev': array([0., 0., 0., 0., 0., 0., 0., 0.])}
{'mean': [155517.5, 114455.0, 113755.0, 137610.5, 149765.5, 156000.5, 152814.5, 138804.5, 109612.0], 'stdDev': array([0., 0., 0., 0., 0., 0., 0., 0., 0.])}


In [447]:
# Present this to sushant

import scipy.stats
def csv_to_pd():
    steps_per_day = pd.read_csv('steps_per_day_shashank.csv')
    steps_per_week = pd.read_csv('steps_per_week_shashank.csv')
    return steps_per_day,steps_per_week

def findingDaysfromWeek(week_num,insightDuration,daily_data):
    # week start should be a multiple of 7 or 0
    # might have to make it week_num - 1 if the year is normal currently leap year so week_num-2
    if week_num < 100:
        week_start = 7*(week_num-1)
    else:
        week_start = 7*(week_num-2)
    return daily_data[week_start:week_start+7*insightDuration]['Steps'], daily_data[week_start:week_start+7*10]['Steps']
# combining this with 
def setofInsightMonthly(steps_week,threeWeek = False,twoWeek = False):
#   THIS FUNCTION FINDS and STORES THE INSIGHTS ON THE BASIS OF A 4WEEK/28DAY PERIOD 
#   ALSO FINDS ON THE BASIS OF 3 and 2 weeks
    steps_week_np = steps_week.to_numpy()
    temp = steps_week_np[len(steps_week_np)-12:len(steps_week_np)-1]
#     steps_12week = np.flip(steps_week_np[len(steps_week_np)-12:len(steps_week_np)],axis = 0) #flipping the last to the first for easier access to indices 
    steps_12week = steps_week_np[len(steps_week_np)-12:len(steps_week_np)-1]
    weeknum = np.unique([steps_12week[i][0] for i in range(0,len(steps_12week))]) #finding unique week numbers from which insights need to be extracted
    print(weeknum)
    sliding_insight_four_week = {'mean':np.zeros(len(steps_12week) - 3),'stdDev':np.zeros(len(steps_12week) - 3),'weeknum':[]} #hardcoded sliding possibilities according to a month
    sliding_insight_three_week = {'mean':np.zeros(len(steps_12week) - 2),'stdDev':np.zeros(len(steps_12week) - 2),'weeknum':[]}
    sliding_insight_two_week = {'mean':np.zeros(len(steps_12week) - 1),'stdDev':np.zeros(len(steps_12week) - 1),'weeknum':[]}
#     finding mean of Grouped weekly 
    sliding_insight_four_week['mean'] = [np.mean(steps_12week[i:i+4,1]) for i in range(0,len(steps_12week)-3)]
    sliding_insight_four_week['weeknum'] = [weeknum[i:i+4][0] for i in range(0,len(steps_12week)-3)]
#     print(sliding_insight_four_week['weeknum'])
    if threeWeek:
        sliding_insight_three_week['mean'] = [np.mean(steps_12week[i:i+3,1]) for i in range(0,len(steps_12week)-2)]
        sliding_insight_three_week['weeknum'] = [weeknum[i:i+3][0] for i in range(0,len(steps_12week)-2)]
    if twoWeek:
        sliding_insight_two_week['mean'] = [(np.mean(steps_12week[i:i+2,1])) for i in range(0,len(steps_12week)-1)]
        sliding_insight_two_week['weeknum'] = [weeknum[i:i+2][0] for i in range(0,len(steps_12week)-1)]
    return sliding_insight_four_week,sliding_insight_three_week,sliding_insight_two_week

# CAN USE THIS OR STUDENT'S T TEST
def tTest(data1,data2,alpha):
    # calculate means
    data1 = data1.to_numpy()
    data2 = data2.to_numpy()
    mean1, mean2 = np.mean(data1), np.mean(data2)
    # number of paired samples
    n1 = len(data1)
    n2 = len(data2)
    # sum squared difference between observations
    s1 = np.std(data1)**2
    # sum difference between observations
    s2 = np.std(data2)**2
    # standard deviation of the difference between means
    den = np.sqrt(s1/len(data1) + s2/len(data2))
    # standard error of the difference between the means
    num = (mean1 - mean2)
    # calculate the t statistic
    t_stat = abs(num/den)
    # degrees of freedom
    df = ((s1**2/n1) + (s2**2/n2))**2/((s1**2/n1)**2/(n1-1) + (s2**2/n2)**2/(n2-1))
    # calculate the critical value
    cv = scipy.stats.t.ppf(1.0 - alpha, df)
    # calculate the p-value
    p = (1.0 - scipy.stats.t.cdf(abs(t_stat), df)) * 2.0
    return t_stat, df, cv, p
    # return everything
#     return data2

def gettingInsights():
    steps_per_day,steps_per_week = csv_to_pd()
#     print(steps_per_day)
    dict4week,dict3week,dict2week = setofInsightMonthly(steps_per_week,threeWeek = True,twoWeek = True)
#     print(dict4week['weeknum'])
    weekconsidered = 233
    insight2week,meandiff2week = weekInsight(weekconsidered,steps_per_day,dict2week,2)
    insight3week,meandiff3week = weekInsight(weekconsidered,steps_per_day,dict3week,3)
    insight4week,meandiff4week = weekInsight(weekconsidered,steps_per_day,dict4week,4)
#     { CODE SNIPPET TO SAVE MONTHLY DATA INTO A CSV FILE
#     array1 = np.transpose(np.array([dict2week['weeknum'],dict2week['mean']]))
#     array2 = np.transpose(np.array([dict3week['weeknum'],dict3week['mean']]))
#     array3 = np.transpose(np.array([dict4week['weeknum'],dict4week['mean']]))
#     print(array1)
    
#     np.savetxt("meandiff_grouped_2week.csv", array1, delimiter=",")
#     np.savetxt("meandiff_grouped_3week.csv", array2, delimiter=",")
#     np.savetxt("meandiff_grouped_4week.csv", array3, delimiter=",")
#     }
    weekNo2week,maxdiff2week = printGroupedInsightsHelper(insight2week,meandiff2week)
    weekNo3week,maxdiff3week = printGroupedInsightsHelper(insight3week,meandiff3week)
    weekNo4week,maxdiff4week = printGroupedInsightsHelper(insight4week,meandiff4week)
    if printGroupInsight(weekNo2week,meandiff2week,insight2week):
        print('Great job! your steps have increased from week '+str(weekNo2week)+' to '+str(weekNo2week+1)+' by '+str(round(maxdiff2week*14))+' steps')
    else:
        print('Please keep walking, your performance went down from week '+str(weekNo2week)+' to '+str(weekNo2week+1)+' by '+str(round(maxdiff2week*14))+' steps')
    if printGroupInsight(weekNo3week,meandiff3week,insight3week):
        print('Great job! your steps have increased from week '+str(weekNo3week)+' to '+str(weekNo3week+2)+' by '+str(round(maxdiff3week*21))+' steps')
    else:
        print('Please keep walking your performance went down from week '+str(weekNo3week)+' to '+str(weekNo3week+2)+' by '+str(round(maxdiff3week*21))+' steps')

    if printGroupInsight(weekNo4week,meandiff4week,insight4week):
        print('Great job! your steps have increased from week '+str(weekNo4week)+' to '+str(weekNo4week+3)+' by '+str(round(maxdiff4week*28))+' steps')
    else:
        print('Please keep walking, your performance went down from week '+str(weekNo4week)+' to '+str(weekNo4week+3)+' by '+str(round(maxdiff4week*28))+' steps')

def weekInsight(weekconsidered,steps_per_day,WeekgroupedData,groupSize):
    dailydatanumpy1,dailydata12week = findingDaysfromWeek(weekconsidered,groupSize,steps_per_day)
    a = np.array(WeekgroupedData['weeknum'])
    weekIndexGrouped = list(np.where(a == weekconsidered))
#     print(weekIndexGrouped[0])
#     print(WeekgroupedData['weeknum'])
    if len(weekIndexGrouped[0]) == 0:
        insight = []
        meandiff = []
        return insight,meandiff
    weekIndex = int(weekIndexGrouped[0])
    insight = []
    meandiff = []
    for i in range(0,len(WeekgroupedData['weeknum'])):
        dailydatanumpy2,_ = findingDaysfromWeek(WeekgroupedData['weeknum'][i],groupSize,steps_per_day)
        t_stat,_,_,p =  tTest(dailydatanumpy1,dailydatanumpy2,0.05)
        if t_stat > p and weekIndex > i:
            insight.append(WeekgroupedData['weeknum'][i])
            meandiff.append(WeekgroupedData['mean'][weekIndex] - WeekgroupedData['mean'][i])
    # two 4 week insight comparison
    return insight,meandiff
def printGroupedInsightsHelper(insight,meandiff):
    meandiffabs = [abs(number) for number in meandiff]
    print(meandiffabs)
    maxdiff = np.amax(meandiffabs)
    index = np.where(meandiffabs == maxdiff)
    return insight[int(index[0])],meandiffabs[int(index[0])]
def printGroupInsight(insightweek,meandiff,weeknum):
    indexInsight = np.where(weeknum == insightweek)
    if meandiff[int(indexInsight[0])] > 0:
        return True
    else:
        return False
gettingInsights()

[227 228 229 230 231 232 233 234 235 236 237]
[25049.5, 39059.5, 42245.5, 36010.5, 23855.5]
[26616.666666666657, 33676.33333333334, 33924.0, 23540.333333333343]
[12766.25, 16653.75, 12169.25, 8603.5]
Please keep walking, your performance went down from week 230 to 231 by 591437.0 steps
Please keep walking your performance went down from week 230 to 232 by 712404.0 steps
Please keep walking, your performance went down from week 229 to 232 by 466305.0 steps


In [343]:
#dont look at this
[227 228 229 230 231 232 233 234 235 236 237]
[236, 235, 234, 233, 232, 231, 230, 229, 228]
[235, 234, 233, 232, 231, 230, 229, 228]
[235, 234, 233, 232, 231, 230, 229, 228]

SyntaxError: invalid syntax (<ipython-input-343-b552b52ba88d>, line 1)

## Stand Count

In [187]:
stand = pd.read_csv("AppleStandHour.csv")

In [188]:
len(stand)

9073

In [181]:
stand.columns

Index(['sourceName', 'sourceVersion', 'device', 'type', 'unit', 'creationDate',
       'startDate', 'endDate', 'value'],
      dtype='object')

In [182]:
stand.tail()

Unnamed: 0,sourceName,sourceVersion,device,type,unit,creationDate,startDate,endDate,value
9068,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x2826a4be0>, name:Apple Watch, ma...",AppleStandHour,,2020-07-05 19:01:23 +0530,2020-07-05 19:00:00 +0530,2020-07-05 20:00:00 +0530,HKCategoryValueAppleStandHourStood
9069,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x2826a4cd0>, name:Apple Watch, ma...",AppleStandHour,,2020-07-05 20:11:10 +0530,2020-07-05 20:00:00 +0530,2020-07-05 21:00:00 +0530,HKCategoryValueAppleStandHourStood
9070,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x2826a4dc0>, name:Apple Watch, ma...",AppleStandHour,,2020-07-05 21:04:22 +0530,2020-07-05 21:00:00 +0530,2020-07-05 22:00:00 +0530,HKCategoryValueAppleStandHourStood
9071,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x2826a4eb0>, name:Apple Watch, ma...",AppleStandHour,,2020-07-05 22:01:13 +0530,2020-07-05 22:00:00 +0530,2020-07-05 23:00:00 +0530,HKCategoryValueAppleStandHourStood
9072,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x2826a4fa0>, name:Apple Watch, ma...",AppleStandHour,,2020-07-05 23:35:54 +0530,2020-07-05 23:00:00 +0530,2020-07-06 00:00:00 +0530,HKCategoryValueAppleStandHourStood


In [183]:
# stand['value'] = stand['endDate'] - stand['startDate']
a = ['i' for i in range(0,len(stand['endDate']))]
# for i in range(0,len(stand['endDate'])):
    
    
for i in range(0,len(stand['endDate'])):
    t1 = datetime.strptime(stand['endDate'][i][0:19], '%Y%m%d%H%M%S')
#     t1 = datetime.time(*map(int, stand['endDate'][i][12:19].split(':')))
#     t2 = datetime.time(*map(int, stand['startDate'][i][12:19].split(':')))
    t2 = datetime.strptime(stand['startDate'][i][0:19], '%Y%m%d%H%M%S')
    a[i] =  t1-t2
stand['value'] = a
print(stand['value'].tail())

ValueError: time data '2017-11-18 02:30:00' does not match format '%Y%m%d%H%M%S'

------

## Resting Heart Rate (HR)

In [18]:
restingHR = pd.read_csv("RestingHeartRate.csv")

In [19]:
len(restingHR)

645

In [20]:
restingHR.describe()

Unnamed: 0,device,value
count,0.0,645.0
mean,,69.809302
std,,5.422455
min,,50.0
25%,,67.0
50%,,69.0
75%,,72.0
max,,98.0


---

## Walking Heart Rate (HR) Average

In [21]:
walkingHR = pd.read_csv("WalkingHeartRateAverage.csv")

In [22]:
len(walkingHR)

539

In [23]:
walkingHR.describe()

Unnamed: 0,device,value
count,0.0,539.0
mean,,99.084416
std,,11.996546
min,,72.5
25%,,91.0
50%,,97.0
75%,,104.0
max,,143.0


---

## Heart Rate Variability (HRV)

In [24]:
hrv = pd.read_csv("HeartRateVariabilitySDNN.csv")

In [25]:
len(hrv)

1687

In [26]:
hrv.columns

Index(['sourceName', 'sourceVersion', 'device', 'type', 'unit', 'creationDate',
       'startDate', 'endDate', 'value'],
      dtype='object')

In [27]:
hrv.describe()

Unnamed: 0,value
count,1687.0
mean,33.308511
std,13.458962
min,7.32718
25%,23.76185
50%,31.0815
75%,40.1322
max,160.64


In [28]:
hrv.tail()

Unnamed: 0,sourceName,sourceVersion,device,type,unit,creationDate,startDate,endDate,value
1682,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x282780a00>, name:Apple Watch, ma...",HeartRateVariabilitySDNN,ms,2020-07-05 11:54:27 +0530,2020-07-05 11:53:26 +0530,2020-07-05 11:54:27 +0530,21.7083
1683,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x282784af0>, name:Apple Watch, ma...",HeartRateVariabilitySDNN,ms,2020-07-05 13:55:48 +0530,2020-07-05 13:54:43 +0530,2020-07-05 13:55:48 +0530,38.0041
1684,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x282785a90>, name:Apple Watch, ma...",HeartRateVariabilitySDNN,ms,2020-07-05 17:54:11 +0530,2020-07-05 17:53:06 +0530,2020-07-05 17:54:11 +0530,31.3568
1685,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x282786260>, name:Apple Watch, ma...",HeartRateVariabilitySDNN,ms,2020-07-05 18:09:13 +0530,2020-07-05 18:08:10 +0530,2020-07-05 18:09:13 +0530,30.0479
1686,Shashank’s Apple Watch,6.1.3,"<<HKDevice: 0x2827863f0>, name:Apple Watch, ma...",HeartRateVariabilitySDNN,ms,2020-07-05 21:55:17 +0530,2020-07-05 21:54:11 +0530,2020-07-05 21:55:17 +0530,28.2694


-------

## VO2 Max

In [29]:
vo2max = pd.read_csv("VO2Max.csv")

In [30]:
len(vo2max)

57

In [31]:
vo2max.describe()

Unnamed: 0,sourceVersion,device,value
count,0.0,0.0,57.0
mean,,,33.181767
std,,,2.716489
min,,,29.8837
25%,,,31.7687
50%,,,31.8863
75%,,,33.9545
max,,,41.4593


----

## Blood Pressure

In [32]:
diastolic = pd.read_csv("BloodPressureDiastolic.csv")
systolic = pd.read_csv("BloodPressureSystolic.csv")

FileNotFoundError: File b'BloodPressureDiastolic.csv' does not exist

In [33]:
diastolic.describe()

NameError: name 'diastolic' is not defined

In [34]:
systolic.describe()

NameError: name 'systolic' is not defined

------

## Sleep

In [41]:
sleep = pd.read_csv("SleepAnalysis.csv")
sleep['unit'] = 'hours'

In [42]:
sleep.tail()

Unnamed: 0,sourceName,sourceVersion,device,type,unit,creationDate,startDate,endDate,value
17,Dozee,1,,SleepAnalysis,hours,2020-06-12 00:01:58 +0530,2019-04-07 00:55:05 +0530,2019-04-07 06:57:33 +0530,HKCategoryValueSleepAnalysisAsleep
18,Dozee,1,,SleepAnalysis,hours,2020-06-12 00:02:37 +0530,2019-04-07 00:53:05 +0530,2019-04-07 06:57:33 +0530,HKCategoryValueSleepAnalysisInBed
19,Dozee,1,,SleepAnalysis,hours,2020-06-12 00:02:37 +0530,2019-04-07 00:55:05 +0530,2019-04-07 06:57:33 +0530,HKCategoryValueSleepAnalysisAsleep
20,Dozee,1,,SleepAnalysis,hours,2020-06-12 00:03:58 +0530,2019-04-07 00:53:05 +0530,2019-04-07 06:57:33 +0530,HKCategoryValueSleepAnalysisInBed
21,Dozee,1,,SleepAnalysis,hours,2020-06-12 00:03:58 +0530,2019-04-07 00:55:05 +0530,2019-04-07 06:57:33 +0530,HKCategoryValueSleepAnalysisAsleep


In [37]:
sleep.describe()

Unnamed: 0,sourceVersion,unit
count,22.0,0.0
mean,18.818182,
std,24.125932,
min,1.0,
25%,1.0,
50%,1.0,
75%,50.0,
max,50.0,
