In [1]:
import os
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xmltodict

import helper
import dataloader

In [2]:
def labelcleaner(df):
    """Return ``df`` with cleaned column labels (and index name, if set).

    Every column label is passed through ``helper._cleanlabel``; when the
    index has a truthy name it is cleaned the same way.

    The input frame is left untouched: labels are replaced on a shallow copy
    (data is shared, not duplicated), so a frame displayed by an earlier cell
    does not silently change underneath the reader.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose labels should be cleaned.

    Returns
    -------
    pandas.DataFrame
        A new frame object with the cleaned labels.
    """
    cleaned = df.copy(deep=False)
    cleaned.columns = [helper._cleanlabel(col) for col in df.columns]
    if df.index.name:
        # Index.rename returns a new Index, so the caller's index keeps its name.
        cleaned.index = df.index.rename(helper._cleanlabel(df.index.name))
    return cleaned

In [3]:
# Folder holding the Apple Health export and the AutoSleep CSV.
# Set LIFESTYLE_DATA_DIR to run the notebook on another machine; the default
# is the original iCloud shared folder.
DATA_DIR = Path(
    os.environ.get(
        "LIFESTYLE_DATA_DIR",
        "/Users/ganesh/Library/Mobile Documents/com~apple~CloudDocs/SharedFolder",
    )
)
# Kept as plain strings so later cells see the same type as before.
health_datapath = str(DATA_DIR / 'export.xml')
sleep_datapath = str(DATA_DIR / 'AutoSleep.csv')

In [4]:
# Parse the whole Apple Health export into nested dicts; XML attributes become
# '@'-prefixed keys (e.g. '@type', '@value').
# The encoding is pinned so the result does not depend on the platform default.
# NOTE(review): assumes the export is UTF-8 -- confirm against the XML
# declaration at the top of export.xml. Undecodable bytes are still dropped.
with open(health_datapath, 'r', encoding='utf-8', errors='ignore') as xml_file:
    health = xmltodict.parse(xml_file.read())
    

In [5]:
# Pull the two record collections out of the parsed export.
health_root = health['HealthData']
healthrecords = health_root['Record']
workoutrecords = health_root['Workout']

In [6]:
# Tabulate the parsed XML records, then read the AutoSleep CSV export.
healthdata, workoutdata = (
    pd.DataFrame(records) for records in (healthrecords, workoutrecords)
)
sleepdata = pd.read_csv(sleep_datapath)

In [7]:
# Keep untouched snapshots of the raw frames for inspection.
# The health sample is seeded so that Restart & Run All shows the same rows,
# and capped at the frame length so a small export does not raise.
health_rawsample = healthdata.sample(n=min(1000, len(healthdata)), random_state=42)
workout_rawsample = workoutdata.copy()
sleep_rawsample = sleepdata.copy()

In [8]:
# Column names, non-null counts and dtypes of the three raw snapshots.
health_rawsample.info()
workout_rawsample.info()
sleep_rawsample.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 656241 to 664833
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   @type                             1000 non-null   object
 1   @sourceName                       1000 non-null   object
 2   @sourceVersion                    1000 non-null   object
 3   @unit                             986 non-null    object
 4   @creationDate                     1000 non-null   object
 5   @startDate                        1000 non-null   object
 6   @endDate                          1000 non-null   object
 7   @value                            1000 non-null   object
 8   @device                           971 non-null    object
 9   MetadataEntry                     143 non-null    object
 10  HeartRateVariabilityMetadataList  1 non-null      object
dtypes: object(11)
memory usage: 93.8+ KB
<class 'pandas.core.frame.DataFrame'>


In [9]:
# Clean the column labels of all three frames. The names are rebound, so from
# here on healthdata / workoutdata / sleepdata carry the cleaned labels.
healthdata = labelcleaner(healthdata)
workoutdata = labelcleaner(workoutdata)
sleepdata = labelcleaner(sleepdata)

In [10]:
# Check the cleaned health column labels.
healthdata.columns

Index(['type', 'sourcename', 'sourceversion', 'unit', 'creationdate',
       'startdate', 'enddate', 'value', 'device', 'metadataentry',
       'heartratevariabilitymetadatalist'],
      dtype='object')

In [11]:
# Check the cleaned workout column labels.
workoutdata.columns

Index(['workoutactivitytype', 'duration', 'durationunit', 'totaldistance',
       'totaldistanceunit', 'totalenergyburned', 'totalenergyburnedunit',
       'sourcename', 'sourceversion', 'device', 'creationdate', 'startdate',
       'enddate', 'metadataentry', 'workoutevent', 'workoutroute'],
      dtype='object')

In [12]:
# Check the cleaned sleep column labels.
sleepdata.columns

Index(['iso8601', 'fromdate', 'todate', 'bedtime', 'waketime', 'inbed',
       'awake', 'fellasleepin', 'sessions', 'asleep', 'asleepavg7',
       'efficiency', 'efficiencyavg7', 'quality', 'qualityavg7', 'deep',
       'deepavg7', 'sleepbpm', 'sleepbpmavg7', 'daybpm', 'daybpmavg7',
       'wakingbpm', 'wakingbpmavg7', 'hrv', 'hrvavg7', 'sleephrv',
       'sleephrvavg7', 'spo2avg', 'spo2min', 'spo2max', 'respavg', 'respmin',
       'respmax', 'tags', 'notes'],
      dtype='object')

In [13]:
## Apple health data cleaner

# Record types kept by cleansehealthdata, named as they appear after the
# 'HKQuantityTypeIdentifier' prefix has been stripped.
_HEALTH_DATATYPES = [
    'BodyMassIndex', 'Height', 'BodyMass', 'HeartRate',
    'RespiratoryRate', 'BodyFatPercentage', 'LeanBodyMass',
    'StepCount', 'DistanceWalkingRunning', 'BasalEnergyBurned',
    'ActiveEnergyBurned', 'FlightsClimbed',
    'AppleExerciseTime', 'RestingHeartRate', 'VO2Max',
    'WalkingHeartRateAverage', 'EnvironmentalAudioExposure',
    'HeadphoneAudioExposure', 'WalkingDoubleSupportPercentage',
    'SixMinuteWalkTestDistance', 'AppleStandTime', 'WalkingSpeed',
    'WalkingStepLength', 'WalkingAsymmetryPercentage',
    'HKDataTypeSleepDurationGoal', 'AppleWalkingSteadiness',
    'HeartRateVariabilitySDNN',
]


def cleansehealthdata(df):
    """Clean the Apple Health record frame.

    Expects the lower-case labels produced by ``labelcleaner`` ('type',
    'creationdate', 'startdate', 'enddate', 'value', 'device', ...).

    Steps, in order:
      1. drop columns with fewer than ``len(df) // 4`` non-null values;
      2. rename 'type' to 'datatype' and strip its
         'HKQuantityTypeIdentifier' prefix;
      3. parse the three date columns to timezone-naive UTC timestamps;
      4. fill missing 'device' with 'No device';
      5. drop 'HKCategoryValueSleepAnalysisInBed' rows and keep only the
         datatypes listed in ``_HEALTH_DATATYPES``;
      6. drop rows that still contain a null, then cast 'datatype' and
         'sourcename' to category and 'value' to float.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw health records with cleaned labels. Not modified.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame.
    """
    def to_naive_utc(col):
        # utc=True makes parsing work when the column mixes UTC offsets
        # (DST change, travel); without it pandas cannot build a single
        # datetime dtype and the .dt accessor fails.
        return pd.to_datetime(col, utc=True).dt.tz_convert(None)

    return (
        df
        .dropna(thresh=df.shape[0] // 4, axis=1)
        .rename(columns={'type': 'datatype'})
        .assign(
            creationdate=lambda x: to_naive_utc(x['creationdate']),
            startdate=lambda x: to_naive_utc(x['startdate']),
            enddate=lambda x: to_naive_utc(x['enddate']),
            # Literal prefix strip; regex=False keeps it literal on every
            # pandas version.
            datatype=lambda x: x['datatype'].str.replace(
                'HKQuantityTypeIdentifier', '', regex=False),
            device=lambda x: x['device'].fillna('No device'),
        )
        .query("value != 'HKCategoryValueSleepAnalysisInBed'")
        .loc[lambda x: x['datatype'].isin(_HEALTH_DATATYPES)]
        .dropna()
        .astype({'datatype': 'category', 'sourcename': 'category', 'value': 'float'})
    )


## Heart watch workout data cleaner
def cleanseworkoutdata(df):
    """Clean the workout frame.

    Expects the lower-case labels produced by ``labelcleaner``.

    Steps, in order:
      1. drop columns with fewer than ``len(df) // 4`` non-null values and
         the 'sourceversion' column (if present);
      2. rename 'workoutactivitytype' to 'workouttype' (stripping the
         'HKWorkoutActivityType' prefix) and 'metadataentry' to
         'workoutname';
      3. take the '@value' item of each metadata entry as the workout name,
         falling back to 'unknown' when the entry is not a dict;
      4. parse the three date columns to timezone-naive UTC timestamps and
         fill missing 'device' with 'No device';
      5. cast the label columns to category and the measurements to float,
         then drop rows that still contain a null.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw workout records with cleaned labels. Not modified.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame.
    """
    def to_naive_utc(col):
        # utc=True makes parsing work when the column mixes UTC offsets
        # (DST change, travel); without it the .dt accessor fails.
        return pd.to_datetime(col, utc=True).dt.tz_convert(None)

    def entry_value(entry):
        # Anything that is not a single dict is labelled 'unknown'.
        return entry.get('@value') if isinstance(entry, dict) else 'unknown'

    return (
        df
        .dropna(thresh=df.shape[0] // 4, axis=1)
        .drop(['sourceversion'], axis=1, errors='ignore')
        .rename(columns={'workoutactivitytype': 'workouttype',
                         'metadataentry': 'workoutname'})
        .assign(
            workouttype=lambda x: x['workouttype'].str.replace(
                'HKWorkoutActivityType', '', regex=False),
            workoutname=lambda x: x['workoutname'].apply(entry_value),
            creationdate=lambda x: to_naive_utc(x['creationdate']),
            startdate=lambda x: to_naive_utc(x['startdate']),
            enddate=lambda x: to_naive_utc(x['enddate']),
            device=lambda x: x['device'].fillna('No device'),
        )
        .astype({
            'workouttype': 'category',
            'sourcename': 'category',
            'workoutname': 'category',
            'duration': 'float',
            'totaldistance': 'float',
            'totalenergyburned': 'float',
        })
        .dropna()
    )


## AutoSleep data cleaner
def cleansesleepdata(df):
    """Clean the AutoSleep frame.

    Expects the lower-case labels produced by ``labelcleaner``.

    Steps, in order:
      1. drop the 7-day-average, SpO2, 'tags' and 'notes' columns (missing
         ones are ignored);
      2. drop columns with fewer than ``len(df) // 4`` non-null values;
      3. rename 'iso8601' to 'maindate' and reduce it to a timezone-naive
         UTC date (midnight timestamp);
      4. parse 'fromdate', 'todate', 'bedtime' and 'waketime' as datetimes;
      5. parse 'inbed', 'awake', 'fellasleepin', 'asleep', 'quality' and
         'deep' as timedeltas.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw AutoSleep rows with cleaned labels. Not modified.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame.
    """
    unused_cols = [
        'asleepavg7', 'efficiencyavg7', 'qualityavg7', 'deepavg7',
        'sleepbpmavg7', 'daybpmavg7', 'wakingbpmavg7', 'hrvavg7',
        'sleephrvavg7', 'spo2avg', 'spo2min', 'spo2max', 'tags', 'notes',
    ]
    datetime_cols = ['fromdate', 'todate', 'bedtime', 'waketime']
    timedelta_cols = ['inbed', 'awake', 'fellasleepin', 'asleep', 'quality', 'deep']

    return (
        df
        .drop(unused_cols, axis=1, errors='ignore')
        .dropna(thresh=df.shape[0] // 4, axis=1)
        .rename(columns={'iso8601': 'maindate'})
        .assign(
            # utc=True makes parsing work when rows carry different UTC
            # offsets (DST change, travel); without it .dt fails.
            maindate=lambda x: pd.to_datetime(x['maindate'], utc=True)
                                 .dt.tz_convert(None).dt.normalize(),
            # `col=col` binds the column name at definition time.
            **{col: (lambda x, col=col: pd.to_datetime(x[col]))
               for col in datetime_cols},
            **{col: (lambda x, col=col: pd.to_timedelta(x[col]))
               for col in timedelta_cols},
        )
    )

In [14]:
# Run each cleaner and rebind the name to the cleaned frame; the pre-clean
# frames are no longer reachable under these names after this cell.
healthdata = cleansehealthdata(healthdata)
workoutdata = cleanseworkoutdata(workoutdata)
sleepdata = cleansesleepdata(sleepdata)

In [15]:
# Verify dtypes and null counts of the cleaned health frame.
healthdata.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 973928 entries, 0 to 986936
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   datatype       973928 non-null  category      
 1   sourcename     973928 non-null  category      
 2   sourceversion  973928 non-null  object        
 3   unit           973928 non-null  object        
 4   creationdate   973928 non-null  datetime64[ns]
 5   startdate      973928 non-null  datetime64[ns]
 6   enddate        973928 non-null  datetime64[ns]
 7   value          973928 non-null  float64       
 8   device         973928 non-null  object        
dtypes: category(2), datetime64[ns](3), float64(1), object(3)
memory usage: 61.3+ MB


In [16]:
# Verify dtypes and null counts of the cleaned sleep frame.
sleepdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 21 columns):
 #   Column        Non-Null Count  Dtype          
---  ------        --------------  -----          
 0   maindate      333 non-null    datetime64[ns] 
 1   fromdate      333 non-null    datetime64[ns] 
 2   todate        333 non-null    datetime64[ns] 
 3   bedtime       333 non-null    datetime64[ns] 
 4   waketime      333 non-null    datetime64[ns] 
 5   inbed         333 non-null    timedelta64[ns]
 6   awake         333 non-null    timedelta64[ns]
 7   fellasleepin  331 non-null    timedelta64[ns]
 8   sessions      333 non-null    int64          
 9   asleep        333 non-null    timedelta64[ns]
 10  efficiency    333 non-null    float64        
 11  quality       331 non-null    timedelta64[ns]
 12  deep          282 non-null    timedelta64[ns]
 13  sleepbpm      331 non-null    float64        
 14  daybpm        329 non-null    float64        
 15  wakingbpm     333 non-n

In [17]:
# Verify dtypes and null counts of the cleaned workout frame.
workoutdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 362 entries, 0 to 361
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   workouttype            362 non-null    category      
 1   duration               362 non-null    float64       
 2   durationunit           362 non-null    object        
 3   totaldistance          362 non-null    float64       
 4   totaldistanceunit      362 non-null    object        
 5   totalenergyburned      362 non-null    float64       
 6   totalenergyburnedunit  362 non-null    object        
 7   sourcename             362 non-null    category      
 8   device                 362 non-null    object        
 9   creationdate           362 non-null    datetime64[ns]
 10  startdate              362 non-null    datetime64[ns]
 11  enddate                362 non-null    datetime64[ns]
 12  workoutname            362 non-null    category      
dtypes: ca

In [18]:
# Export a workbook with one sheet per data set. Only a sample of the health
# frame is written; the other two frames are written in full.
# The sample is seeded so a re-run writes the same rows, and capped at the
# frame length so a small export does not raise.
health_excel = healthdata.sample(n=min(10000, len(healthdata)), random_state=42)
workout_excel = workoutdata.copy()
sleep_excel = sleepdata.copy()

with pd.ExcelWriter("lifestyle.xlsx") as writer:
    health_excel.to_excel(writer, sheet_name="health", index=False)
    workout_excel.to_excel(writer, sheet_name="workout", index=False)
    sleep_excel.to_excel(writer, sheet_name="sleep", index=False)

In [None]:
# Pass sleepdata through helper.timetoseconds and rebind the name.
# NOTE(review): a later sleepdata.info() output shows the duration columns as
# int64/float64 instead of timedelta64 -- presumably the effect of this step;
# confirm against helper. Re-running this cell on the converted frame may not
# be safe -- verify.
sleepdata = sleepdata.pipe(helper.timetoseconds)

In [31]:

# Build one row per day by combining health, sleep and workout data.

# How each health datatype is aggregated per day.
DAILY_MEAN_TYPES = [
    'BodyMassIndex', 'Height', 'BodyMass', 'HeartRate',
    'RespiratoryRate', 'BodyFatPercentage', 'LeanBodyMass',
    'RestingHeartRate', 'VO2Max',
    'WalkingHeartRateAverage', 'EnvironmentalAudioExposure',
    'HeadphoneAudioExposure', 'WalkingDoubleSupportPercentage',
    'SixMinuteWalkTestDistance', 'WalkingSpeed',
    'WalkingStepLength', 'WalkingAsymmetryPercentage',
    'HKDataTypeSleepDurationGoal', 'AppleWalkingSteadiness',
    'HeartRateVariabilitySDNN',
]
DAILY_SUM_TYPES = [
    'StepCount', 'DistanceWalkingRunning', 'BasalEnergyBurned',
    'ActiveEnergyBurned', 'FlightsClimbed',
    'AppleExerciseTime',
    'AppleStandTime',
]
DAILY_RANGE_TYPES = ['HeartRate', 'HeartRateVariabilitySDNN']

# Truncate creationdate to the day once; all three health aggregations below
# reuse this frame instead of re-normalising the full frame each time.
health_by_day = healthdata.assign(
    creationdate=lambda x: x.creationdate.dt.normalize()
)

# Daily mean per datatype, one column per datatype.
daily_means = (
    health_by_day
    .loc[lambda x: x['datatype'].isin(DAILY_MEAN_TYPES)]
    .groupby(['creationdate', 'datatype'], observed=True)['value']
    .mean()
    .unstack()
)

# Daily total per datatype, one column per datatype.
daily_sums = (
    health_by_day
    .loc[lambda x: x['datatype'].isin(DAILY_SUM_TYPES)]
    .groupby(['creationdate', 'datatype'], observed=True)['value']
    .sum()
    .unstack()
)

# Daily min / max per datatype; helper.flatten_cols collapses the two-level
# columns (the displayed result has names such as 'min_HeartRate').
daily_ranges = (
    health_by_day
    .loc[lambda x: x['datatype'].isin(DAILY_RANGE_TYPES)]
    .groupby(['creationdate', 'datatype'], observed=True)['value']
    .agg(['min', 'max'])
    .unstack()
    .pipe(helper.flatten_cols)
)

# Sleep rows keyed by their date so they align with the daily index.
sleep_by_day = (
    sleepdata
    .rename(columns={'maindate': 'creationdate'})
    .set_index('creationdate')
)

# Workout totals per calendar day.
workout_by_day = (
    workoutdata
    .set_index('creationdate')
    .groupby(pd.Grouper(freq='1D'))[['duration', 'totaldistance', 'totalenergyburned']]
    .sum()
)

dayMerged = (
    daily_means
    # Outer join: keep days that only have means or only have sums.
    .merge(daily_sums, how='outer', left_index=True, right_index=True)
    # Left joins: attach the remaining sources to the existing days only.
    .merge(sleep_by_day, how='left', left_index=True, right_index=True)
    .merge(workout_by_day, how='left', left_index=True, right_index=True)
    .merge(daily_ranges, how='left', left_index=True, right_index=True)
)


In [32]:
# Preview the merged per-day frame.
dayMerged

Unnamed: 0_level_0,BodyMassIndex,Height,BodyMass,HeartRate,RespiratoryRate,BodyFatPercentage,LeanBodyMass,RestingHeartRate,VO2Max,WalkingHeartRateAverage,...,respavg,respmin,respmax,duration,totaldistance,totalenergyburned,min_HeartRate,min_HeartRateVariabilitySDNN,max_HeartRate,max_HeartRateVariabilitySDNN
creationdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-03,,,,,,,,,,,...,,,,,,,,,,
2020-03-07,,,,,,,,,,,...,,,,,,,,,,
2020-03-08,,,,,,,,,,,...,,,,,,,,,,
2021-07-27,,181.0,89.0,,,,,,,,...,,,,,,,,,,
2021-07-28,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-05,,,,71.075950,16.970588,,,68.0,,90.0,...,16.9,13.5,22.0,0.000000,0.0,0.000,56.0,20.6158,121.0,74.0787
2022-09-06,,,,100.044682,17.107843,,,63.0,,101.0,...,17.0,14.5,22.5,21.420656,0.0,197.042,46.0,19.3722,163.0,89.1370
2022-09-07,,,,102.231937,18.300000,,,63.0,,83.0,...,16.6,13.5,21.5,20.453234,0.0,190.569,46.0,22.5268,175.0,74.8563
2022-09-08,,,,104.105966,17.145833,,,67.0,,97.0,...,17.9,15.0,22.0,45.388385,0.0,192.078,55.0,18.3752,177.0,65.3777


In [21]:
# Cardiovascular subset of the per-day frame.
# The source frame is `dayMerged`; the previous name `mergeddata` is not
# defined anywhere in this notebook and raised NameError on a fresh kernel.
cardiovascular = (
    dayMerged.loc[:, ['BodyMassIndex','HeartRate', 'RespiratoryRate',
       'BodyFatPercentage', 'LeanBodyMass', 'RestingHeartRate', 'VO2Max',
       'HeartRateVariabilitySDNN','BasalEnergyBurned', 'ActiveEnergyBurned',
       'sleepbpm','daybpm', 'wakingbpm', 'hrv', 'sleephrv', 'respavg', 'respmin',
       'respmax','min_HeartRate', 'max_HeartRate']]
       # 'creationdate' is the index name; keep only days after the cut-off.
       .query("creationdate > '2021-09-14'")
       .rename(columns = {'HeartRateVariabilitySDNN':'hrvstd'})
       # These four columns are interpolated between readings and then
       # back-filled so the days before the first reading get a value too.
       .assign(BodyMassIndex = lambda x: x['BodyMassIndex'].interpolate().bfill(),
               BodyFatPercentage = lambda x: x['BodyFatPercentage'].interpolate().bfill(),
               LeanBodyMass = lambda x: x['LeanBodyMass'].interpolate().bfill(),
               VO2Max = lambda x: x['VO2Max'].interpolate().bfill()
               )
       # Interpolate the remaining gaps, then drop rows still incomplete.
       .interpolate()
       .dropna()
)



In [22]:
# Preview the cardiovascular frame.
cardiovascular

Unnamed: 0_level_0,BodyMassIndex,HeartRate,RespiratoryRate,BodyFatPercentage,LeanBodyMass,RestingHeartRate,VO2Max,hrvstd,BasalEnergyBurned,ActiveEnergyBurned,sleepbpm,daybpm,wakingbpm,hrv,sleephrv,respavg,respmin,respmax,min_HeartRate,max_HeartRate
creationdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-10-09,25.600,110.562966,17.039474,0.15700,70.64340,67.0,31.38,38.516892,1970.979,707.228,71.7,83.4,71.0,70.0,55.0,16.1,13.0,20.5,52.0,170.0
2021-10-10,25.525,74.164163,16.545455,0.15625,70.53705,74.0,31.38,42.546729,1845.147,188.375,74.1,79.9,76.0,49.0,43.0,16.6,14.5,19.0,59.0,105.0
2021-10-11,25.450,112.235480,16.398148,0.15550,70.43070,74.5,31.38,47.984391,2035.349,744.541,69.7,87.7,67.0,88.0,61.0,16.1,11.5,18.5,49.0,166.0
2021-10-12,25.375,110.395110,17.056818,0.15475,70.32435,75.0,31.38,50.573500,1987.530,744.673,69.5,83.1,61.0,104.0,64.0,16.3,13.0,19.0,57.0,172.0
2021-10-13,25.300,103.709628,16.741379,0.15400,70.21800,70.0,31.38,52.902292,1944.299,583.970,68.3,81.0,65.0,42.0,38.0,15.7,12.5,20.0,48.0,160.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-05,24.200,71.075950,16.970588,0.14200,68.12520,68.0,35.36,42.446709,1904.939,385.227,63.6,77.3,64.0,65.0,56.0,16.9,13.5,22.0,56.0,121.0
2022-09-06,24.200,100.044682,17.107843,0.14200,68.12520,63.0,35.36,52.398218,1876.003,586.126,62.8,74.6,62.0,57.0,49.0,17.0,14.5,22.5,46.0,163.0
2022-09-07,24.200,102.231937,18.300000,0.14200,68.12520,63.0,35.36,49.178608,1935.037,807.137,63.6,76.2,63.0,58.0,53.0,16.6,13.5,21.5,46.0,175.0
2022-09-08,24.200,104.105966,17.145833,0.14200,68.12520,67.0,35.36,44.145955,1953.545,792.980,69.3,79.4,68.0,70.0,54.0,17.9,15.0,22.0,55.0,177.0


In [23]:
# Check the health frame's columns and dtypes before loading it into SQL.
healthdata.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 973928 entries, 0 to 986936
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   datatype       973928 non-null  category      
 1   sourcename     973928 non-null  category      
 2   sourceversion  973928 non-null  object        
 3   unit           973928 non-null  object        
 4   creationdate   973928 non-null  datetime64[ns]
 5   startdate      973928 non-null  datetime64[ns]
 6   enddate        973928 non-null  datetime64[ns]
 7   value          973928 non-null  float64       
 8   device         973928 non-null  object        
dtypes: category(2), datetime64[ns](3), float64(1), object(3)
memory usage: 61.3+ MB


In [86]:
# Column -> SQL type strings handed to dataloader.full_load for "health".
custom = {
    "id": "INT PRIMARY KEY",
    **dict.fromkeys(("creationdate", "startdate", "enddate"), "DATETIME"),
}
dataloader.full_load(df=healthdata, tbl="health", hasindex=False, custom=custom)

DataFrame full loaded to Table: health


In [24]:
# Check the workout frame's columns and dtypes before loading it into SQL.
workoutdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 362 entries, 0 to 361
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   workouttype            362 non-null    category      
 1   duration               362 non-null    float64       
 2   durationunit           362 non-null    object        
 3   totaldistance          362 non-null    float64       
 4   totaldistanceunit      362 non-null    object        
 5   totalenergyburned      362 non-null    float64       
 6   totalenergyburnedunit  362 non-null    object        
 7   sourcename             362 non-null    category      
 8   device                 362 non-null    object        
 9   creationdate           362 non-null    datetime64[ns]
 10  startdate              362 non-null    datetime64[ns]
 11  enddate                362 non-null    datetime64[ns]
 12  workoutname            362 non-null    category      
dtypes: ca

In [25]:
# Column -> SQL type strings handed to dataloader.full_load for "workout".
custom = {
    "id": "INT PRIMARY KEY",
    **dict.fromkeys(("creationdate", "startdate", "enddate"), "DATETIME"),
}
dataloader.full_load(df=workoutdata, tbl="workout", hasindex=False, custom=custom)

DataFrame full loaded to Table: workout


In [26]:
# Sleep frame dtypes; this output still shows timedelta64 duration columns.
sleepdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 21 columns):
 #   Column        Non-Null Count  Dtype          
---  ------        --------------  -----          
 0   maindate      333 non-null    datetime64[ns] 
 1   fromdate      333 non-null    datetime64[ns] 
 2   todate        333 non-null    datetime64[ns] 
 3   bedtime       333 non-null    datetime64[ns] 
 4   waketime      333 non-null    datetime64[ns] 
 5   inbed         333 non-null    timedelta64[ns]
 6   awake         333 non-null    timedelta64[ns]
 7   fellasleepin  331 non-null    timedelta64[ns]
 8   sessions      333 non-null    int64          
 9   asleep        333 non-null    timedelta64[ns]
 10  efficiency    333 non-null    float64        
 11  quality       331 non-null    timedelta64[ns]
 12  deep          282 non-null    timedelta64[ns]
 13  sleepbpm      331 non-null    float64        
 14  daybpm        329 non-null    float64        
 15  wakingbpm     333 non-n

In [28]:
# Sleep frame dtypes again; this output shows numeric duration columns.
sleepdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 21 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   maindate      333 non-null    datetime64[ns]
 1   fromdate      333 non-null    datetime64[ns]
 2   todate        333 non-null    datetime64[ns]
 3   bedtime       333 non-null    datetime64[ns]
 4   waketime      333 non-null    datetime64[ns]
 5   inbed         333 non-null    int64         
 6   awake         333 non-null    int64         
 7   fellasleepin  331 non-null    float64       
 8   sessions      333 non-null    int64         
 9   asleep        333 non-null    int64         
 10  efficiency    333 non-null    float64       
 11  quality       331 non-null    float64       
 12  deep          282 non-null    float64       
 13  sleepbpm      331 non-null    float64       
 14  daybpm        329 non-null    float64       
 15  wakingbpm     333 non-null    float64   

In [30]:
# Column -> SQL type strings handed to dataloader.full_load for "sleep".
custom = {
    "id": "INT PRIMARY KEY",
    **dict.fromkeys(("maindate", "fromdate", "todate"), "DATE"),
    **dict.fromkeys(("bedtime", "waketime"), "DATETIME"),
}
dataloader.full_load(df=sleepdata, tbl="sleep", hasindex=False, custom=custom)

DataFrame full loaded to Table: sleep


In [35]:
# Check the merged per-day frame's columns and dtypes before loading it.
dayMerged.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 371 entries, 2020-01-03 to 2022-09-09
Data columns (total 54 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   BodyMassIndex                   74 non-null     float64       
 1   Height                          1 non-null      float64       
 2   BodyMass                        82 non-null     float64       
 3   HeartRate                       335 non-null    float64       
 4   RespiratoryRate                 334 non-null    float64       
 5   BodyFatPercentage               74 non-null     float64       
 6   LeanBodyMass                    74 non-null     float64       
 7   RestingHeartRate                323 non-null    float64       
 8   VO2Max                          36 non-null     float64       
 9   WalkingHeartRateAverage         318 non-null    float64       
 10  EnvironmentalAudioExposure      334 non-null    float64

In [49]:
# Column -> SQL type strings handed to dataloader.full_load for "dayMerged".
# hasindex=True here, unlike the other loads: dayMerged is indexed by
# creationdate.
# FIXME: the recorded run of this cell failed with SQL Server error 22007
# ("conversion of a datetime2 data type to a datetime data type resulted in an
# out-of-range value").
# NOTE(review): the cause is not visible from this notebook. The sleep datetime
# columns come from a left merge, so days without sleep data may hold NaT --
# check how dataloader.full_load handles missing datetimes; confirm.
custom={"id":"INT PRIMARY KEY",
        "creationdate": "DATETIME",
        "fromdate": "DATETIME2",
        "todate": "DATETIME2",
        "bedtime": "DATETIME2",
        "waketime": "DATETIME2",
        }
dataloader.full_load(df = dayMerged, tbl = "dayMerged", hasindex = True, custom = custom)

DataError: ('22007', '[22007] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]The conversion of a datetime2 data type to a datetime data type resulted in an out-of-range value. (242) (SQLExecute)')