# Weekly profile extraction using dcarte 

This notebook demonstrates how to use dcarte to extract weekly aggregates from the UK DRI CRT in-house datasets.

In [2]:
# !pip install dcarte

In [3]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os
import sys
# # This line allows you to clone the repository and skip installing dcarte
# sys.path.insert(0, os.path.abspath("../.."))
import dcarte
from scipy.stats import circmean,circstd
from dcarte.utils import (between_time,
                          time_to_angles)
# This line allows live changes to the imported file 
%load_ext autoreload
%autoreload 2


# Objective
The purpose of the weekly report is to provide a unified perspective on the daily and weekly summaries at a household level. This requires first loading the datasets of interest, then constructing from them a unified dataset with a daily frequency, and finally using that daily dataset to generate the various reports that we want to make.

# Step 1 - We start by loading all the datasets for which we want daily and weekly views

In [6]:
# Forwarded to every dcarte.load call below. Presumably True makes dcarte
# refresh its local copy of a dataset instead of reusing it -- confirm
# against the dcarte documentation.
update = False
# All four datasets come from the same domain, so the domain name is spelled
# identically in every call.
Motion = dcarte.load('Motion','Base',update=update)
Entryway = dcarte.load('Entryway','Base',update=update)
Physiology = dcarte.load('Physiology','Base',update=update)
Sleep = dcarte.load('Sleep','Base',update=update)

Finished Loading Motion in:                    0.6 seconds   
Finished Loading Entryway in:                  0.1 seconds   
Finished Loading Physiology in:                0.0 seconds   
Finished Loading Sleep in:                     2.0 seconds   


# Step 2 - We concentrate on the Motion dataset in order to provide a daily and weekly summary for each site.
If you execute the command `Motion.head()`, you will find that the Motion view includes three columns: each row records sensor activity for a specific `patient_id` at a specific `location_name` at the datetime given in `start_date`, as seen in the example below.
To tally daily occurrences at the various locations, as well as their overall totals, we will use pandas methods to reshape this table into a more usable format.

In [7]:
# Preview the first rows of the Motion table to see its columns.
Motion.head()

Unnamed: 0,patient_id,location_name,start_date
0,2GN1PHeHwRzNYQ7q4Nvg7g,Front door,2021-05-14 13:33:44
1,2GN1PHeHwRzNYQ7q4Nvg7g,Bathroom,2021-05-14 13:34:46
2,2GN1PHeHwRzNYQ7q4Nvg7g,Kitchen,2021-05-14 13:34:51
3,2GN1PHeHwRzNYQ7q4Nvg7g,Bedroom,2021-05-14 13:34:52
4,2GN1PHeHwRzNYQ7q4Nvg7g,Hallway,2021-05-14 13:34:55


## step 2.a creating daily activity per location
This sequence will perform the following commands:
- Using the 'query' method we remove any rows whose locations exist in the 'drop' list.
- We use the datetime column ('start_date') as the index for the dataframe.
- We partition the dataset into independent patients and then into household locations within each patient.
- We produce 24-hour windows beginning at noon using the resample command.
- And we use the count command to get the total number of observations made within that time period.
- We switch the location and datetime index levels using the swaplevel command.
- Then we change the table to a wide format using the unstack command.
- Finally, we provide a total column that totals the daily data.
- Importantly, we remove any day with zero total activity.

In [8]:
# Locations that are excluded from the household activity summary.
drop = ['bed_in','Back door','Front door']

# Count the sensor firings per patient and location in 24-hour bins that
# start at noon (offset='12h').
daily_counts = (
    Motion
    .query('location_name not in @drop')
    .assign(activity=True)
    .set_index('start_date')
    .groupby(['patient_id', 'location_name'])
    .resample('1D', offset='12h')
    .activity
    .count()
)

# Move the location level innermost and pivot it into columns (wide format),
# append the per-day total, and keep only days with some recorded activity.
activity_metrics = daily_counts.swaplevel(-2, -1).unstack()
activity_metrics = (
    activity_metrics
    .assign(Total=activity_metrics.sum(axis=1))
    .query('Total > 0.0')
)

## step 2.b Creating a weekly table with the mean and standard deviation for each column
The following commands will be completed by this sequence:
- We reset the index in order to convert the multiindex to standalone columns.
- Using the groupby command, we next separate the dataset into independent patients.
- Using the resample command, we generate one-week frames.
- And we use the agg command to quantify the mean and standard deviations for each column.


In [9]:
# Optional: nest the activity columns under a single 'Household activity'
# header before aggregating.
# activity_metrics.columns = pd.MultiIndex.from_product([['Household activity'],
#                                                        activity_metrics.columns])

# Every daily column receives the same pair of weekly statistics.
weekly_activity_spec = {column: ['mean', 'std'] for column in activity_metrics.columns}

# One-week frames per patient, keyed on the daily 'start_date' timestamps.
activity_by_patient = activity_metrics.reset_index().groupby('patient_id')
activity_weeklies = (
    activity_by_patient
    .resample('1W', on='start_date')
    .agg(weekly_activity_spec)
)
activity_weeklies

Unnamed: 0_level_0,location_name,Bathroom,Bathroom,Bedroom,Bedroom,Hallway,Hallway,Kitchen,Kitchen,Lounge,Lounge,bed_out,bed_out,Total,Total
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
patient_id,start_date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-16,22.666667,8.020806,151.333333,84.559644,41.333333,3.785939,107.333333,23.094011,46.000000,8.000000,,,368.666667,66.304852
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-23,15.428571,4.825527,142.000000,42.003968,41.000000,6.733003,87.857143,20.812084,41.571429,6.347103,,,327.857143,43.414393
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-30,20.714286,9.123491,136.857143,45.670664,45.571429,9.180725,95.428571,19.311975,54.857143,16.687606,,,353.428571,59.393763
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-06,0.000000,,52.000000,,8.000000,,16.000000,,7.000000,,,,83.000000,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-13,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YYGGmsEaf2gP4JNN9kMprV,2021-12-12,113.142857,42.357324,46.285714,15.173599,18.571429,3.309438,157.428571,35.279159,78.000000,27.766887,5.571429,2.439750,419.000000,73.907600
YYGGmsEaf2gP4JNN9kMprV,2021-12-19,101.142857,16.014874,45.285714,12.763415,22.857143,5.580579,167.000000,36.574126,95.000000,29.988887,6.714286,1.496026,438.000000,73.378017
YYGGmsEaf2gP4JNN9kMprV,2021-12-26,107.428571,41.604716,58.285714,19.154758,27.571429,8.734169,170.000000,32.934278,73.000000,27.233558,8.142857,1.345185,444.428571,106.255756
YYGGmsEaf2gP4JNN9kMprV,2022-01-02,108.428571,37.880765,75.714286,15.326603,34.428571,9.606545,176.142857,33.835034,87.857143,14.158961,8.571429,2.819997,491.142857,83.547249


# Step 3 - The sleep dataset allows us to build daily summaries using the sleep mat's measurements.
When the command `Sleep.head()` is executed, we see that the Sleep view has nine columns that are gathered on a minute-by-minute basis whenever the bed is occupied. We are interested in heart rate, respiratory rate, and snoring data for a given `patient_id` during a specified time period as defined by the `start_date`.
Additionally, we are interested in wake-up and bedtime timings, the amount of time spent in bed during that time period, as well as the amount of time spent outside of bed.

In [10]:
# Preview the first rows of the Sleep table to see its columns.
Sleep.head()

Unnamed: 0,start_date,patient_id,home_id,state,heart_rate,respiratory_rate,snoring,source,timezone
0,2019-04-01 01:00:00,Mhy2uUxJnCtsEZbToCDDEE,8cWNKruugFF78aRVmnSV7h,DEEP,55.0,16.0,0.0,raw_sleep_mat,Europe/London
1,2019-04-01 01:01:00,Mhy2uUxJnCtsEZbToCDDEE,8cWNKruugFF78aRVmnSV7h,DEEP,55.0,16.0,0.0,raw_sleep_mat,Europe/London
2,2019-04-01 01:02:00,Mhy2uUxJnCtsEZbToCDDEE,8cWNKruugFF78aRVmnSV7h,DEEP,56.0,16.0,1.0,raw_sleep_mat,Europe/London
3,2019-04-01 01:03:00,Mhy2uUxJnCtsEZbToCDDEE,8cWNKruugFF78aRVmnSV7h,DEEP,56.0,15.0,1.0,raw_sleep_mat,Europe/London
4,2019-04-01 01:04:00,Mhy2uUxJnCtsEZbToCDDEE,8cWNKruugFF78aRVmnSV7h,DEEP,56.0,16.0,1.0,raw_sleep_mat,Europe/London


# Step 3a - extracting daily sleep measures 
- We begin by filtering out daytime naps using the `between_time` wrapper, which retains only observations between 5pm and 11am.
- We duplicate the `start_date` column and name the copy `time`
- And we use the astype function to convert the snoring metrics to floats
- Then, we utilise the agg command per patient to build daily summary for important fields.
- For heart rate and respiratory rate, we calculate the average for the duration. 
- For snoring, we sum the True values.
- For the `start_date` column, we extract the first and last observation for that period of time, and count the number of observations as a proxy for how long the bed was occupied
- For simplicity we shorten the columns names 
- We then normalise the time in bed (tib) column to reflect hours rather than minutes
- And calculate the time out of bed (tob) as a function of wake up (wup) minus time to bed (ttb) minus the actual time in bed (tib)
- We next convert both the time to bed and the time to wake up to angles to simplify computations. 
- Finally, we rename the columns to improve readability.
- Importantly, we remove any day with no observations.

In [97]:
# Keep only observations between 17:00 and 11:00 so that daytime naps are
# excluded. The derived columns are added with .assign, which returns a new
# frame: the object handed back by between_time (possibly a view of Sleep) is
# never written to in place, so Sleep itself cannot be altered by this cell.
night_observations = between_time(Sleep,'start_date','17:00','11:00').assign(
    time=lambda night: night['start_date'],
    snoring=lambda night: night['snoring'].astype(float))

# One row per patient per 24-hour bin starting at noon (offset='12h').
# 'start_date' is kept as a regular column so its first / last / count can be
# aggregated alongside the vital signs.
sleep_metrics = (night_observations.set_index('time').
                    groupby('patient_id').
                    resample('1D',offset='12h').
                    agg({'heart_rate':['mean'],
                         'respiratory_rate':['mean'],
                         'snoring':['sum'],
                         'start_date':['first','last','count']}))

# Short working names: hr = heart rate, br = breathing rate, snr = snoring,
# ttb = time to bed, wup = wake up time, tib = time in bed.
sleep_metrics.columns = ['hr','br','snr','ttb','wup','tib']

# The count is a number of observations; the notebook treats them as
# one-minute samples, so dividing by 60 expresses time in bed in hours.
sleep_metrics['tib'] = sleep_metrics['tib']/60

# Time out of bed = hours between first and last observation minus the hours
# actually spent in bed.
sleep_metrics['tob'] = ((sleep_metrics['wup'] - sleep_metrics['ttb']).dt.total_seconds()/60**2
                        - sleep_metrics['tib'])

# Replace the bed / wake timestamps by their clock time expressed as an angle
# (time_to_angles comes from dcarte.utils; presumably degrees on a 0-360
# scale, matching high=360 used for the weekly circular statistics -- confirm).
sleep_metrics['ttb'] = sleep_metrics['ttb'].dt.time.apply(time_to_angles)
sleep_metrics['wup'] = sleep_metrics['wup'].dt.time.apply(time_to_angles)

# Readable column names, then drop the days that have no observations.
sleep_metrics.columns = ['Heart rate','Breathing rate','Snoring','Time to bed','Wake up time','Time in bed','Time out of bed']
sleep_metrics = sleep_metrics.dropna(subset=['Heart rate','Breathing rate','Time to bed','Wake up time'])


In [98]:
# Display the daily sleep summary table.
sleep_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Heart rate,Breathing rate,Snoring,Time to bed,Wake up time,Time in bed,Time out of bed
patient_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-28 12:00:00,58.796000,17.740000,16.0,42.50,116.75,4.166667,0.783333
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-29 12:00:00,62.453453,18.462462,42.0,255.00,121.25,5.550000,9.533333
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-30 12:00:00,66.961905,17.500000,20.0,335.25,134.25,7.000000,3.600000
2GN1PHeHwRzNYQ7q4Nvg7g,2021-07-14 12:00:00,65.712062,19.344358,0.0,341.25,165.00,8.566667,3.683333
2GN1PHeHwRzNYQ7q4Nvg7g,2021-07-15 12:00:00,59.497427,18.658662,17.0,305.00,94.00,9.716667,0.216667
...,...,...,...,...,...,...,...,...
YYGGmsEaf2gP4JNN9kMprV,2021-12-26 12:00:00,54.805668,12.259109,0.0,291.00,119.50,8.233333,4.333333
YYGGmsEaf2gP4JNN9kMprV,2021-12-27 12:00:00,52.773352,12.218407,0.0,278.75,148.50,12.133333,3.183333
YYGGmsEaf2gP4JNN9kMprV,2021-12-28 12:00:00,51.667939,12.326336,0.0,255.00,93.50,8.733333,4.500000
YYGGmsEaf2gP4JNN9kMprV,2021-12-29 12:00:00,55.976395,12.858369,0.0,274.25,134.25,7.766667,6.900000


# Step 3b - extracting weekly sleep measures 
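To get the weekly measurements, two circular statistics must be defined - a circular mean and a circular standard deviation - because the time to bed and wake up time are stored as angles, for which the ordinary mean and standard deviation are misleading (for example, the average of 350° and 10° should be 0°, not 180°). Apart from that, the process is identical to the previous activity measurement.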

In [99]:
def mean_time(x):
    """Circular mean of the angles in ``x`` on a 0-360 scale.

    The value is wrapped in a one-element Series named 'mean'.
    """
    centre = circmean(x, high=360)
    return pd.Series(centre, name='mean')
def std_time(x):
    """Circular standard deviation of the angles in ``x`` on a 0-360 scale.

    The value is wrapped in a one-element Series named 'std'.
    """
    spread = circstd(x, high=360)
    return pd.Series(spread, name='std')
# Linear measures use the ordinary mean / standard deviation; the two
# clock-time columns hold angles and therefore use the circular helpers.
linear_stats = ['mean', 'std']
circular_stats = [mean_time, std_time]
weekly_sleep_spec = {
    'Heart rate': linear_stats,
    'Breathing rate': linear_stats,
    'Snoring': linear_stats,
    'Time to bed': circular_stats,
    'Wake up time': circular_stats,
    'Time in bed': linear_stats,
    'Time out of bed': linear_stats,
}

# One-week frames per patient, keyed on the daily 'time' timestamps.
sleep_by_patient = sleep_metrics.reset_index().groupby('patient_id')
sleep_weeklies = (
    sleep_by_patient
    .resample('1W', on='time')
    .agg(weekly_sleep_spec)
)
sleep_weeklies

Unnamed: 0_level_0,Unnamed: 1_level_0,Heart rate,Heart rate,Breathing rate,Breathing rate,Snoring,Snoring,Time to bed,Time to bed,Wake up time,Wake up time,Time in bed,Time in bed,Time out of bed,Time out of bed
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean_time,std_time,mean_time,std_time,mean,std,mean,std
patient_id,time,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-30,62.737119,4.090336,17.900821,0.500980,26.000000,14.000000,332.918487,65.609388,124.072541,7.427738,5.572222,1.416797,4.638889,4.466553
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-06,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-13,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-20,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-27,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YYGGmsEaf2gP4JNN9kMprV,2021-12-05,54.919392,3.417195,13.146993,0.572235,0.000000,0.000000,330.141114,33.923038,133.222421,14.881502,7.797619,2.178035,3.028571,1.849167
YYGGmsEaf2gP4JNN9kMprV,2021-12-12,50.564697,3.378402,12.366950,0.604171,0.000000,0.000000,323.004844,40.629142,127.736728,30.010034,8.633333,2.101719,2.188095,2.296529
YYGGmsEaf2gP4JNN9kMprV,2021-12-19,52.349340,4.145541,12.832216,0.747954,1.000000,2.645751,300.591152,19.215876,126.666207,22.748251,9.754762,0.995346,2.616667,1.471929
YYGGmsEaf2gP4JNN9kMprV,2021-12-26,51.531468,1.715852,12.269703,0.129481,1.285714,1.603567,282.568799,17.598839,135.474774,18.289661,10.461905,1.629949,3.745238,1.220910


# Step 4 - The Physiology dataset allows us to build daily summaries for any active vital sign measured at the household.
When the command `Physiology.head()` is executed, we see that the Physiology view has eight columns, with measurements that are ideally collected on a daily basis.

In [117]:
# Preview the first rows of the Physiology table to see its columns.
Physiology.head()

Unnamed: 0,start_date,device_type,patient_id,home_id,value,unit,source,timezone
0,2021-04-30 06:59:23,Pulse_oximeter,HdozjhTxFXBhYAnKje524Q,8boGptP7WpgZM3D1UJRKvR,65.0,/min,raw_heart_rate,Europe/London
1,2021-04-30 07:02:39,Electronic_sphygmomanometer,HdozjhTxFXBhYAnKje524Q,8boGptP7WpgZM3D1UJRKvR,71.0,/min,raw_heart_rate,Europe/London
2,2021-04-30 08:20:00,Electronic_sphygmomanometer,U2dZSjjycMm5bRNvHcLrAr,99f6zQLn6nB4cqdHQFwKhV,51.0,/min,raw_heart_rate,Europe/London
3,2021-04-30 08:25:31,Pulse_oximeter,U2dZSjjycMm5bRNvHcLrAr,99f6zQLn6nB4cqdHQFwKhV,30.0,/min,raw_heart_rate,Europe/London
4,2021-04-30 08:26:42,Pulse_oximeter,U2dZSjjycMm5bRNvHcLrAr,99f6zQLn6nB4cqdHQFwKhV,30.0,/min,raw_heart_rate,Europe/London


# Step 4a - extracting daily vital signs measures 
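- Using the 'query' method we keep only rows whose `source` is in the 'factors' list.
- We partition the dataset into independent patients and then into sources within each patient.
- We produce 24-hour windows beginning at noon using the resample command, and average the values within each window.
- We switch the source and datetime index levels using the swaplevel command, and change the table to a wide format using the unstack command.
- We rename the columns to improve readability.
- Importantly, we remove any day with no observations.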

In [115]:
# Measurement sources to keep, in the order the output columns should appear.
factors = ['raw_heart_rate','raw_body_weight','raw_body_mass_index',
           'raw_body_temperature','diastolic_bp','systolic_bp','raw_total_body_fat']
# Display names, position-matched with `factors`.
factor_labels = ['Heart rate','Body_weight','BMI','Temperature','Diastolic_BP','Systolic_BP', 'Body_Fat']

# Average each source per patient in 24-hour bins that start at noon.
selected_vitals = Physiology.query("source in @factors").reset_index(drop=True)
daily_means = (
    selected_vitals
    .groupby(['patient_id', 'source'])
    .resample('1D', on='start_date', offset='12h')
    .agg({'value': 'mean'})
)

# Pivot the sources into columns and discard the leftover 'value' header level.
daily_physiology = daily_means.swaplevel(-2, -1).unstack().droplevel(0, axis=1)

# Order the columns like `factors` and drop days on which every factor is missing.
daily_physiology = daily_physiology[factors].dropna(how='all')
daily_physiology.columns = factor_labels
daily_physiology

Unnamed: 0_level_0,Unnamed: 1_level_0,Heart rate,Body_weight,BMI,Temperature,Diastolic_BP,Systolic_BP,Body_Fat
patient_id,start_date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-04 12:00:00,,58.100000,20.103806,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-09 12:00:00,,103.833333,35.928489,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-14 12:00:00,74.666667,,,36.622125,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-07-05 12:00:00,63.000000,,,36.893250,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-07-14 12:00:00,71.250000,,,36.762600,87.5,114.5,
...,...,...,...,...,...,...,...,...
YYGGmsEaf2gP4JNN9kMprV,2021-12-09 12:00:00,44.666667,,,36.000000,75.0,110.0,
YYGGmsEaf2gP4JNN9kMprV,2021-12-13 12:00:00,73.500000,,,36.177000,68.0,98.0,
YYGGmsEaf2gP4JNN9kMprV,2021-12-21 12:00:00,68.000000,,,36.301000,84.0,128.0,
YYGGmsEaf2gP4JNN9kMprV,2021-12-23 12:00:00,55.250000,,,35.917000,64.5,105.0,


In [116]:
# Every daily vital-sign column receives the same pair of weekly statistics.
weekly_physiology_spec = {column: ['mean', 'std'] for column in daily_physiology.columns}

# One-week frames per patient, keyed on the daily 'start_date' timestamps.
physiology_by_patient = daily_physiology.reset_index().groupby('patient_id')
physiology_weeklies = (
    physiology_by_patient
    .resample('1W', on='start_date')
    .agg(weekly_physiology_spec)
)
physiology_weeklies

Unnamed: 0_level_0,Unnamed: 1_level_0,Heart rate,Heart rate,Body_weight,Body_weight,BMI,BMI,Temperature,Temperature,Diastolic_BP,Diastolic_BP,Systolic_BP,Systolic_BP,Body_Fat,Body_Fat
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
patient_id,start_date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-09,,,80.966667,32.33835,28.016148,11.189741,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-16,74.666667,,,,,,36.622125,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-23,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-30,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-06,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YYGGmsEaf2gP4JNN9kMprV,2021-12-05,56.333333,0.471405,,,,,36.384500,0.351432,75.50,0.707107,115.5,0.707107,,
YYGGmsEaf2gP4JNN9kMprV,2021-12-12,44.666667,,,,,,36.000000,,75.00,,110.0,,,
YYGGmsEaf2gP4JNN9kMprV,2021-12-19,73.500000,,,,,,36.177000,,68.00,,98.0,,,
YYGGmsEaf2gP4JNN9kMprV,2021-12-26,61.625000,9.015611,,,,,36.109000,0.271529,74.25,13.788582,116.5,16.263456,,


In [4]:
# Show the table returned by dcarte.domains(); in the output below each
# column is a domain and the rows list dataset names under it.
dcarte.domains()

Unnamed: 0,RAW,LEGACY,BASE,PROFILE,SLEEP_STUDY,LOOKUP,BED_HABITS
0,Activity,Device_Type,Doors,Activity_Dailies,Diurnal,Device_Types,Bed_Occupancy
1,Appliances,Doors,Entryway,Activity_Weeklies,Nocturnal,Homes,
2,Behavioural,Entryway,Habitat,Light,Whitings_Tidy,Patients,
3,Blood_Pressure,Flags,Kitchen,Physiology_Dailies,Withings_Nights,,
4,Door,Light,Motion,Physiology_Weeklies,Withings_Tidy,,
5,Encounter,Motion,Physiology,Sleep_Dailies,,,
6,Environmental,Observation,Sleep,Sleep_Weeklies,,,
7,Issue,Physiology,Transitions,Temperature,,,
8,Observation_Notes,Temperature,,,,,
9,Procedure,Wellbeing,,,,,


In [5]:
# Load the 'Sleep_Weeklies' dataset from the PROFILE domain and display it.
# NOTE(review): presumably the packaged equivalent of the sleep_weeklies
# table built earlier in this notebook -- confirm.
df = dcarte.load('Sleep_Weeklies','PROFILE')
df

Finished Loading Sleep_Weeklies in:            0.1 seconds   


Unnamed: 0_level_0,Unnamed: 1_level_0,Heart rate,Heart rate,Breathing rate,Breathing rate,Snoring,Snoring,Time to bed,Time to bed,Wake up time,Wake up time,Time in bed,Time in bed,Time out of bed,Time out of bed
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean_time,std_time,mean_time,std_time,mean,std,mean,std
patient_id,start_date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-30,62.737119,4.090336,17.900821,0.500980,26.000000,14.000000,332.918487,65.609388,124.072541,7.427738,5.572222,1.416797,4.638889,4.466553
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-06,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-13,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-20,,,,,,,,,,,,,,
2GN1PHeHwRzNYQ7q4Nvg7g,2021-06-27,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YYGGmsEaf2gP4JNN9kMprV,2021-12-12,50.564697,3.378402,12.366950,0.604171,0.000000,0.000000,323.004844,40.629142,127.736728,30.010034,8.633333,2.101719,2.188095,2.296529
YYGGmsEaf2gP4JNN9kMprV,2021-12-19,52.349340,4.145541,12.832216,0.747954,1.000000,2.645751,300.591152,19.215876,126.666207,22.748251,9.754762,0.995346,2.616667,1.471929
YYGGmsEaf2gP4JNN9kMprV,2021-12-26,51.531468,1.715852,12.269703,0.129481,1.285714,1.603567,282.568799,17.598839,135.474774,18.289661,10.461905,1.629949,3.745238,1.220910
YYGGmsEaf2gP4JNN9kMprV,2022-01-02,52.964012,3.138056,12.676393,0.411097,0.428571,1.133893,294.570969,38.833344,126.598189,20.907424,8.473810,2.172702,4.140476,2.175173


Unnamed: 0_level_0,location_name,Bathroom,Bedroom,Hallway,Kitchen,Lounge,bed_out,Total
patient_id,start_date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-14 12:00:00,22.0,170.0,44.0,94.0,46.0,,376.0
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-15 12:00:00,15.0,59.0,37.0,134.0,54.0,,299.0
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-16 12:00:00,31.0,225.0,43.0,94.0,38.0,,431.0
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-17 12:00:00,20.0,153.0,40.0,112.0,38.0,,363.0
2GN1PHeHwRzNYQ7q4Nvg7g,2021-05-18 12:00:00,15.0,207.0,35.0,69.0,47.0,,373.0
...,...,...,...,...,...,...,...,...
YYGGmsEaf2gP4JNN9kMprV,2021-12-25 12:00:00,87.0,48.0,24.0,158.0,68.0,10.0,395.0
YYGGmsEaf2gP4JNN9kMprV,2021-12-26 12:00:00,200.0,97.0,35.0,204.0,73.0,9.0,618.0
YYGGmsEaf2gP4JNN9kMprV,2021-12-27 12:00:00,55.0,90.0,43.0,179.0,110.0,7.0,484.0
YYGGmsEaf2gP4JNN9kMprV,2021-12-28 12:00:00,114.0,69.0,24.0,186.0,87.0,12.0,492.0


In [16]:
# Toy example showing how offset='12h' shifts the daily bins so that they run
# from noon to noon: one unit per hourly timestamp, summed per bin.
rng = pd.date_range("2012-01-01", "2012-01-03", freq='h')
hourly_ones = pd.DataFrame(np.ones(rng.shape), index=rng)
hourly_ones.resample('1D', offset='12h').sum()


Unnamed: 0,0
2011-12-31 12:00:00,12.0
2012-01-01 12:00:00,24.0
2012-01-02 12:00:00,13.0
