In [2]:
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import importlib

In [22]:
# Load every raw table and normalise it in a single chain per table:
#   1. read the CSV,
#   2. rename the key columns to the shared 'id' / 'date-time' names,
#   3. recode categorical values to lower-case, hyphenated identifiers.

# Sensor firings: one row per (participant, location, timestamp).
activity = (
    pd.read_csv('Activity.csv')
    .rename(columns={
        'patient_id': 'id',
        'date': 'date-time',
        'location_name': 'location'})
    # Standardize location names
    .assign(location=lambda frame: frame['location'].replace({
        'Back Door': 'back-door',
        'Bathroom': 'bathroom',
        'Bedroom': 'bedroom',
        'Fridge Door': 'fridge-door',
        'Front Door': 'front-door',
        'Hallway': 'hallway',
        'Kitchen': 'kitchen',
        'Lounge': 'lounge'}))
)

# Physiological measurements: 'type' names the measured quantity.
physiology = (
    pd.read_csv('Physiology.csv')
    .rename(columns={
        'patient_id': 'id',
        'date': 'date-time',
        'device_type': 'type'})
    .assign(type=lambda frame: frame['type'].replace({
        'Body Temperature': 'body-temperature',
        'Systolic blood pressure': 'systolic-blood-pressure',
        'Diastolic blood pressure': 'diastolic-blood-pressure',
        'Heart rate': 'heart-rate',
        'Body weight': 'body-weight',
        'O/E - muscle mass': 'muscle-mass',
        'Total body water': 'total-body-water',
        'Skin Temperature': 'skin-temperature'}))
)

# Event labels. 'Body temperature' receives a '-label' suffix so that it
# differs from the physiology measurement called 'body-temperature'.
labels = (
    pd.read_csv('Labels.csv')
    .rename(columns={
        'patient_id': 'id',
        'date': 'date-time'})
    .assign(type=lambda frame: frame['type'].replace({
        'Blood pressure': 'blood-pressure',
        'Agitation': 'agitation',
        'Body water': 'body-water',
        'Pulse': 'pulse',
        'Weight': 'weight',
        'Body temperature': 'body-temperature-label'}))
)

# Sleep records only need their key columns renamed.
sleep = pd.read_csv('Sleep.csv').rename(columns={
    'patient_id': 'id',
    'date': 'date-time'})

# Demographics: sex becomes 0/1 and each age bracket is replaced by a single
# representative number. map() yields NaN for any value missing from the
# dictionaries.
demographics = (
    pd.read_csv('Demographics.csv')
    .rename(columns={
        'patient_id': 'id'})
    .assign(
        sex=lambda frame: frame['sex'].map({'Female': 0, 'Male': 1}),
        age=lambda frame: frame['age'].map({'(70, 80]': 75, '(80, 90]': 85, '(90, 110]': 100}))
)


In [23]:
# (rows, columns) of every loaded table, printed on a single line
print(*(table.shape for table in (activity, physiology, labels, sleep, demographics)))

(1030559, 3) (17680, 5) (608, 3) (461423, 6) (56, 3)


In [24]:
# Column names of every table after renaming, one table per line
print(*(table.columns.values for table in (activity, physiology, labels, sleep, demographics)),
      sep='\n')

['id' 'location' 'date-time']
['id' 'date-time' 'type' 'value' 'unit']
['id' 'date-time' 'type']
['id' 'date-time' 'state' 'heart_rate' 'respiratory_rate' 'snoring']
['id' 'age' 'sex']


In [26]:
# Distinct categorical values per table, to confirm the recoding applied above
print(activity['location'].unique())
print(physiology['type'].unique())
print(labels['type'].unique())
print(sleep['state'].unique())
# age and sex share one output line
print(demographics['age'].unique(), demographics['sex'].unique())

['fridge-door' 'kitchen' 'front-door' 'bedroom' 'back-door' 'bathroom'
 'lounge' 'hallway']
['body-temperature' 'systolic-blood-pressure' 'diastolic-blood-pressure'
 'heart-rate' 'body-weight' 'muscle-mass' 'total-body-water'
 'skin-temperature']
['blood-pressure' 'agitation' 'body-water' 'pulse' 'weight'
 'body-temperature-label']
['AWAKE' 'LIGHT' 'DEEP' 'REM']
[ 75  85 100] [0 1]


In [27]:
# Number of distinct participant ids found in each table, one per line
print(*(table['id'].unique().shape for table in (activity, physiology, labels, sleep, demographics)),
      sep='\n')

(56,)
(56,)
(49,)
(17,)
(56,)


In [29]:
# Activity: derive 12-hour activity features per participant and export them.

import utils_data
importlib.reload(utils_data)  # pick up edits to utils_data without restarting the kernel

from utils_data import extract_12h_count, extract_12h_contextual, extract_12h_statistical

# Output directory, also used by the later export cells. The default is the
# original absolute path; set TIHM_DATASET_DIR to run the notebook on another
# machine without editing this cell.
destination = os.environ.get('TIHM_DATASET_DIR', '/home/ali/PycharmProjects/tihm/dataset')

# Columns that identify one 12-hour window of one participant.
merge_keys = ['id', 'date', '12h']

# Per-participant feature frames are collected here and concatenated once.
activity_windows = []

# The loop variable is deliberately not called `id`, so the builtin id()
# stays usable in the rest of the notebook.
for participant_id, group in activity.sort_values('id').groupby('id'):

    # Build a new frame instead of assigning columns onto the groupby slice
    # (which is a chained assignment): parse the timestamps, order the events
    # chronologically, then add the calendar date of each event.
    group = (
        group
        .assign(**{'date-time': pd.to_datetime(group['date-time'])})
        .sort_values('date-time')
        .assign(date=lambda frame: frame['date-time'].dt.date)
    )

    # Three feature families from utils_data; each frame is keyed by merge_keys.
    daily_count = extract_12h_count(group)
    daily_contextual = extract_12h_contextual(group)
    daily_statistical = extract_12h_statistical(group)

    # Outer joins keep a window even when one feature family lacks it.
    daily = (
        daily_count
        .merge(daily_contextual, on=merge_keys, how='outer')
        .merge(daily_statistical, on=merge_keys, how='outer')
    )

    activity_windows.append(daily)

# NOTE: daily, daily_count, daily_contextual and daily_statistical keep the
# values of the last participant; the inspection cell that follows reads them.
data_activity = pd.concat(activity_windows, ignore_index=True)
data_activity.to_csv(os.path.join(destination, 'data-12h-activity' + '.csv'), index=False)
print(data_activity.shape)

(5444, 51)


In [31]:
# Shapes of the concatenated activity table and of the intermediates that the
# feature loop left behind for the last participant.
print(*(frame.shape for frame in (data_activity, daily, daily_count,
                                  daily_contextual, daily_statistical)))

# Render the same frames, intermediates first and the full table last.
display(daily_count, daily_contextual, daily_statistical, daily, data_activity)

(5444, 51) (8, 51) (8, 11) (8, 11) (8, 35)


Unnamed: 0,id,date,12h,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,lounge
0,fd100,2019-06-27,00-12,2.0,4.0,5.0,21.0,10.0,7.0,24.0,13.0
1,fd100,2019-06-27,12-24,0.0,20.0,43.0,2.0,15.0,40.0,76.0,84.0
2,fd100,2019-06-28,00-12,0.0,22.0,41.0,0.0,16.0,31.0,52.0,45.0
3,fd100,2019-06-28,12-24,0.0,10.0,50.0,7.0,5.0,27.0,93.0,75.0
4,fd100,2019-06-29,00-12,0.0,10.0,21.0,7.0,3.0,13.0,32.0,16.0
5,fd100,2019-06-29,12-24,0.0,23.0,35.0,20.0,12.0,48.0,78.0,80.0
6,fd100,2019-06-30,00-12,0.0,13.0,34.0,5.0,8.0,17.0,38.0,31.0
7,fd100,2019-06-30,12-24,0.0,14.0,52.0,3.0,12.0,26.0,82.0,79.0


Unnamed: 0,id,date,12h,total-events,unique-locations,active-location-ratio,private-to-public-ratio,location-entropy,location-dominance-ratio,back-and-forth-count,num-transitions
0,fd100,2019-06-27,00-12,86,8,1.0,0.2045,2.6488,0.2791,7,51
1,fd100,2019-06-27,12-24,280,7,0.875,0.315,2.397,0.3,38,215
2,fd100,2019-06-28,00-12,207,6,0.75,0.4922,2.4814,0.2512,16,164
3,fd100,2019-06-28,12-24,267,7,0.875,0.3077,2.2541,0.3483,23,176
4,fd100,2019-06-29,00-12,102,7,0.875,0.5082,2.5355,0.3137,7,69
5,fd100,2019-06-29,12-24,296,7,0.875,0.2816,2.5435,0.2703,50,230
6,fd100,2019-06-30,00-12,146,7,0.875,0.5465,2.5379,0.2603,10,110
7,fd100,2019-06-30,12-24,268,7,0.875,0.3529,2.3234,0.306,25,164


Unnamed: 0,id,date,12h,back-door-count-max,back-door-count-mean,back-door-count-std,back-door-count-sum,bathroom-count-max,bathroom-count-mean,bathroom-count-std,...,hallway-count-std,hallway-count-sum,kitchen-count-max,kitchen-count-mean,kitchen-count-std,kitchen-count-sum,lounge-count-max,lounge-count-mean,lounge-count-std,lounge-count-sum
0,fd100,2019-06-27,00-12,2,0.1176,0.4851,2,3,0.2353,0.7524,...,1.1757,7,21,1.4118,5.0997,24,10,0.7647,2.4882,13
1,fd100,2019-06-27,12-24,0,0.0,0.0,0,5,1.1765,1.6672,...,2.9779,40,18,4.4706,5.9699,76,20,4.9412,5.9526,84
2,fd100,2019-06-28,00-12,0,0.0,0.0,0,11,1.2941,3.0774,...,3.5042,31,20,3.0588,6.3391,52,15,2.6471,5.1471,45
3,fd100,2019-06-28,12-24,0,0.0,0.0,0,5,0.5882,1.372,...,2.9167,27,28,5.4706,7.5342,93,13,4.4118,4.8742,75
4,fd100,2019-06-29,00-12,0,0.0,0.0,0,7,0.5882,1.8048,...,2.3326,13,18,1.8824,4.8203,32,8,0.9412,2.1057,16
5,fd100,2019-06-29,12-24,0,0.0,0.0,0,9,1.3529,2.3168,...,3.4503,48,17,4.5882,5.6685,78,13,4.7059,4.7666,80
6,fd100,2019-06-30,00-12,0,0.0,0.0,0,6,0.7647,1.7864,...,2.3717,17,13,2.2353,4.4092,38,10,1.8235,3.4683,31
7,fd100,2019-06-30,12-24,0,0.0,0.0,0,6,0.8235,1.6672,...,2.2394,26,16,4.8235,4.9653,82,12,4.6471,3.8559,79


Unnamed: 0,id,date,12h,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,...,hallway-count-std,hallway-count-sum,kitchen-count-max,kitchen-count-mean,kitchen-count-std,kitchen-count-sum,lounge-count-max,lounge-count-mean,lounge-count-std,lounge-count-sum
0,fd100,2019-06-27,00-12,2.0,4.0,5.0,21.0,10.0,7.0,24.0,...,1.1757,7,21,1.4118,5.0997,24,10,0.7647,2.4882,13
1,fd100,2019-06-27,12-24,0.0,20.0,43.0,2.0,15.0,40.0,76.0,...,2.9779,40,18,4.4706,5.9699,76,20,4.9412,5.9526,84
2,fd100,2019-06-28,00-12,0.0,22.0,41.0,0.0,16.0,31.0,52.0,...,3.5042,31,20,3.0588,6.3391,52,15,2.6471,5.1471,45
3,fd100,2019-06-28,12-24,0.0,10.0,50.0,7.0,5.0,27.0,93.0,...,2.9167,27,28,5.4706,7.5342,93,13,4.4118,4.8742,75
4,fd100,2019-06-29,00-12,0.0,10.0,21.0,7.0,3.0,13.0,32.0,...,2.3326,13,18,1.8824,4.8203,32,8,0.9412,2.1057,16
5,fd100,2019-06-29,12-24,0.0,23.0,35.0,20.0,12.0,48.0,78.0,...,3.4503,48,17,4.5882,5.6685,78,13,4.7059,4.7666,80
6,fd100,2019-06-30,00-12,0.0,13.0,34.0,5.0,8.0,17.0,38.0,...,2.3717,17,13,2.2353,4.4092,38,10,1.8235,3.4683,31
7,fd100,2019-06-30,12-24,0.0,14.0,52.0,3.0,12.0,26.0,82.0,...,2.2394,26,16,4.8235,4.9653,82,12,4.6471,3.8559,79


Unnamed: 0,id,date,12h,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,...,hallway-count-std,hallway-count-sum,kitchen-count-max,kitchen-count-mean,kitchen-count-std,kitchen-count-sum,lounge-count-max,lounge-count-mean,lounge-count-std,lounge-count-sum
0,0697d,2019-06-28,00-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0000,0,0,0.0000,0.0000,0,0,0.0000,0.0000,0
1,0697d,2019-06-28,12-24,14.0,7.0,24.0,23.0,28.0,40.0,106.0,...,3.2460,40,20,4.8182,6.4041,106,16,3.6364,5.4820,80
2,0697d,2019-06-29,00-12,0.0,7.0,17.0,0.0,8.0,22.0,36.0,...,2.4495,22,14,1.6364,4.2934,36,9,0.8182,2.1075,18
3,0697d,2019-06-29,12-24,2.0,4.0,9.0,8.0,15.0,35.0,84.0,...,3.0183,35,16,3.8182,5.1143,84,21,4.5000,6.1082,99
4,0697d,2019-06-30,00-12,2.0,14.0,43.0,0.0,2.0,22.0,38.0,...,1.8257,22,11,1.7273,3.2246,38,7,1.1364,2.0539,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5439,fd100,2019-06-28,12-24,0.0,10.0,50.0,7.0,5.0,27.0,93.0,...,2.9167,27,28,5.4706,7.5342,93,13,4.4118,4.8742,75
5440,fd100,2019-06-29,00-12,0.0,10.0,21.0,7.0,3.0,13.0,32.0,...,2.3326,13,18,1.8824,4.8203,32,8,0.9412,2.1057,16
5441,fd100,2019-06-29,12-24,0.0,23.0,35.0,20.0,12.0,48.0,78.0,...,3.4503,48,17,4.5882,5.6685,78,13,4.7059,4.7666,80
5442,fd100,2019-06-30,00-12,0.0,13.0,34.0,5.0,8.0,17.0,38.0,...,2.3717,17,13,2.2353,4.4092,38,10,1.8235,3.4683,31


In [32]:
# Physiology: derive 12-hour physiology features per participant and export them.

import utils_data
importlib.reload(utils_data)  # pick up edits to utils_data without restarting the kernel

from utils_data import extract_12h_physiology

# Per-participant feature frames are collected here and concatenated once.
physiology_windows = []

# The loop variable is deliberately not called `id`, so the builtin id()
# stays usable in the rest of the notebook.
for participant_id, group in physiology.sort_values('id').groupby('id'):

    # Build a new frame instead of assigning columns onto the groupby slice
    # (which is a chained assignment): parse the timestamps, order the
    # measurements chronologically, then add the calendar date of each one.
    group = (
        group
        .assign(**{'date-time': pd.to_datetime(group['date-time'])})
        .sort_values('date-time')
        .assign(date=lambda frame: frame['date-time'].dt.date)
    )

    daily_physiology = extract_12h_physiology(group)

    physiology_windows.append(daily_physiology)

# NOTE: daily_physiology keeps the value of the last participant; the
# inspection cell that follows reads it.
data_physiology = pd.concat(physiology_windows, ignore_index=True)
# `destination` is the output directory defined in the Activity cell.
data_physiology.to_csv(os.path.join(destination, 'data-12h-physiology' + '.csv'), index=False)

In [98]:
# Shapes of the full physiology table and of the last participant's frame,
# followed by the feature column names.
print(*(frame.shape for frame in (data_physiology, daily_physiology)))
print(daily_physiology.columns.values)

# Render the last participant's frame first, then the concatenated table.
display(daily_physiology, data_physiology)

(2497, 11) (3, 11)
['id' 'date' '12h' 'body-temperature' 'body-weight'
 'diastolic-blood-pressure' 'heart-rate' 'muscle-mass'
 'systolic-blood-pressure' 'total-body-water' 'skin-temperature']


Unnamed: 0,id,date,12h,body-temperature,body-weight,diastolic-blood-pressure,heart-rate,muscle-mass,systolic-blood-pressure,total-body-water,skin-temperature
0,fd100,2019-06-27,00-12,36.6996,55.8,76.0,54.0,37.5,153.0,48.7,
1,fd100,2019-06-28,12-24,36.6784,,74.0,61.0,,138.0,,
2,fd100,2019-06-29,12-24,36.24,,,,,,,


Unnamed: 0,id,date,12h,body-temperature,body-weight,diastolic-blood-pressure,heart-rate,muscle-mass,systolic-blood-pressure,total-body-water,skin-temperature
0,0697d,2019-06-28,12-24,36.1864,86.3,82.0,42.0,64.5,165.0,50.6,
1,0697d,2019-06-29,12-24,36.9148,86.1,78.5,60.0,64.6,157.0,51.2,
2,0697d,2019-06-30,12-24,35.9532,,80.5,52.5,,152.0,,
3,099bc,2019-05-15,00-12,,53.3,,,39.8,,54.0,
4,099bc,2019-05-15,12-24,37.2293,,75.5,82.5,,127.0,,
...,...,...,...,...,...,...,...,...,...,...,...
2492,f220c,2019-06-23,12-24,36.2770,,70.0,73.0,,111.0,,33.730
2493,f220c,2019-06-29,00-12,37.0362,,68.0,84.0,,110.0,,35.381
2494,fd100,2019-06-27,00-12,36.6996,55.8,76.0,54.0,37.5,153.0,48.7,
2495,fd100,2019-06-28,12-24,36.6784,,74.0,61.0,,138.0,,


In [33]:
# Labels: derive 12-hour label indicators per participant and export them.

import utils_data
importlib.reload(utils_data)  # pick up edits to utils_data without restarting the kernel

from utils_data import extract_12h_labels

# Per-participant label frames are collected here and concatenated once.
label_windows = []

# The loop variable is deliberately not called `id`, so the builtin id()
# stays usable in the rest of the notebook.
for participant_id, group in labels.sort_values('id').groupby('id'):

    # Build a new frame instead of assigning columns onto the groupby slice
    # (which is a chained assignment): parse the timestamps, order the labels
    # chronologically, then add the calendar date of each one.
    group = (
        group
        .assign(**{'date-time': pd.to_datetime(group['date-time'])})
        .sort_values('date-time')
        .assign(date=lambda frame: frame['date-time'].dt.date)
    )

    daily_labels = extract_12h_labels(group)

    label_windows.append(daily_labels)

# NOTE: daily_labels keeps the value of the last participant; the inspection
# cell that follows reads it.
data_labels = pd.concat(label_windows, ignore_index=True)
# `destination` is the output directory defined in the Activity cell.
# NOTE(review): the file name says '24h' although the rows come from
# extract_12h_labels and the other exports are named 'data-12h-*'. The name is
# left unchanged in case another script reads this exact file - confirm.
data_labels.to_csv(os.path.join(destination, 'data-24h-labels' + '.csv'), index=False)

In [34]:
# Sum of the 'agitation' column over all 12-hour windows
print(data_labels['agitation'].sum())

# Shapes of the full label table and of the last participant's frame,
# followed by the label column names.
print(*(frame.shape for frame in (data_labels, daily_labels)))
print(daily_labels.columns.values)

# Render the last participant's frame first, then the concatenated table.
display(daily_labels, data_labels)

135.0
(950, 9) (2, 9)
['id' 'date' '12h' 'blood-pressure' 'agitation' 'body-water' 'pulse'
 'weight' 'body-temperature-label']


Unnamed: 0,id,date,12h,blood-pressure,agitation,body-water,pulse,weight,body-temperature-label
0,fd100,2019-06-27,00-12,1.0,0,0,0,0,0
1,fd100,2019-06-27,12-24,0.0,0,0,0,0,0


Unnamed: 0,id,date,12h,blood-pressure,agitation,body-water,pulse,weight,body-temperature-label
0,0697d,2019-06-28,00-12,0.0,0.0,0.0,0.0,0.0,0.0
1,0697d,2019-06-28,12-24,1.0,0.0,0.0,1.0,0.0,0.0
2,0697d,2019-06-29,00-12,0.0,0.0,0.0,0.0,0.0,0.0
3,0697d,2019-06-29,12-24,1.0,0.0,0.0,0.0,0.0,0.0
4,0697d,2019-06-30,00-12,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
945,eca1f,2019-06-04,12-24,0.0,0.0,0.0,0.0,0.0,0.0
946,eca1f,2019-06-19,00-12,0.0,0.0,0.0,1.0,0.0,0.0
947,eca1f,2019-06-19,12-24,0.0,0.0,0.0,0.0,0.0,0.0
948,fd100,2019-06-27,00-12,1.0,0.0,0.0,0.0,0.0,0.0


In [37]:
import utils_data
importlib.reload(utils_data)  # pick up edits to utils_data without restarting the kernel

from utils_data import add_agitation_next_12h

# Columns that identify one 12-hour window of one participant.
merge_keys = ['id', 'date', '12h']

# Outer-join activity, physiology and labels so that a window present in any
# of the three tables is kept, attach the per-participant demographics, then
# let add_agitation_next_12h add its target columns.
data = add_agitation_next_12h(
    data_activity
    .merge(data_physiology, on=merge_keys, how='outer')
    .merge(data_labels, on=merge_keys, how='outer')
    .merge(demographics, on='id', how='left')
)

# Drop the rows whose 'agitation-next' equals -10.
# NOTE(review): -10 looks like a sentinel written by add_agitation_next_12h -
# confirm in utils_data. Rows where 'agitation-next' is NaN are NOT removed by
# this comparison.
data = data.loc[data['agitation-next'] != -10]


# Export the combined table, then show it together with its columns and the
# totals of the current-window and next-window agitation columns.
data.to_csv(os.path.join(destination, 'data-12h' + '.csv'), index=False)
display(data)
print(data.columns)
print(data['agitation'].sum(), data['agitation-next'].sum())

Unnamed: 0,id,date,12h,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,...,blood-pressure,agitation,body-water,pulse,weight,body-temperature-label,age,sex,agitation-next,agitation-four
0,0697d,2019-06-28,00-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,85,1,0.0,0
1,0697d,2019-06-28,12-24,14.0,7.0,24.0,23.0,28.0,40.0,106.0,...,1.0,0.0,0.0,1.0,0.0,0.0,85,1,0.0,0
2,0697d,2019-06-29,00-12,0.0,7.0,17.0,0.0,8.0,22.0,36.0,...,0.0,0.0,0.0,0.0,0.0,0.0,85,1,0.0,0
3,0697d,2019-06-29,12-24,2.0,4.0,9.0,8.0,15.0,35.0,84.0,...,1.0,0.0,0.0,0.0,0.0,0.0,85,1,0.0,0
4,0697d,2019-06-30,00-12,2.0,14.0,43.0,0.0,2.0,22.0,38.0,...,0.0,0.0,0.0,0.0,0.0,0.0,85,1,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5525,fd100,2019-06-28,00-12,0.0,22.0,41.0,0.0,16.0,31.0,52.0,...,,,,,,,100,0,,-10
5526,fd100,2019-06-28,12-24,0.0,10.0,50.0,7.0,5.0,27.0,93.0,...,,,,,,,100,0,,-10
5527,fd100,2019-06-29,00-12,0.0,10.0,21.0,7.0,3.0,13.0,32.0,...,,,,,,,100,0,,-10
5528,fd100,2019-06-29,12-24,0.0,23.0,35.0,20.0,12.0,48.0,78.0,...,,,,,,,100,0,,-10


Index(['id', 'date', '12h', 'back-door', 'bathroom', 'bedroom', 'fridge-door',
       'front-door', 'hallway', 'kitchen', 'lounge', 'total-events',
       'unique-locations', 'active-location-ratio', 'private-to-public-ratio',
       'location-entropy', 'location-dominance-ratio', 'back-and-forth-count',
       'num-transitions', 'back-door-count-max', 'back-door-count-mean',
       'back-door-count-std', 'back-door-count-sum', 'bathroom-count-max',
       'bathroom-count-mean', 'bathroom-count-std', 'bathroom-count-sum',
       'bedroom-count-max', 'bedroom-count-mean', 'bedroom-count-std',
       'bedroom-count-sum', 'fridge-door-count-max', 'fridge-door-count-mean',
       'fridge-door-count-std', 'fridge-door-count-sum',
       'front-door-count-max', 'front-door-count-mean', 'front-door-count-std',
       'front-door-count-sum', 'hallway-count-max', 'hallway-count-mean',
       'hallway-count-std', 'hallway-count-sum', 'kitchen-count-max',
       'kitchen-count-mean', 'kitchen-cou