In [2]:
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import importlib

In [3]:
# Load each raw table and harmonise its column names straight away: the
# patient key becomes 'id' and, where a table has one, 'date' becomes
# 'date-time'. Table-specific columns get shorter names.
activity = pd.read_csv('Activity.csv').rename(
    columns={'patient_id': 'id', 'date': 'date-time', 'location_name': 'location'}
)
physiology = pd.read_csv('Physiology.csv').rename(
    columns={'patient_id': 'id', 'date': 'date-time', 'device_type': 'type'}
)
labels = pd.read_csv('Labels.csv').rename(
    columns={'patient_id': 'id', 'date': 'date-time'}
)
sleep = pd.read_csv('Sleep.csv').rename(
    columns={'patient_id': 'id', 'date': 'date-time'}
)
demographics = pd.read_csv('Demographics.csv').rename(
    columns={'patient_id': 'id'}
)

# Lower-case, hyphenated replacements for the raw category strings.
# Series.replace leaves any value that is not a key of the mapping untouched.
LOCATION_SLUGS = {
    'Back Door': 'back-door',
    'Bathroom': 'bathroom',
    'Bedroom': 'bedroom',
    'Fridge Door': 'fridge-door',
    'Front Door': 'front-door',
    'Hallway': 'hallway',
    'Kitchen': 'kitchen',
    'Lounge': 'lounge',
}

PHYSIOLOGY_TYPE_SLUGS = {
    'Body Temperature': 'body-temperature',
    'Systolic blood pressure': 'systolic-blood-pressure',
    'Diastolic blood pressure': 'diastolic-blood-pressure',
    'Heart rate': 'heart-rate',
    'Body weight': 'body-weight',
    'O/E - muscle mass': 'muscle-mass',
    'Total body water': 'total-body-water',
    'Skin Temperature': 'skin-temperature',
}

# 'Body temperature' gets a '-label' suffix so it cannot be confused with the
# 'body-temperature' physiology measurement.
LABEL_TYPE_SLUGS = {
    'Blood pressure': 'blood-pressure',
    'Agitation': 'agitation',
    'Body water': 'body-water',
    'Pulse': 'pulse',
    'Weight': 'weight',
    'Body temperature': 'body-temperature-label',
}

activity['location'] = activity['location'].replace(LOCATION_SLUGS)
physiology['type'] = physiology['type'].replace(PHYSIOLOGY_TYPE_SLUGS)
labels['type'] = labels['type'].replace(LABEL_TYPE_SLUGS)


# Turn the categorical demographics into numbers: sex becomes a 0/1 code and
# each age bracket is replaced by its midpoint. Series.map yields NaN for any
# value that is not a key of the mapping.
SEX_CODES = {'Female': 0, 'Male': 1}
AGE_BRACKET_MIDPOINTS = {'(70, 80]': 75, '(80, 90]': 85, '(90, 110]': 100}

demographics['sex'] = demographics['sex'].map(SEX_CODES)
demographics['age'] = demographics['age'].map(AGE_BRACKET_MIDPOINTS)


In [4]:
# (rows, columns) of every loaded table, printed on one line in load order.
print(*(
    table.shape
    for table in (activity, physiology, labels, sleep, demographics)
))

(1030559, 3) (17680, 5) (608, 3) (461423, 6) (56, 3)


In [5]:
# Column names of each table after renaming, one table per line.
for table in (activity, physiology, labels, sleep, demographics):
    print(table.columns.values)

['id' 'location' 'date-time']
['id' 'date-time' 'type' 'value' 'unit']
['id' 'date-time' 'type']
['id' 'date-time' 'state' 'heart_rate' 'respiratory_rate' 'snoring']
['id' 'age' 'sex']


In [6]:
# Distinct values of the categorical column in each table, one table per line.
categorical_columns = (
    (activity, 'location'),
    (physiology, 'type'),
    (labels, 'type'),
    (sleep, 'state'),
)
for table, column in categorical_columns:
    print(table[column].unique())

# Demographics has two encoded columns; show both on the same line.
print(demographics['age'].unique(), demographics['sex'].unique())

['fridge-door' 'kitchen' 'front-door' 'bedroom' 'back-door' 'bathroom'
 'lounge' 'hallway']
['body-temperature' 'systolic-blood-pressure' 'diastolic-blood-pressure'
 'heart-rate' 'body-weight' 'muscle-mass' 'total-body-water'
 'skin-temperature']
['blood-pressure' 'agitation' 'body-water' 'pulse' 'weight'
 'body-temperature-label']
['AWAKE' 'LIGHT' 'DEEP' 'REM']
[ 75  85 100] [0 1]


In [7]:
# Number of distinct patient ids per table, shown as the shape of the
# unique-id array.
for table in (activity, physiology, labels, sleep, demographics):
    print(table['id'].unique().shape)

(56,)
(56,)
(49,)
(17,)
(56,)


In [8]:
# Count the 'agitation' label rows that belong to patients who also appear in
# the sleep table.
sleep_patient_ids = sleep['id'].unique()
labels.loc[labels['id'].isin(sleep_patient_ids), 'type'].eq('agitation').sum()

np.int64(36)

In [9]:
# Activity
#
# For every patient, derive per-day activity features (location counts,
# contextual features, per-location statistics), stack them across patients
# and write the result to <destination>/data-24h-activity.csv.

import utils_data
importlib.reload(utils_data)  # re-import so edits to utils_data take effect without a kernel restart

from utils_data import extract_24h_count, extract_24h_contextual, extract_24h_statistical

# Output directory shared by all export cells. Set TIHM_DATASET_DIR to relocate
# it; the fallback is the path this notebook originally wrote to.
destination = os.environ.get('TIHM_DATASET_DIR', '/home/ali/PycharmProjects/tihm/dataset')
os.makedirs(destination, exist_ok=True)  # so to_csv does not fail on a fresh checkout

data_activity = []

for patient_id, group in activity.sort_values('id').groupby('id'):

    # Build new frames with assign() instead of setting columns on the groupby
    # slice: assigning onto the slice is chained assignment, and the resulting
    # SettingWithCopyWarning is hidden by the global warnings filter.
    group = group.assign(**{'date-time': pd.to_datetime(group['date-time'])})
    group = group.sort_values('date-time')
    group = group.assign(date=group['date-time'].dt.date)

    # The three extractors are defined in utils_data; each result is merged on
    # ['id', 'date'] below, so each must expose those two columns.
    daily_count = extract_24h_count(group)
    daily_contextual = extract_24h_contextual(group)
    daily_statistical = extract_24h_statistical(group)

    # Outer joins keep a day even if one extractor produced no row for it.
    daily = (
        daily_count
        .merge(daily_contextual, on=['id', 'date'], how='outer')
        .merge(daily_statistical, on=['id', 'date'], how='outer')
    )

    data_activity.append(daily)


data_activity = pd.concat(data_activity, ignore_index=True)
data_activity.to_csv(os.path.join(destination, 'data-24h-activity.csv'), index=False)

In [10]:
# Sanity check of the activity features. `daily`, `daily_count`,
# `daily_contextual` and `daily_statistical` are whatever the feature loop
# left behind on its final iteration, i.e. the tables of the last patient
# processed; `data_activity` is the concatenation over all patients.
print(*(
    frame.shape
    for frame in (data_activity, daily, daily_count, daily_contextual, daily_statistical)
))
display(daily_count, daily_contextual, daily_statistical, daily, data_activity)

(2722, 50) (4, 50) (4, 10) (4, 10) (4, 34)


Unnamed: 0,id,date,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,lounge
0,fd100,2019-06-27,2.0,24.0,48.0,23.0,25.0,47.0,100.0,97.0
1,fd100,2019-06-28,0.0,32.0,91.0,7.0,21.0,58.0,145.0,120.0
2,fd100,2019-06-29,0.0,33.0,56.0,27.0,15.0,61.0,110.0,96.0
3,fd100,2019-06-30,0.0,27.0,86.0,8.0,20.0,43.0,120.0,110.0


Unnamed: 0,id,date,total-events,unique-locations,active-location-ratio,private-to-public-ratio,location-entropy,location-dominance-ratio,back-and-forth-count,num-transitions
0,fd100,2019-06-27,366,8,1.0,0.2951,2.5979,0.2732,47,266
1,fd100,2019-06-28,474,7,0.875,0.3808,2.404,0.3059,39,340
2,fd100,2019-06-29,398,7,0.875,0.3333,2.5599,0.2764,57,298
3,fd100,2019-06-30,414,7,0.875,0.4139,2.4143,0.2899,35,273


Unnamed: 0,id,date,back-door-count-max,back-door-count-mean,back-door-count-std,back-door-count-sum,bathroom-count-max,bathroom-count-mean,bathroom-count-std,bathroom-count-sum,...,hallway-count-std,hallway-count-sum,kitchen-count-max,kitchen-count-mean,kitchen-count-std,kitchen-count-sum,lounge-count-max,lounge-count-mean,lounge-count-std,lounge-count-sum
0,fd100,2019-06-27,2.0,2.0,,2.0,5,2.6667,1.3229,24,...,2.1108,47,21,9.0909,6.715,100,20,8.8182,4.8542,97
1,fd100,2019-06-28,,,,,11,4.0,3.5456,32,...,3.2592,58,28,10.3571,7.4277,145,15,8.5714,4.1642,120
2,fd100,2019-06-29,,,,,9,3.3,2.7101,33,...,2.9108,61,18,7.8571,5.8159,110,13,6.4,3.8877,96
3,fd100,2019-06-30,,,,,6,3.375,1.9226,27,...,2.1628,43,16,7.5,4.3665,120,12,6.875,2.5528,110


Unnamed: 0,id,date,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,lounge,...,hallway-count-std,hallway-count-sum,kitchen-count-max,kitchen-count-mean,kitchen-count-std,kitchen-count-sum,lounge-count-max,lounge-count-mean,lounge-count-std,lounge-count-sum
0,fd100,2019-06-27,2.0,24.0,48.0,23.0,25.0,47.0,100.0,97.0,...,2.1108,47,21,9.0909,6.715,100,20,8.8182,4.8542,97
1,fd100,2019-06-28,0.0,32.0,91.0,7.0,21.0,58.0,145.0,120.0,...,3.2592,58,28,10.3571,7.4277,145,15,8.5714,4.1642,120
2,fd100,2019-06-29,0.0,33.0,56.0,27.0,15.0,61.0,110.0,96.0,...,2.9108,61,18,7.8571,5.8159,110,13,6.4,3.8877,96
3,fd100,2019-06-30,0.0,27.0,86.0,8.0,20.0,43.0,120.0,110.0,...,2.1628,43,16,7.5,4.3665,120,12,6.875,2.5528,110


Unnamed: 0,id,date,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,lounge,...,hallway-count-std,hallway-count-sum,kitchen-count-max,kitchen-count-mean,kitchen-count-std,kitchen-count-sum,lounge-count-max,lounge-count-mean,lounge-count-std,lounge-count-sum
0,0697d,2019-06-28,14.0,7.0,24.0,23.0,28.0,40.0,106.0,80.0,...,3.6645,40,20,10.6000,5.2536,106,16,8.0,5.5976,80
1,0697d,2019-06-29,2.0,11.0,26.0,8.0,23.0,57.0,120.0,117.0,...,3.3609,57,16,8.5714,4.7021,120,21,7.8,5.5058,117
2,0697d,2019-06-30,4.0,14.0,53.0,0.0,8.0,57.0,119.0,103.0,...,2.4842,57,17,7.4375,4.6328,119,16,6.0588,4.0073,103
3,099bc,2019-05-15,12.0,17.0,31.0,27.0,13.0,32.0,71.0,6.0,...,2.2254,32,19,10.1429,5.6400,71,2,1.2,0.4472,6
4,099bc,2019-05-16,14.0,42.0,85.0,22.0,6.0,50.0,104.0,9.0,...,2.2496,50,24,8.6667,5.9595,104,4,1.8,1.3038,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2717,f220c,2019-06-30,0.0,12.0,61.0,24.0,18.0,32.0,0.0,0.0,...,1.9779,32,,,,,,,,
2718,fd100,2019-06-27,2.0,24.0,48.0,23.0,25.0,47.0,100.0,97.0,...,2.1108,47,21,9.0909,6.7150,100,20,8.8182,4.8542,97
2719,fd100,2019-06-28,0.0,32.0,91.0,7.0,21.0,58.0,145.0,120.0,...,3.2592,58,28,10.3571,7.4277,145,15,8.5714,4.1642,120
2720,fd100,2019-06-29,0.0,33.0,56.0,27.0,15.0,61.0,110.0,96.0,...,2.9108,61,18,7.8571,5.8159,110,13,6.4,3.8877,96


In [11]:
# Physiology
#
# For every patient, derive per-day physiology features, stack them across
# patients and write the result to <destination>/data-24h-physiology.csv.
# `destination` is the output directory defined in the activity cell.

import utils_data
importlib.reload(utils_data)  # re-import so edits to utils_data take effect without a kernel restart

from utils_data import extract_24h_physiology

data_physiology = []

for patient_id, group in physiology.sort_values('id').groupby('id'):

    # Build new frames with assign() instead of setting columns on the groupby
    # slice: assigning onto the slice is chained assignment, and the resulting
    # SettingWithCopyWarning is hidden by the global warnings filter.
    group = group.assign(**{'date-time': pd.to_datetime(group['date-time'])})
    group = group.sort_values('date-time')
    group = group.assign(date=group['date-time'].dt.date)

    # Defined in utils_data; presumably returns one row per day for this
    # patient -- confirm against the helper.
    daily_physiology = extract_24h_physiology(group)

    data_physiology.append(daily_physiology)


data_physiology = pd.concat(data_physiology, ignore_index=True)
data_physiology.to_csv(os.path.join(destination, 'data-24h-physiology.csv'), index=False)

In [12]:
# Sanity check of the physiology features. `daily_physiology` is the table of
# the last patient processed by the loop; `data_physiology` covers everyone.
print(*(frame.shape for frame in (data_physiology, daily_physiology)))
print(daily_physiology.columns.values)
display(daily_physiology, data_physiology)

(2161, 10) (3, 10)
['id' 'date' 'body-temperature' 'body-weight' 'diastolic-blood-pressure'
 'heart-rate' 'muscle-mass' 'systolic-blood-pressure' 'total-body-water'
 'skin-temperature']


Unnamed: 0,id,date,body-temperature,body-weight,diastolic-blood-pressure,heart-rate,muscle-mass,systolic-blood-pressure,total-body-water,skin-temperature
0,fd100,2019-06-27,36.6996,55.8,76.0,54.0,37.5,153.0,48.7,
1,fd100,2019-06-28,36.6784,,74.0,61.0,,138.0,,
2,fd100,2019-06-29,36.24,,,,,,,


Unnamed: 0,id,date,body-temperature,body-weight,diastolic-blood-pressure,heart-rate,muscle-mass,systolic-blood-pressure,total-body-water,skin-temperature
0,0697d,2019-06-28,36.1864,86.3,82.0,42.0,64.5,165.0,50.6,
1,0697d,2019-06-29,36.9148,86.1,78.5,60.0,64.6,157.0,51.2,
2,0697d,2019-06-30,35.9532,,80.5,52.5,,152.0,,
3,099bc,2019-05-15,37.2293,53.3,75.5,82.5,39.8,127.0,54.0,
4,099bc,2019-05-16,36.7949,52.7,78.0,71.0,,152.0,,
...,...,...,...,...,...,...,...,...,...,...
2156,f220c,2019-06-23,36.2770,,70.0,73.0,,111.0,,33.730
2157,f220c,2019-06-29,37.0362,,68.0,84.0,,110.0,,35.381
2158,fd100,2019-06-27,36.6996,55.8,76.0,54.0,37.5,153.0,48.7,
2159,fd100,2019-06-28,36.6784,,74.0,61.0,,138.0,,


In [13]:
# Label
#
# For every patient, derive per-day label indicators, stack them across
# patients and write the result to <destination>/data-24h-labels.csv.
# `destination` is the output directory defined in the activity cell.

import utils_data
importlib.reload(utils_data)  # re-import so edits to utils_data take effect without a kernel restart

from utils_data import extract_24h_labels

data_labels = []

for patient_id, group in labels.sort_values('id').groupby('id'):

    # Build new frames with assign() instead of setting columns on the groupby
    # slice: assigning onto the slice is chained assignment, and the resulting
    # SettingWithCopyWarning is hidden by the global warnings filter.
    group = group.assign(**{'date-time': pd.to_datetime(group['date-time'])})
    group = group.sort_values('date-time')
    group = group.assign(date=group['date-time'].dt.date)

    # Defined in utils_data; presumably returns one row per labelled day with
    # one indicator column per label type -- confirm against the helper.
    daily_labels = extract_24h_labels(group)

    data_labels.append(daily_labels)


data_labels = pd.concat(data_labels, ignore_index=True)
data_labels.to_csv(os.path.join(destination, 'data-24h-labels.csv'), index=False)

In [14]:
# Sanity check of the daily labels: sum of the 'agitation' column first, then
# shapes and columns. `daily_labels` is the table of the last patient
# processed by the loop; `data_labels` covers everyone.
print(data_labels.loc[:, 'agitation'].sum())
print(*(frame.shape for frame in (data_labels, daily_labels)))
print(daily_labels.columns.values)
display(daily_labels, data_labels)

135.0
(475, 8) (1, 8)
['id' 'date' 'blood-pressure' 'agitation' 'body-water' 'pulse' 'weight'
 'body-temperature-label']


Unnamed: 0,id,date,blood-pressure,agitation,body-water,pulse,weight,body-temperature-label
0,fd100,2019-06-27,1.0,0,0,0,0,0


Unnamed: 0,id,date,blood-pressure,agitation,body-water,pulse,weight,body-temperature-label
0,0697d,2019-06-28,1.0,0.0,0.0,1.0,0.0,0.0
1,0697d,2019-06-29,1.0,0.0,0.0,0.0,0.0,0.0
2,0697d,2019-06-30,0.0,0.0,0.0,1.0,0.0,0.0
3,099bc,2019-05-20,1.0,0.0,0.0,0.0,0.0,0.0
4,099bc,2019-06-01,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
470,eca1f,2019-05-23,0.0,1.0,0.0,0.0,0.0,0.0
471,eca1f,2019-05-24,0.0,0.0,0.0,1.0,0.0,0.0
472,eca1f,2019-06-04,0.0,0.0,0.0,1.0,0.0,0.0
473,eca1f,2019-06-19,0.0,0.0,0.0,1.0,0.0,0.0


In [15]:
import utils_data
importlib.reload(utils_data)

from utils_data import add_agitation_next_24h

# Join the three daily tables on (id, date) with outer joins, so a day present
# in any one of them is kept, then attach the per-patient demographics and let
# add_agitation_next_24h (defined in utils_data) add its columns.
data = (
    data_activity
    .merge(data_physiology, on=['id', 'date'], how='outer')
    .merge(data_labels, on=['id', 'date'], how='outer')
    .merge(demographics, on='id', how='left')
    .pipe(add_agitation_next_24h)
)

# Drop rows whose 'agitation-next' equals -10.
# NOTE(review): -10 looks like a sentinel written by add_agitation_next_24h --
# confirm. Rows where 'agitation-next' is NaN are NOT removed by this
# comparison (NaN != -10 is True); verify that keeping them is intended.
data = data.loc[data['agitation-next'].ne(-10)]


data.to_csv(os.path.join(destination, 'data-24h' + '.csv'), index=False)
display(data)
print(data.columns)
print(data['agitation'].sum(), data['agitation-next'].sum())

Unnamed: 0,id,date,back-door,bathroom,bedroom,fridge-door,front-door,hallway,kitchen,lounge,...,blood-pressure,agitation,body-water,pulse,weight,body-temperature-label,age,sex,agitation-next,agitation-four
0,0697d,2019-06-28,14.0,7.0,24.0,23.0,28.0,40.0,106.0,80.0,...,1.0,0.0,0.0,1.0,0.0,0.0,85,1,0.0,0
1,0697d,2019-06-29,2.0,11.0,26.0,8.0,23.0,57.0,120.0,117.0,...,1.0,0.0,0.0,0.0,0.0,0.0,85,1,0.0,0
3,099bc,2019-05-15,12.0,17.0,31.0,27.0,13.0,32.0,71.0,6.0,...,,,,,,,85,0,,-10
4,099bc,2019-05-16,14.0,42.0,85.0,22.0,6.0,50.0,104.0,9.0,...,,,,,,,85,0,,-10
5,099bc,2019-05-17,8.0,32.0,66.0,40.0,0.0,33.0,57.0,6.0,...,,,,,,,85,0,,-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2790,f220c,2019-06-28,0.0,11.0,34.0,18.0,17.0,24.0,0.0,0.0,...,,,,,,,100,0,,-10
2791,f220c,2019-06-29,0.0,21.0,74.0,26.0,20.0,46.0,0.0,0.0,...,,,,,,,100,0,,-10
2793,fd100,2019-06-27,2.0,24.0,48.0,23.0,25.0,47.0,100.0,97.0,...,1.0,0.0,0.0,0.0,0.0,0.0,100,0,,-10
2794,fd100,2019-06-28,0.0,32.0,91.0,7.0,21.0,58.0,145.0,120.0,...,,,,,,,100,0,,-10


Index(['id', 'date', 'back-door', 'bathroom', 'bedroom', 'fridge-door',
       'front-door', 'hallway', 'kitchen', 'lounge', 'total-events',
       'unique-locations', 'active-location-ratio', 'private-to-public-ratio',
       'location-entropy', 'location-dominance-ratio', 'back-and-forth-count',
       'num-transitions', 'back-door-count-max', 'back-door-count-mean',
       'back-door-count-std', 'back-door-count-sum', 'bathroom-count-max',
       'bathroom-count-mean', 'bathroom-count-std', 'bathroom-count-sum',
       'bedroom-count-max', 'bedroom-count-mean', 'bedroom-count-std',
       'bedroom-count-sum', 'fridge-door-count-max', 'fridge-door-count-mean',
       'fridge-door-count-std', 'fridge-door-count-sum',
       'front-door-count-max', 'front-door-count-mean', 'front-door-count-std',
       'front-door-count-sum', 'hallway-count-max', 'hallway-count-mean',
       'hallway-count-std', 'hallway-count-sum', 'kitchen-count-max',
       'kitchen-count-mean', 'kitchen-count-std'