# Merge ActiGraph and Position Data

In [1]:
import h5py
import os
import json
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
import paat

# Locations of the two input recordings and of the merged output file.
# NOTE(review): the ActiWave file lives under 'LaCie1' while the other two
# live under 'LaCie' - confirm that this is intended and not a typo.
_MEDIA_ROOT = os.path.join(os.sep, 'run', 'media', 'msw')
ACTIGRAPH_FILEPATH = os.path.join(_MEDIA_ROOT, 'LaCie', 'ACTIGRAPH_TU7.hdf5')
ACTIWAVE_FILEPATH = os.path.join(_MEDIA_ROOT, 'LaCie1', 'ACTIWAVE_TU7.hdf5')
BEDTIME_FILEPATH = os.path.join(_MEDIA_ROOT, 'LaCie', 'BEDTIME_TU7.hdf5')

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


## Identify subjects that have ActiGraph and ActiWave data

### Invalid subjects

Some subjects in the two data sets have invalid data and therefore need to be removed. The affected subjects are listed below, each with the reason for its removal.

In [2]:
# Subject id -> reason why the recording is excluded from the merge.
# Dicts keep insertion order, so the derived list below has the same order.
# NOTE(review): the two "recording time" figures carry no unit - presumably hours; confirm.
_EXCLUSION_REASONS = {
    '90013013': 'actiwave data invalid',
    '90022619': 'actiwave data invalid',
    '90097429': 'actiwave z axis too high',
    '90107724': 'actiwave data flat',
    '90265628': 'actiwave data flat',
    '90277429': 'actiwave data measurement errors in beginning',
    '90352827': 'actiwave y and z invalid',
    '90358126': 'actiwave data flat',
    '90359935': 'actiwave data measurement errors',
    '90551323': 'actiwave flat',
    '90631928': 'actiwave measurements error',
    '90821020': 'actiwave measurements error',
    '90884635': 'actiwave measurements error',
    '90936734': 'actiwave flat',
    '90952429': 'actiwave measurements error',
    '92108626': 'actiwave flat',
    '92280425': 'actiwave measurements error',
    '92327831': 'actiwave flat',
    '92355428': 'actiwave flat',
    '92504323': 'actiwave measurements error',
    '93208931': 'actiwave measurements error',
    '90165829': 'no epoch data',
    '90416626': 'no inclinometer data',
    '90320317': 'has only 3.4672222222222224 of recording time',
    '90385429': 'has only 0.8555555555555555 of recording time',
}

# Plain list of the excluded subject ids, used for the membership filter further down
invalid_subjects = list(_EXCLUSION_REASONS)

### Load subjects from ActiGraph file

In [3]:
# The top-level keys of the ActiGraph file are the subject ids
with h5py.File(ACTIGRAPH_FILEPATH, mode='r') as actigraph_file:
    actigraph_subjects = {subject_id for subject_id in actigraph_file.keys()}

### Load subjects from ActiWave file

In [4]:
# The top-level keys of the ActiWave file are the subject ids
with h5py.File(ACTIWAVE_FILEPATH, mode='r') as actiwave_file:
    actiwave_subjects = {subject_id for subject_id in actiwave_file.keys()}

### Calculate intersection

In [5]:
# Subjects recorded with both devices, in ascending id order
relevant_subjects = sorted(actigraph_subjects.intersection(actiwave_subjects))
print("{} subjects have both ActiGraph and ActiWave data".format(len(relevant_subjects)))

# Drop every subject that was flagged as invalid
relevant_subjects = list(filter(lambda subject: subject not in invalid_subjects, relevant_subjects))
print("{} of these subjects have valid data".format(len(relevant_subjects)))

608 subjects have both ActiGraph and ActiWave data
583 of these subjects have valid data


### Find all existing positions

In [6]:
# Collect every position label that occurs in the annotations of any relevant subject
inclinometer_labels = set()

with h5py.File(ACTIWAVE_FILEPATH, 'r') as actiwave_file:
    for subject in relevant_subjects:
        positions_json = json.loads(actiwave_file[subject].attrs["Annotations"])

        # Every annotation value is an (offset, duration, position) triple.
        # Iterating directly (instead of zip(*...) unpacking) also copes with
        # a subject that has no annotations at all.
        for _offset, _duration, position in positions_json.values():
            inclinometer_labels.add(position)

# Enumerate the labels in sorted order: the iteration order of a set of strings
# depends on hash randomisation and may differ between kernel runs, which
# would make the numeric ids irreproducible.
label_dict = {label: _class for _class, label in enumerate(sorted(inclinometer_labels, key=str))}

## Merge data from both devices

### Helper functions

In [7]:
def process_inclinometer_str(json_str, time, label_dict, time_res=1e3):
    """Expand position annotations into one numeric label per time stamp.

    Parameters
    ----------
    json_str : str
        JSON object whose values are ``[offset, duration, position]``
        triples; the keys are ignored.
    time : numpy.ndarray
        Time vector. ``time[0]`` is the reference point for all offsets.
    label_dict : dict
        Mapping ``{position: numeric id}``.
    time_res : float, optional
        Factor that converts ``offset`` and ``duration`` into the unit of
        ``time`` (presumably seconds to milliseconds - TODO confirm).

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``time`` holding the label id
        of the annotation covering each time stamp, NaN where none does.
        Both interval ends are inclusive; where annotations overlap, the
        one that comes later in the JSON object wins.

    Raises
    ------
    KeyError
        If an annotated position is missing from ``label_dict``.
    """
    positions_lst = json.loads(json_str)

    n_samples = time.shape[0]
    values = np.full(n_samples, np.nan)

    # An empty time vector has no reference point (time[0] would raise an
    # IndexError) and nothing to label
    if n_samples == 0:
        return values

    start = time[0]

    for offset, duration, position in positions_lst.values():
        t_1 = start + int(offset * time_res)
        t_2 = start + int((offset + duration) * time_res)
        values[(time >= t_1) & (time <= t_2)] = label_dict[position]

    return values


def segment(time, values, meta, start, stop, start_id="Start_Time", n_samples_id="Number_Of_Samples"):
    """Cut ``time`` and ``values`` down to the half-open window ``[start, stop)``.

    ``meta`` is updated in place: ``meta[start_id]`` becomes the first
    remaining time stamp cast to int and ``meta[n_samples_id]`` the number
    of remaining samples. The same ``meta`` object is returned.

    Returns the tuple ``(time, values, meta)`` restricted to the window.
    Raises an IndexError when no sample falls inside the window.
    """
    in_window = np.logical_and(time >= start, time < stop)
    time_cut, values_cut = time[in_window], values[in_window]

    meta.update({start_id: time_cut[0].astype(int),
                 n_samples_id: time_cut.shape[0]})

    return time_cut, values_cut, meta


def get_start_stop_actiwave(grp, field):
    """Return the start and stop time of one data set of a subject group.

    The start is read from the group's "Start Datetime" attribute. The stop
    is the start plus "NSamples" / "Sample Frequency" of ``grp[field]``,
    both at second resolution.
    """
    dset_attrs = grp[field].attrs

    start = np.array(grp.attrs["Start Datetime"], dtype="datetime64[s]")
    seconds_recorded = dset_attrs["NSamples"] / dset_attrs["Sample Frequency"]
    stop = start + np.array(seconds_recorded, dtype="timedelta64[s]")

    return start, stop
    

### Processing pipeline

In [8]:
# Create a new, empty HDF5 file and store the description of the data set in it.
# The readme is assembled from adjacent string literals: a backslash
# continuation inside a single literal would embed the indentation of every
# source line in the stored text.
with h5py.File(BEDTIME_FILEPATH, 'w') as bedtime_file:
    bedtime_file.attrs["label_dict"] = json.dumps(label_dict)
    bedtime_file.attrs["readme"] = (
        "This dataset contains the raw acceleration data from "
        "ActiGraph devices in the X, Y and Z column as well as "
        "the subject's position derived from the ActiWave device "
        "in the Position column. Time information is stored in "
        "the Time column.\n\n"
        "The information which number in "
        "the position column corresponds to which position is "
        "stored in the 'label_dict' attribute which holds a "
        "mapping {position_str: position_id}. NaN values in this "
        "column are periods when the ActiWave device did not "
        "record data which happened according to the manual, "
        "when the subject was in motion."
    )

for subject in tqdm(relevant_subjects):

    # Load ActiGraph data
    with h5py.File(ACTIGRAPH_FILEPATH, 'r') as actigraph_file:
        time, acceleration, meta = paat.io.load_dset(actigraph_file[subject], "acceleration", rescale=True)

    # Load ActiWave recording period and the raw position annotations
    with h5py.File(ACTIWAVE_FILEPATH, 'r') as actiwave_file:
        actiwave_start, actiwave_stop = get_start_stop_actiwave(actiwave_file[subject], "acceleration")

        positions_json = actiwave_file[subject].attrs["Annotations"]

    # Extract the positions from the ActiWave data. The position vector is built
    # at the ActiGraph sample rate so that it can be stored next to the acceleration.
    n_samples = (actiwave_stop - actiwave_start).astype(int) * meta["Sample_Rate"]
    actiwave_time = paat.io._create_time_vector(actiwave_start, n_samples, meta["Sample_Rate"])
    positions = process_inclinometer_str(positions_json, actiwave_time, label_dict)
    actiwave_meta = {"Number_Of_Samples": n_samples,
                     "Sample_Rate": meta["Sample_Rate"],
                     "Start_Time": actiwave_time[0]}

    # Find period for which we have data from both sensors
    start = max(actiwave_start, time[0])
    stop = min(actiwave_stop, time[-1])

    # segment() uses the half-open window [start, stop), so start == stop is
    # an empty window as well and must be skipped (it would otherwise fail
    # on time[0] inside segment()).
    if start >= stop:
        print("Problem processing subject {}: Data does not overlap. ActiGraph data from {} to {}. ActiWave data from {} to {}".format(subject, time[0], time[-1], actiwave_start, actiwave_stop))
        continue

    # Extract relevant periods
    time, acceleration, meta = segment(time, acceleration, meta, start=start, stop=stop)
    _, positions, _ = segment(actiwave_time, positions, actiwave_meta, start=start, stop=stop)

    # Store data as a dataframe.
    # NOTE(review): X is taken from column 1 and Y from column 0 - presumably
    # because of the axis order returned by paat.io.load_dset; confirm.
    df = pd.DataFrame({"Time": time,
                       "X": acceleration[:, 1],
                       "Y": acceleration[:, 0],
                       "Z": acceleration[:, 2],
                       "Position": positions})

    # Save data to the file created above (one key per subject)
    df.to_hdf(BEDTIME_FILEPATH, key="subject" + subject)


  0%|          | 0/583 [00:00<?, ?it/s]

Problem processing subject 90248124: Data does not overlap. ActiGraph data from 2016-03-08T00:00:00.000 to 2016-03-14T23:59:52.990. ActiWave data from 2016-03-04T10:58:00 to 2016-03-05T14:42:23
