In [82]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import math

from os import listdir, walk
from os.path import isfile, join

from datetime import datetime, timedelta

In [3]:
from sklearn.preprocessing import MinMaxScaler

# Define Timestamp Methods

In [4]:
def calc_sec(time):
    hms = time.split(':')
    hms = [float(x) for x in hms]
    sec = hms[2] + hms[1]*60 + hms[0]*3600
    sec = round(sec,3)
    return sec

In [5]:
def calc_ts(sec):
    ts = ''
    hr = int(sec/3600)
    mn = int((sec - (hr*3600))/60)
    sc = sec - (hr*3600) - (mn*60)
    sc = round(sc,3)
    ts += str(hr) + ':' + str(mn) + ':' + str(sc)
    # print(ts)
    return ts

In [6]:
def calc_t_period(dates,secs):
    t_period = []
    
    start_sec = secs[0]
    prev_sec = secs[0]
    prev_date = dates[0]

    for i in range(len(secs)):
        curr_sec = secs[i]
        diff_sec = curr_sec - prev_sec
        curr_date = dates[i]
        
        if((diff_sec>3.0) and (curr_date==prev_date)):
            t_period.append([curr_date,start_sec,prev_sec])
            start_sec = curr_sec
        elif(curr_date!=prev_date):
            t_period.append([prev_date,start_sec,prev_sec])
            start_sec = curr_sec
            prev_date = curr_date
        elif(i==len(secs)-1):
            t_period.append([curr_date,start_sec,curr_sec])

        prev_sec = curr_sec
    
    return t_period

# Load Dataset

In [7]:
# Retrieve file directories from Google Drive
mypath = '../../DDC_Data/raw/'
basepath = '../../'

dir_ = [f for f in walk(mypath)]
# print(dir_)

dir = list(dir_[0])
dir[1] = sorted(dir[1])

outer_path = dir[0]
sub_path = dir[1]

folders = [join(outer_path,d) for d in sub_path]

files = []
for fd in folders:
    temp_f = [f for f in listdir(fd) if isfile(join(fd, f)) and f[-3:]=='csv' and f[5:9]!='data' and f[:4]==fd[-4:]]
    temp_f = sorted(temp_f)

## Retrieve All Timestamp Periods from a File

In [8]:
all_subjects = []

for i in range(1001,1015):
    all_subjects.append(str(i))

for i in range(3001,3007):
    all_subjects.append(str(i))

In [67]:
def load_timer(subject_id):
  # Configure starting and ending time values
    sid_dir = mypath + subject_id
    sid_files = [f for f in listdir(sid_dir) if 'history_amdtimer' in f]

    sid_filepath = sid_dir + '/' + sid_files[0]

    # Timestamp periods dataframe
    timer_df = pd.read_csv(sid_filepath, header=None, names=['sid','raw_label', 'timestamp', 'duration','label'])

    filtered_timer = [i for i in timer_df['sid'] if i==int(subject_id)]

    timer_filt = timer_df[timer_df['sid'].isin(filtered_timer)]
    timer_filt = timer_filt.reset_index(drop=True)
    
    timer_label = []
    
    for i in range(len(timer_filt)):
        if(timer_filt.loc[i]['raw_label']=='upstairs' or 
          timer_filt.loc[i]['raw_label']=='downstairs'):
            timer_label.append('walk')
        else:
            timer_label.append(timer_filt.loc[i]['raw_label'])

    timer_filt['label'] = pd.Series(timer_label)
    
    datetime_format = '%Y-%m-%d %H:%M:%S.%f'
    timer_filt['time_start'] = timer_filt['timestamp'].apply(lambda x: datetime.strptime(x, datetime_format))
    
    time_format = '%H:%M:%S'
    zero_date = datetime(1900, 1, 1)
    
    timer_filt['duration'] = timer_filt['duration'].apply(lambda x: datetime.strptime(x, time_format)-zero_date)
    
    for i in range(timer_filt.shape[0]):
        timer_filt.loc[i, 'time_end'] = timer_filt.loc[i, 'time_start'] + timer_filt.loc[i, 'duration']

#     print(timer_filt)
    
    return timer_filt

## Create Dataframe of ACC and HR

In [52]:
def load_acc(subject_id, start_time, end_time):
    # Load accelerations
    acc_filepath = mypath + '/' + subject_id + '/' + subject_id + '-log_acc.csv'

    df = pd.read_csv(acc_filepath, header=None, names=['x','y','z','timestamp'])
    
    datetime_format = '%Y-%m-%d %H:%M:%S.%f'
    df['timestamp'] = df['timestamp'].apply(lambda x: datetime.strptime(x, datetime_format))

    filtered = [r for r in df['timestamp'] if r>=start_time and r<=end_time]

    df_filt = df[df['timestamp'].isin(filtered)]
    df_filt = df_filt.reset_index(drop=True)

    df_filt['ID'] = pd.Series([subject_id for i in range(len(df_filt))])
    
    cols = ['ID','timestamp','x','y','z']
    df_filt = df_filt[cols]

    return df_filt

In [56]:
def load_hr(subject_id, start_time, end_time):
    # Load heart rate
    hr_filepath = mypath + '/' + subject_id + '/' + subject_id + '-log_hr.csv'

    df2 = pd.read_csv(hr_filepath, header=None, names=['hr','timestamp'])
    
    datetime_format = '%Y-%m-%d %H:%M:%S.%f'
    df2['timestamp'] = df2['timestamp'].apply(lambda x: datetime.strptime(x, datetime_format))

    filtered = [r for r in df2['timestamp'] if r>=start_time and r<=end_time]

    df_hr = df2[df2['timestamp'].isin(filtered)]
    df_hr = df_hr.reset_index(drop=True)

    cols = ['timestamp','hr']
    df_hr = df_hr[cols]

    return df_hr

In [59]:
def merge_acc_and_hr(df_filt, df_hr):
    # Fill in missing HRs
    hr_cnt = 0

    for i in range(len(df_filt)):
        hr_time = df_hr.loc[hr_cnt,'timestamp']
        filt_time = df_filt.loc[i,'timestamp']

        if(hr_time<=filt_time):
            if(hr_cnt<len(df_hr)-1):
                hr_cnt += 1
        df_filt.loc[i,'HR'] = df_hr.loc[hr_cnt,'hr']

    # Normalize by dividing by g (standard gravity)
    g = 9.8
    df_filt.loc[:,'x'] = df_filt['x'].apply(lambda x: x/g)
    df_filt.loc[:,'y'] = df_filt['y'].apply(lambda x: x/g)
    df_filt.loc[:,'z'] = df_filt['z'].apply(lambda x: x/g)
    
    cols = ['x','y','z']
    xyz_ = df_filt[cols].to_dict('split')['data']
    xyz_new = MinMaxScaler().fit_transform(xyz_)
#     print(np.array(xyz_new).shape)

    for i in range(len(cols)):
        df_filt[cols[i]] = pd.Series(xyz_new.transpose()[i])
        
#     print(df_filt['x'])

    return df_filt

# Calculate Activity Index

In [13]:
std_i_bar = [0.00349329,0.00465817,0.00543154]
std_i_bar = np.array(std_i_bar)

In [14]:
def equation_bai(X_i):
    all_std = []
    
    std_i = np.std(X_i,axis=0)
    diff_std = std_i**2 - std_i_bar**2
    diff_std = (diff_std + 1) / (std_i_bar**2 + 1)
    
    diff_std_ = std_i**2

    all_std.append(diff_std)
    
    all_std = np.array(all_std)
    
    ai = np.sum(all_std**2,axis=1)/3
    ai[ai<0] = 0
    ai = np.sqrt(ai)
    
    return ai

In [15]:
def calc_ai(df1):
    H = 10
    ai1 = []

    for i in range(len(df1)):
        xyz_val = []
        if(i-H>=0):
            for j in range(H,0,-1):
                xyz_val.append([df1.loc[i-j,'x'],df1.loc[i-j,'y'],df1.loc[i-j,'z']])
            ai_val = float(equation_bai(xyz_val))
            ai1.append(ai_val)
        else:
            ai1.append(1)

    return ai1

# Colors for Each Acitivity

In [17]:
def prepare_color_labels(ts_, labels):
  
    accum = 0
    ts = []
    for x in ts_:
        accum += x
        ts.append(round(accum,3))

    lb_set = set()
    for x in labels:
        lb_set.add(x)

    lb_ = list(lb_set)

    set_cnt = []
    for i in range(len(lb_)):
        set_cnt.append(0)

    lb = []
    lb.append('NaN')

    for x in labels:
        for i in range(len(lb_)):
            if(lb_[i]==x and set_cnt[i]!=1 and lb_[i]!='NaN'):
                set_cnt[i] = 1
                lb.append(x)

    colors = ['#808080', '#E6194B', '#3CB44B', '#FFE119', '#4363D8', '#F58231',
            '#911EB4', '#46F0F0', '#F032E6', '#BCF60C', '#008080', '#E6BEFF', 
            '#9A6324', '#800000', '#AAFFC3', '#808000', '#000075']

    color_dict = {}
    for i in range(len(lb)):
        color_dict[lb[i]] = colors[i]

    #   print(color_dict)

    lb_color = []
    for x in labels:
        lb_color.append(color_dict[x])

    return ts, lb_color

# Dataframe List Grouped by Label

In [141]:
def group_dataframe_by_label(df1, df_timer, subject_id, label_list):
    df_list = [pd.DataFrame() for i in range(len(label_list))]
    period = [[] for i in range(len(label_list))]
    
    for lb in range(len(label_list)):
        print(label_list[lb])
        for i in range(df_timer.shape[0]):
            start = 0
            end = 0
            
            if(df_timer.loc[i, 'label']==label_list[lb]):
                t_a = df_timer.loc[i, 'time_start']
                t_b = df_timer.loc[i, 'time_end']

                for j in range(df1.shape[0]):    
                    if(df1.loc[j, 'ID']==subject_id):
                        if(j>0 and df1.loc[j, 'timestamp']<=t_b and df1.loc[j-1, 'timestamp']<t_b):
                            end = j

                for j in reversed(range(df1.shape[0])):
                    if(df1.loc[j, 'ID']==subject_id):
                        if(j<df1.shape[0]-1 and df1.loc[j, 'timestamp']>=t_a and df1.loc[j+1, 'timestamp']>t_a):
                            start = j
                            
#                 print(df1[start:end].head())

                period[lb].append([start, end])
                
                if(df_list[lb].empty):
                    df_list[lb] = df1.loc[start:end+1]
                else:
                    df_list[lb].append(df1.loc[start:end+1], ignore_index=True)

#     for label in label_list:
#         df_list[label] = df_list[label].reset_index(drop=True)

    return df_list, period

# Get X and y from Dataset for Each Subject

In [23]:
def get_data(df_list, label_dict):
    feature_cols = ['x','y','z']
    count = 0
    
    y_all = []
    ts_all = []
    hr_all = []
    
    for x in label_dict:
#         print(x)
    
        X_series = df_list[label_dict[x]][feature_cols]

        X_ = X_series.values.reshape((len(X_series),3))
        y_ = np.array([label_dict[x] for i in range(len(df_list[label_dict[x]]))])
        ts_ = np.array(df_list[label_dict[x]]['timestamp'])
        hr_ = np.array(df_list[label_dict[x]]['HR'])

          # 'downstairs': 0,
          # 'sit': 1,
          # 'sleep': 2,
          # 'stand': 3,
            
        y_all.append(y_)
        ts_all.append(ts_)
        hr_all.append(hr_)
        
        if(count==0):
            X_all = X_
            count += 1

        else:
            X_all = np.vstack((X_all, X_))

    y_all = np.hstack(y_all)
    ts_all = np.hstack(ts_all)
    hr_all = np.hstack(hr_all)
    
    return np.array(X_all), np.array(y_all), np.array(ts_all), np.array(hr_all)

In [24]:
def get_sorted_data(X_i, y_i, ts_i, hr_i, subj_i):
    df_ = pd.DataFrame({
        'ID': subj_i,
        'timestamp': ts_i,
        'x': [x[0] for x in X_i],
        'y': [x[1] for x in X_i],
        'z': [x[2] for x in X_i],
        'HR': hr_i,
        'label': y_i
    })
    
    df_sorted = df_.sort_values(by=['timestamp'])
    
    cols = ['x','y','z']
    X_i = df_sorted[cols].values.tolist()
    y_i = df_sorted['label'].values.tolist()
    ts_i = df_sorted['timestamp'].values.tolist()
    hr_i = df_sorted['HR'].values.tolist()
    subj_i = df_sorted['ID'].values.tolist()
    
    return X_i, y_i, ts_i, hr_i, subj_i

# Function Call *

In [120]:
def load_all_data(all_subjects):
    
    for subject_id in all_subjects:
        print("Loading {0}'s data".format(subject_id))

        df_timer = load_timer(subject_id)
        
        start_time = df_timer.loc[0, 'time_start']
        end_time = df_timer.loc[df_timer.shape[0]-1, 'time_end']
        
        df_filt = load_acc(subject_id, start_time, end_time)
        df_hr = load_hr(subject_id, start_time, end_time)

        df1 = merge_acc_and_hr(df_filt, df_hr)
        ai1 = calc_ai(df1)

        df1['AI'] = pd.Series(ai1)
#         print(df1)

        label_list = ['sit', 'sleep', 'stand', 'walk']
        
        # get a list of dataframe in which there are 4 types of activity
        df_list = group_dataframe_by_label(df1, df_timer, subject_id, label_list)

        print(df_list)

In [142]:
load_all_data(all_subjects)

Loading 1001's data
sit
sleep
stand
walk
([       ID               timestamp         x         y         z          HR  \
229  1001 2019-01-14 14:53:07.834  0.346669  0.532096  0.427995  105.820340   
230  1001 2019-01-14 14:53:07.996  0.345733  0.531776  0.429373  105.820340   
231  1001 2019-01-14 14:53:08.156  0.346329  0.532523  0.428860  105.820340   
232  1001 2019-01-14 14:53:08.318  0.345287  0.527082  0.428732  105.820340   
233  1001 2019-01-14 14:53:08.477  0.345031  0.527936  0.426841  105.820340   
234  1001 2019-01-14 14:53:08.637  0.344861  0.526335  0.426841  105.274170   
235  1001 2019-01-14 14:53:08.798  0.344011  0.528149  0.425944  105.274170   
236  1001 2019-01-14 14:53:08.959  0.344861  0.528149  0.424277  105.274170   
237  1001 2019-01-14 14:53:09.119  0.344861  0.526975  0.424406  105.274170   
238  1001 2019-01-14 14:53:09.280  0.344606  0.526975  0.425046  105.274170   
239  1001 2019-01-14 14:53:09.441  0.344521  0.526869  0.426456  105.274170   
240  1001

sit
sleep
stand
walk
([       ID               timestamp         x         y         z         HR  \
230  1002 2019-01-14 15:19:44.955  0.476983  0.337864  0.358837  95.509050   
231  1002 2019-01-14 15:19:45.116  0.473968  0.346275  0.347298  95.509050   
232  1002 2019-01-14 15:19:45.278  0.456845  0.350610  0.314475  94.395070   
233  1002 2019-01-14 15:19:45.437  0.450213  0.299712  0.398968  94.395070   
234  1002 2019-01-14 15:19:45.598  0.525459  0.348616  0.374992  94.395070   
235  1002 2019-01-14 15:19:45.759  0.540774  0.374976  0.404609  94.395070   
236  1002 2019-01-14 15:19:45.920  0.534744  0.344454  0.383069  94.395070   
237  1002 2019-01-14 15:19:46.080  0.531127  0.324251  0.413071  94.395070   
238  1002 2019-01-14 15:19:46.242  0.526303  0.280722  0.567312  94.395070   
239  1002 2019-01-14 15:19:46.402  0.488680  0.288353  0.449869  92.331620   
240  1002 2019-01-14 15:19:46.564  0.539568  0.329106  0.411276  92.331620   
241  1002 2019-01-14 15:19:46.723  0.5118

sit
sleep
stand
walk
([       ID               timestamp         x         y         z        HR  \
209  1003 2019-01-14 15:46:46.535  0.390321  0.329337  0.277852  95.16817   
210  1003 2019-01-14 15:46:46.696  0.340212  0.415595  0.330619  95.16817   
211  1003 2019-01-14 15:46:46.856  0.384398  0.354353  0.303838  95.16817   
212  1003 2019-01-14 15:46:47.017  0.379040  0.352654  0.328009  95.16817   
213  1003 2019-01-14 15:46:47.178  0.378664  0.344724  0.326648  95.16817   
214  1003 2019-01-14 15:46:47.336  0.380450  0.349822  0.323016  95.02710   
215  1003 2019-01-14 15:46:47.499  0.380074  0.345668  0.323016  95.02710   
216  1003 2019-01-14 15:46:47.659  0.380356  0.352087  0.322449  95.02710   
217  1003 2019-01-14 15:46:47.819  0.337298  0.333585  0.359443  95.02710   
218  1003 2019-01-14 15:46:47.981  0.470045  0.403346  0.448183  95.02710   
219  1003 2019-01-14 15:46:48.140  0.361083  0.361999  0.304633  95.02710   
220  1003 2019-01-14 15:46:48.301  0.382142  0.345385

sit
sleep
stand
walk
([       ID               timestamp         x         y         z         HR  \
208  1004 2019-01-14 16:06:29.382  0.551569  0.584536  0.473245  50.649150   
209  1004 2019-01-14 16:06:29.543  0.545661  0.558596  0.474663  48.626522   
210  1004 2019-01-14 16:06:29.704  0.539097  0.557107  0.504137  48.626522   
211  1004 2019-01-14 16:06:29.867  0.542707  0.571132  0.499409  48.626522   
212  1004 2019-01-14 16:06:30.025  0.545989  0.559837  0.498463  48.626522   
213  1004 2019-01-14 16:06:30.186  0.545770  0.560582  0.501458  48.626522   
214  1004 2019-01-14 16:06:30.348  0.545114  0.558720  0.499567  48.626522   
215  1004 2019-01-14 16:06:30.508  0.545880  0.559093  0.500985  48.626522   
216  1004 2019-01-14 16:06:30.670  0.544458  0.559806  0.500985  48.831540   
217  1004 2019-01-14 16:06:30.829  0.545005  0.560303  0.500828  48.831540   
218  1004 2019-01-14 16:06:30.990  0.545223  0.556704  0.499882  48.831540   
219  1004 2019-01-14 16:06:31.150  0.5505

sit
sleep
stand
walk
([       ID               timestamp         x         y         z         HR  \
234  1005 2019-01-14 16:22:45.731  0.719022  0.349010  0.509706  86.112790   
235  1005 2019-01-14 16:22:45.892  0.718881  0.348707  0.508340  86.112790   
236  1005 2019-01-14 16:22:46.054  0.719872  0.346288  0.507884  86.112790   
237  1005 2019-01-14 16:22:46.213  0.718739  0.346590  0.507884  86.112790   
238  1005 2019-01-14 16:22:46.376  0.719872  0.346590  0.508340  86.002520   
239  1005 2019-01-14 16:22:46.536  0.718172  0.348102  0.507884  86.002520   
240  1005 2019-01-14 16:22:46.697  0.722423  0.365643  0.510844  86.002520   
241  1005 2019-01-14 16:22:46.858  0.730181  0.358083  0.528149  86.002520   
242  1005 2019-01-14 16:22:47.017  0.737265  0.341146  0.509706  86.002520   
243  1005 2019-01-14 16:22:47.178  0.803153  0.459096  0.569135  86.002520   
244  1005 2019-01-14 16:22:47.339  0.746334  0.335702  0.550236  86.002520   
245  1005 2019-01-14 16:22:47.500  0.7466

sit
sleep
stand
walk
([       ID               timestamp         x         y         z          HR  \
202  1006 2019-01-14 16:37:03.435  0.687207  0.370984  0.431738  103.723180   
203  1006 2019-01-14 16:37:03.596  0.690074  0.390179  0.429639  103.723180   
204  1006 2019-01-14 16:37:03.757  0.697022  0.371532  0.435690  103.723180   
205  1006 2019-01-14 16:37:03.917  0.706507  0.377199  0.431985  103.723180   
206  1006 2019-01-14 16:37:04.078  0.663386  0.356451  0.425687  103.723180   
207  1006 2019-01-14 16:37:04.239  0.677612  0.370253  0.423217  103.723180   
208  1006 2019-01-14 16:37:04.402  0.669010  0.365134  0.434702  103.723180   
209  1006 2019-01-14 16:37:04.560  0.695148  0.409739  0.421859  104.284190   
210  1006 2019-01-14 16:37:04.720  0.594899  0.376285  0.418154  104.284190   
211  1006 2019-01-14 16:37:04.883  0.641329  0.332594  0.409386  104.284190   
212  1006 2019-01-14 16:37:05.043  0.584753  0.410196  0.431368  104.284190   
213  1006 2019-01-14 16:37:05

sit
sleep
stand
walk
([       ID               timestamp         x         y         z          HR  \
217  1007 2019-01-14 16:57:04.867  0.411714  0.365658  0.558153   80.478510   
218  1007 2019-01-14 16:57:05.025  0.408771  0.364553  0.552184   80.478510   
219  1007 2019-01-14 16:57:05.188  0.401589  0.366541  0.546612   80.478510   
220  1007 2019-01-14 16:57:05.348  0.415953  0.367867  0.560011   80.478510   
221  1007 2019-01-14 16:57:05.509  0.405121  0.370629  0.552714   80.478510   
222  1007 2019-01-14 16:57:05.668  0.405475  0.363890  0.550061   80.478510   
223  1007 2019-01-14 16:57:05.828  0.403591  0.367315  0.550061   79.946075   
224  1007 2019-01-14 16:57:05.989  0.403826  0.366983  0.549929   79.946075   
225  1007 2019-01-14 16:57:06.150  0.405710  0.366541  0.550725   79.946075   
226  1007 2019-01-14 16:57:06.311  0.403944  0.365658  0.549133   79.946075   
227  1007 2019-01-14 16:57:06.471  0.405357  0.365547  0.550459   79.946075   
228  1007 2019-01-14 16:57:06

sit
sleep
stand
walk
([       ID               timestamp         x         y         z         HR  \
200  1008 2019-01-14 17:17:57.838  0.621329  0.463023  0.415275  79.409660   
201  1008 2019-01-14 17:17:58.000  0.630317  0.460806  0.424680  79.409660   
202  1008 2019-01-14 17:17:58.161  0.633034  0.445088  0.419815  79.409660   
203  1008 2019-01-14 17:17:58.320  0.623001  0.482771  0.423707  83.632965   
204  1008 2019-01-14 17:17:58.482  0.638050  0.450529  0.403600  83.632965   
205  1008 2019-01-14 17:17:58.641  0.657488  0.424937  0.502189  83.632965   
206  1008 2019-01-14 17:17:58.803  0.671700  0.412846  0.385439  83.632965   
207  1008 2019-01-14 17:17:58.962  0.649545  0.416877  0.385763  83.632965   
208  1008 2019-01-14 17:17:59.123  0.647246  0.419093  0.388357  83.632965   
209  1008 2019-01-14 17:17:59.283  0.643066  0.443073  0.401005  83.632965   
210  1008 2019-01-14 17:17:59.445  0.614223  0.463829  0.441868  85.871140   
211  1008 2019-01-14 17:17:59.607  0.6248

sit
sleep
stand
walk


KeyboardInterrupt: 