In [2]:
import math
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from prettytable import PrettyTable
from os import listdir
from sklearn.metrics import f1_score
from datetime import datetime

from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

In [3]:
ddc_path = 'E:/coding/Dementia_proj/src/ddc/'
os.chdir(ddc_path)

In [4]:
%run preprocessing.ipynb

In [5]:
mypath = '../../DDC_Data/raw/'
basepath = '../../'

# Get the Actual Timestamp Labels

In [6]:
def load_timer(subject_id):
    
    sid_dir = mypath + subject_id
    sid_files = [f for f in listdir(sid_dir) if 'history_amdtimer' in f]

    sid_filepath = sid_dir + '/' + sid_files[0]

    timer_df = pd.read_csv(sid_filepath, header=None, names=['sid','raw_label', 'timestamp', 'duration','label'])

    filtered_timer = [i for i in timer_df['sid'] if i==int(subject_id)]

    timer_filt = timer_df[timer_df['sid'].isin(filtered_timer)]
    timer_filt = timer_filt.reset_index(drop=True)
    
    timer_label = []
    
    for i in range(len(timer_filt)):
        if(timer_filt.loc[i]['raw_label']=='upstairs' or 
          timer_filt.loc[i]['raw_label']=='downstairs'):
            timer_label.append('walk')
        else:
            timer_label.append(timer_filt.loc[i]['raw_label'])

    timer_filt['label'] = pd.Series(timer_label)
    
    datetime_format = '%Y-%m-%d %H:%M:%S.%f'
    timer_filt['time_start'] = timer_filt['timestamp'].apply(lambda x: datetime.strptime(x, datetime_format))
    
    time_format = '%H:%M:%S'
    zero_date = datetime(1900, 1, 1)
    
    timer_filt['duration'] = timer_filt['duration'].apply(lambda x: datetime.strptime(x, time_format)-zero_date)
    
    for i in range(timer_filt.shape[0]):
        timer_filt.loc[i, 'time_end'] = timer_filt.loc[i, 'time_start'] + timer_filt.loc[i, 'duration']

#     print(timer_filt)
    
    return timer_filt

# Load Data of the Subject

In [7]:
def load_acc(subject_id, start_time, end_time):
    # Load accelerations
    acc_path = mypath + '/' + subject_id + '/' + subject_id + '-log_acc.csv'

    df = pd.read_csv(acc_path, header=None, names=['x','y','z','timestamp'])
    
    datetime_format = '%Y-%m-%d %H:%M:%S.%f'
    df['timestamp'] = df['timestamp'].apply(lambda x: datetime.strptime(x, datetime_format))

    filtered = [r for r in df['timestamp'] if r>=start_time and r<=end_time]

    df_filt = df[df['timestamp'].isin(filtered)]
    df_filt = df_filt.reset_index(drop=True)

    df_filt['ID'] = pd.Series([subject_id for i in range(len(df_filt))])
    
    cols = ['ID','timestamp','x','y','z']
    df_filt = df_filt[cols]

    return df_filt

In [8]:
def load_hr(subject_id, start_time, end_time):
    # Load heart rate
    hr_filepath = mypath + '/' + subject_id + '/' + subject_id + '-log_hr.csv'

    df2 = pd.read_csv(hr_filepath, header=None, names=['hr','timestamp'])
    
    datetime_format = '%Y-%m-%d %H:%M:%S.%f'
    df2['timestamp'] = df2['timestamp'].apply(lambda x: datetime.strptime(x, datetime_format))

    filtered = [r for r in df2['timestamp'] if r>=start_time and r<=end_time]

    df_hr = df2[df2['timestamp'].isin(filtered)]
    df_hr = df_hr.reset_index(drop=True)

    cols = ['timestamp','hr']
    df_hr = df_hr[cols]

    return df_hr

In [9]:
def merge_acc_and_hr(df_filt, df_hr):
    # Fill in missing HRs
    hr_cnt = 0

    for i in range(len(df_filt)):
        hr_time = df_hr.loc[hr_cnt,'timestamp']
        filt_time = df_filt.loc[i,'timestamp']

        if(hr_time<=filt_time):
            if(hr_cnt<len(df_hr)-1):
                hr_cnt += 1
        df_filt.loc[i,'HR'] = df_hr.loc[hr_cnt,'hr']

    # Normalize by dividing by g (standard gravity)
    g = 9.8
    df_filt.loc[:,'x'] = df_filt['x'].apply(lambda x: x/g)
    df_filt.loc[:,'y'] = df_filt['y'].apply(lambda x: x/g)
    df_filt.loc[:,'z'] = df_filt['z'].apply(lambda x: x/g)
    
    cols = ['x','y','z']
    xyz_ = df_filt[cols].to_dict('split')['data']
    xyz_new = MinMaxScaler().fit_transform(xyz_)

    for i in range(len(cols)):
        df_filt[cols[i]] = pd.Series(xyz_new.transpose()[i])
        
#     print(df_filt['x'])

    return df_filt

# Calculate Activity Index

In [10]:
std_i_bar = [0.00349329,0.00465817,0.00543154]
std_i_bar = np.array(std_i_bar)

In [11]:
def equation_bai(X_i):
    all_std = []
    
    std_i = np.std(X_i,axis=0)
    diff_std = std_i**2 - std_i_bar**2
    diff_std = (diff_std + 1) / (std_i_bar**2 + 1)
    
    diff_std_ = std_i**2

    all_std.append(diff_std)
    
    all_std = np.array(all_std)
    
    ai = np.sum(all_std**2,axis=1)/3
    ai[ai<0] = 0
    ai = np.sqrt(ai)
    
    return ai

In [12]:
def calc_ai(df1):
    H = 10
    ai1 = []

    for i in range(len(df1)):
        xyz_val = []
        if(i-H>=0):
            for j in range(H,0,-1):
                xyz_val.append([df1.loc[i-j,'x'],df1.loc[i-j,'y'],df1.loc[i-j,'z']])
            ai_val = float(equation_bai(xyz_val))
            ai1.append(ai_val)
        else:
            ai1.append(1)

    return ai1

# Preprocess (PCA, impure)

In [13]:
def preprocess_data(df_test, pca):
    ts_list = []
    g = 9.8

    X_list = []
    
    for i in range(len(df_test)):
        X_i = [df_test.loc[i]['x']/g, df_test.loc[i]['y']/g, df_test.loc[i]['z']/g]
        X_list.append(X_i)
        
    X_stack = np.vstack(X_list)
    X_norm = MinMaxScaler().fit_transform(X_stack)
    X_pca = pca.transform(X_norm)

    y_imp = [-1 for i in range(X_pca.shape[0])]
    X_imp, y_imp = prepare_impure_label(X_pca, y_imp)
    
    return X_imp, y_imp

# Predict

In [14]:
%run classifier_algo.ipynb

In [15]:
def predict_combine(X_imp, model, window_length=60):
    
    y_pred = model.predict(X_imp)
    print("Finished prediction")
    
#     y_pred = combine_2(X_imp, y_pred)
#     y_pred_fill = np.hstack(([y_pred[0] for i in range(window_length-1)], y_pred))
    
#     print(X_imp.shape, y_pred_fill.shape)
    
    return y_pred

# Group dataframe by label

In [16]:
def group_dataframe_by_label(df1, df_timer, subject_id, label_list):
    df_list = {}
    period = {}
    
    for label in label_list:
        df_list[label] = pd.DataFrame()
        period[label] = []
    
    for label in label_list:
#         print(label)
        for i in range(df_timer.shape[0]):
            start = 0
            end = 0
            
            if(df_timer.loc[i, 'label']==label):
                t_a = df_timer.loc[i, 'time_start']
                t_b = df_timer.loc[i, 'time_end']

                for j in range(df1.shape[0]):    
                    if(df1.loc[j, 'ID']==subject_id):
                        if(j>0 and df1.loc[j, 'timestamp']<=t_b and df1.loc[j-1, 'timestamp']<t_b):
                            end = j

                for j in reversed(range(df1.shape[0])):
                    if(df1.loc[j, 'ID']==subject_id):
                        if(j<df1.shape[0]-1 and df1.loc[j, 'timestamp']>=t_a and df1.loc[j+1, 'timestamp']>t_a):
                            start = j

                period[label].append([start, end])
                
                if(df_list[label].empty):
                    df_list[label] = df1.loc[start:end+1]
                else:
                    df_list[label].append(df1.loc[start:end+1], ignore_index=True)
                    
    for label in label_list:
        df_list[label] = df_list[label].reset_index(drop=True)

    return df_list, period

# Prepare Predicted Labels

In [17]:
def get_periods_from_list(y_pred, label_list):
    
    pred_periods = [[] for i in range(len(label_list))]

    keep = 0

    for i in range(len(y_pred)):
        keep_lb = y_pred[keep]

        if(keep_lb!=y_pred[i]):
            
            if(y_pred[i]!=None):
                pred_periods[y_pred[i-1]].append([keep, i-1])               

            keep = i

        elif(i==len(y_pred)-1):

            if(y_pred[i]!=None):
                pred_periods[y_pred[i-1]].append([keep, i]) 

    pred_periods = np.array(pred_periods)
    
    return pred_periods

## Unused

In [18]:
def postprocess_predicted(pred_periods, y_length):
    onesec = 1  # 1 sec.
    T = 0.16    # T = 1/f

    pp_periods = []
    
    for pp in pred_periods:
        pp_i = pred_periods[pp]
        
        temp = []
        for p in pp_i:
            if(p[1]-p[0]>int(onesec*2*(1/T))):
                temp.append([p[0],p[1]])
                
        pp_periods.append(temp)

    pp_periods = np.array(pp_periods)
    
    other_label = -1
    all_run = [other_label for i in range(y_length)]

    for i in range(len(pp_periods)):
        for p in pp_periods[i]:
            for j in range(p[0],p[1]+1):
                all_run[j] = i

    for i in range(len(all_run)-1,0,-1):
        if(all_run[i-1]==other_label):
            all_run[i-1] = all_run[i]

    return all_run

# Get sequence from periods

In [19]:
def sequence_from_periods(periods, label_list):
    
    max_length = 0
    
    for label in label_list:
        if(len(periods[label])>0):
            periods_i = np.hstack(periods[label])
        
            if(max_length<max(periods_i)):
                max_length = max(periods_i)
    
    seq = ['' for i in range(max_length+1)]
    
    for label in label_list:
        for element in periods[label]:
            for i in range(element[0], element[1]+1):
                seq[i] = label
    
    return seq, max_length

# Evaluation

In [20]:
def evaluate_period(p1, p2, max_length, label_list):
    iou_all = []
    
    for lb in label_list:
        p1_onehot = []
        for i in range(max_length):
            if(p1[i]==lb):
                p1_onehot.append(1)
            else:
                p1_onehot.append(0)
                
        p2_onehot = []
        for i in range(max_length):
            if(p2[i]==lb):
                p2_onehot.append(1)
            else:
                p2_onehot.append(0)
                
        intersection = 0
        union = 0
        
        for i in range(max_length):
            if(p1_onehot[i]==1 and p2_onehot[i]==1):
                intersection += 1
            if(p1_onehot[i]==1 or p2_onehot[i]==1):
                union += 1
                
        iou_lb = intersection/union
        
        iou_all.append(iou_lb)
        
    return iou_all

# Load all data

In [21]:
def load_all_data(subject_id, label_list):
    
    print("Loading {0}'s data".format(subject_id))

    df_timer = load_timer(subject_id)
    
    start_time = df_timer.loc[0, 'time_start']
    end_time = df_timer.loc[df_timer.shape[0]-1, 'time_end']

    df_acc = load_acc(subject_id, start_time, end_time)
    df_hr = load_hr(subject_id, start_time, end_time)

    df1 = merge_acc_and_hr(df_acc, df_hr)
    ai1 = calc_ai(df1)

    df1['AI'] = pd.Series(ai1)
    
    df_acc_label, true_periods = group_dataframe_by_label(df1, df_timer, subject_id, label_list)
    
    return df_acc_label, true_periods

#     X_impure, y_impure = preprocess_data(df_acc, pca)
#     y_pred = predict_combine(X_impure, model)
#     p_periods = get_periods_from_list(y_pred, label_list)
#     pred_periods = {}

#     for i in range(len(label_list)):
#         pred_periods[label_list[i]] = p_periods[i]
    
#     pp_all = postprocess_predicted(pred_periods, len(y_pred))
    
#     p_true, len_true = sequence_from_periods(true_periods, label_list)
#     p_pred, len_pred = sequence_from_periods(pred_periods, label_list)
    
#     iou = evaluate_period(p_true, p_pred, len_pred, label_list)
    
#     print('label:', label_list)
#     print('iou:', iou)

In [22]:
all_subjects = [range(1001, 1019), range(3001, 3007)]
all_subjects = np.hstack(all_subjects)
all_subjects = [str(i) for i in all_subjects]
print(all_subjects)

label_list = ['sit', 'sleep', 'stand', 'walk']

df_all_label = {}
for label in label_list:
    df_all_label[label] = pd.DataFrame()
# print(df_all_label)

for subject_id in all_subjects:
    df_label, true_periods = load_all_data(subject_id, label_list)
    
    for label in label_list:
        
        if(df_all_label[label].empty):
            df_all_label[label] = df_label[label]
        else:
            df_all_label[label] = df_all_label[label].append(df_label[label], ignore_index=True)
            
        print(df_all_label[label].shape)
            
print('finished loading')

['1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '3001', '3002', '3003', '3004', '3005', '3006']
Loading 1001's data
(188, 7)
(188, 7)
(188, 7)
(375, 7)
Loading 1002's data
(375, 7)
(376, 7)
(376, 7)
(563, 7)
Loading 1003's data
(569, 7)
(563, 7)
(564, 7)
(937, 7)
Loading 1004's data
(757, 7)
(750, 7)
(752, 7)
(1311, 7)
Loading 1005's data
(945, 7)
(938, 7)
(940, 7)
(1686, 7)
Loading 1006's data
(1133, 7)
(1125, 7)
(1128, 7)
(2060, 7)
Loading 1007's data
(1320, 7)
(1313, 7)
(1316, 7)
(2441, 7)
Loading 1008's data
(1507, 7)
(1501, 7)
(1504, 7)
(2816, 7)
Loading 1009's data
(1702, 7)
(1689, 7)
(1692, 7)
(3029, 7)
Loading 1010's data
(1890, 7)
(1884, 7)
(1880, 7)
(3217, 7)
Loading 1011's data
(2078, 7)
(2072, 7)
(2068, 7)
(3405, 7)
Loading 1012's data
(2266, 7)
(2260, 7)
(2255, 7)
(3593, 7)
Loading 1013's data
(2454, 7)
(2448, 7)
(2442, 7)
(3781, 7)
Loading 1014's data
(2641, 7)
(2641, 7)
(2629

# Dataframe slice functions

In [23]:
def slice_dataframe(df, col, segment_length=24):
    df_list = []
    
    for i in range(df.shape[0]-segment_length):
        df_segment = df.loc[i:i+segment_length-1][col]
        df_list.append(list(df_segment))
        
    return np.array(df_list)

In [24]:
def get_df_sliced(df_all_label, label_list):
    df_sliced = {}
    slice_labels = []
    
    cols = ['x', 'y', 'z']
    
    for i, c in enumerate(cols):
        df_sliced[c] = []
        
        for lb in label_list:
            df_slice_lb_c = slice_dataframe(df_all_label[lb], c)
            print(df_slice_lb_c.shape)
            
            df_sliced[c].append(df_slice_lb_c)    # append every label to an array of axis c
    
        df_sliced[c] = np.array([item for sublist in df_sliced[c] for item in sublist])
    
    return df_sliced
                        
#     for label in label_list:
#         df_slice_i = slice_dataframe(df_all_label[label], slice_length)
#         slice_labels.append([label for i in range(df_slice_i.shape[0])])
        
#         df_slice_list.append(df_slice_i)
        
#     slice_list = [item for sublist in df_slice_list for item in sublist]
#     slice_list = [np.hstack(x) for x in slice_list]
    
#     slice_labels = np.hstack(slice_labels)
        
#     return np.array(slice_list), np.array(slice_labels)

# Create histograms for each slice

In [25]:
def normalize(v):
    return v / np.linalg.norm(v) if np.linalg.norm(v)!=0 else v

In [26]:
def most_similar_cluster_center_idx(slice_i, cluster_centers):
    min_dist = math.inf
    cluster_idx = 0
    for i, cluster in enumerate(cluster_centers):
        dist_i = np.linalg.norm(cluster-slice_i)
        if(dist_i<min_dist):
            min_dist = dist_i
            cluster_idx = i
            
    return cluster_idx

In [27]:
def histogram_for_all_slices(df_sliced_c):
    kmeans = KMeans(n_clusters=128, random_state=42).fit(df_sliced_c)
    cluster_centers = kmeans.cluster_centers_
    
    histogram = [0 for i in range(len(cluster_centers))]
    
    for slice_i in df_sliced_c:
        histogram_idx = most_similar_cluster_center_idx(slice_i, cluster_centers)
        
        histogram[histogram_idx] += 1
        
    normalized_hist = normalize(histogram)
    
    return np.array(normalized_hist)

In [28]:
df_sliced = get_df_sliced(df_all_label, label_list)

(4257, 24)
(4436, 24)
(4250, 24)
(6588, 24)
(4257, 24)
(4436, 24)
(4250, 24)
(6588, 24)
(4257, 24)
(4436, 24)
(4250, 24)
(6588, 24)


In [29]:
normalized_hist = {}
cols = ['x', 'y', 'z']

for c in cols:
    normalized_hist[c] = histogram_for_all_slices(df_sliced[c])

In [30]:
for c in cols:
    print(df_sliced[c].shape)
    print('-------')

(19531, 24)
-------
(19531, 24)
-------
(19531, 24)
-------


# Recycle bin

In [None]:
for label in label_list:
        segments_i = [df_all_label[label][a:a+seg_length] for a in range(0, df_all_label[label].shape[0], seg_length)]
    
        print(len(segments_i[-1]))
        
        
##############

slice_y.append([label for i in range(df_all_label[label].shape[0])])