# Import Core Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import math
import pickle

from os import listdir, walk
from os.path import isfile, join

In [2]:
from detect_peaks import detect_peaks

# Load Dataset

In [3]:
%run load_dataset.ipynb

In [4]:
subj_range = np.hstack((np.arange(1001,1013),np.arange(2002,2003)))

all_subjects = [str(i) for i in subj_range]

In [5]:
subj_range = np.hstack((np.arange(2001,2002),np.arange(3001,3006)))

all_patients = [str(i) for i in subj_range]

In [6]:
print(all_subjects)
print(all_patients)

['1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '2002']
['2001', '3001', '3002', '3003', '3004', '3005']


In [7]:
X_all, y_all, subj_all = load_all_data(all_subjects)

Loading 1001's data


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Loading 1002's data
Loading 1003's data
Loading 1004's data
Loading 1005's data
Loading 1006's data
Loading 1007's data
Loading 1008's data
Loading 1009's data
Loading 1010's data
Loading 1011's data
Loading 1012's data
Loading 2002's data
Finished loading


In [8]:
X_all_p, y_all_p, subj_all_p = load_all_data(all_patients)

Loading 2001's data
Loading 3001's data
Loading 3002's data
Loading 3003's data
Loading 3004's data
Loading 3005's data
Finished loading


In [9]:
X_all_p.shape

(10998, 3)

# Data Preprocessing

In [10]:
%run preprocessing.ipynb

In [11]:
print(X_all.shape, y_all.shape)

(26716, 3) (26716,)


In [12]:
X_all

array([[-2.99275204e-01, -9.03566837e-01, -6.10765714e-04],
       [-3.04649949e-01, -9.05032653e-01,  4.64181969e-03],
       [-3.01229663e-01, -9.01612347e-01,  2.68736939e-03],
       ...,
       [-8.85488163e-01,  1.68082724e-01,  2.51024714e-01],
       [-9.08941531e-01,  2.15478163e-01,  3.25293837e-01],
       [-9.29463367e-01, -8.44078224e-02,  2.32946041e-01]])

# Group Data by Label and Normalize Data

In [14]:
new_label_list = [0,1,2,3]
new_label_dict = {
    0: 'sit',
    1: 'sleep',
    2: 'stand',
    3: 'walk'
}

colors = ['r','g','b','navy','turquoise','darkorange']

## Show Plot for each Activity and Subject

In [17]:
# plot_all_label(X_label, y_all, new_label_list, new_label_dict)

# Calculate Roll, Pitch, Yaw

In [18]:
roll, pitch, yaw = calc_rpy(X_all, colors)
rpy = np.array([roll, pitch, yaw]).transpose()

print(rpy.shape, y_all.shape)

(26716, 3) (26716,)


In [19]:
roll, pitch, yaw = calc_rpy(X_all_p, colors)
rpy_p = np.array([roll, pitch, yaw]).transpose()

print(rpy_p.shape, y_all_p.shape)

(10998, 3) (10998,)


# Apply PCA

In [22]:
# apply PCA to X_all and rpy
X_pca, pca = apply_pca(X_all, y_all,label_list)
rpy_pca, pca = apply_pca(rpy, y_all, label_list)

NameError: name 'LABELS' is not defined

In [21]:
# apply PCA and LDA to X_all and rpy
X_pca_p, pca_p = apply_pca(X_all_p, y_all_p, label_list)
rpy_pca_p, pca_p = apply_pca(rpy_p, y_all_p, label_list)

NameError: name 'label_list' is not defined

# Reshape Data (Pure Label)

In [None]:
print(rpy_pca_p.shape, y_all_p.shape, subj_all_p.shape)

In [None]:
# get label-separated X and y
X_pure, y_pure = prepare_pure_label(rpy_pca_p, y_all_p, subj_all_p, all_patients, new_label_list)
y_pure = y_pure.reshape((y_pure.shape[0],))

In [None]:
print(X_pure.shape, y_pure.shape)

# Reshape Data (Impure Label)

In [None]:
X_impure, y_impure = prepare_impure_label(rpy_pca_p, y_all_p)

In [None]:
print(X_impure.shape, y_impure.shape)

# Split Train and Test Set

In [None]:
# Split training data and testing data
X_train, X_test, y_train, y_test = train_test_split(X_pure, y_pure, test_size=0.2, random_state=42)

X_tr, X_te, y_tr, y_te = train_test_split(X_impure, y_impure, test_size=0.2, random_state=42)

print(X_tr.shape)
print(X_te.shape)

In [None]:
print(X_train.shape)
print(X_test.shape)

# Find proper thresholds of walk activity

## Patients

In [None]:
X_dict = {
    'id': subj_all_p,
    'x': [X_i[0] for X_i in X_all_p],
    'y': [X_i[1] for X_i in X_all_p],
    'z': [X_i[2] for X_i in X_all_p],
    'x_pca': [X_i_pca[0] for X_i_pca in X_pca_p],
    'y_pca': [X_i_pca[1] for X_i_pca in X_pca_p],
    'z_pca': [X_i_pca[2] for X_i_pca in X_pca_p],
    'roll': [rpy_pca_i[0] for rpy_pca_i in rpy_pca_p],
    'pitch': [rpy_pca_i[1] for rpy_pca_i in rpy_pca_p],
    'yaw': [rpy_pca_i[2] for rpy_pca_i in rpy_pca_p],
    'label': y_all_p
}

df_rpy = pd.DataFrame(X_dict)

In [None]:
df_rpy

In [None]:
df_walk = df_rpy[df_rpy['label']==label_dict['walk']]
df_walk = df_walk.reset_index(drop=True)

In [None]:
df_nonwalk = df_rpy[df_rpy['label']!=label_dict['walk']]
df_nonwalk = df_nonwalk.reset_index(drop=True)

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']

df_rpy[cols].plot(figsize=(20,6))
plt.show()

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']

for c in cols:
    f, ax = plt.subplots()
    df_walk[c].plot(figsize=(20,6), ax=ax)
    ax.legend(c)
    
    plt.show()

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']

for c in cols:
    f, ax = plt.subplots()
    df_nonwalk[c].plot(figsize=(20,6), ax=ax)
    ax.legend(c)
    
    plt.show()

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']
all_avg_diff = [[],[],[]]

for subj_i in all_patients:
    df_walk_i = df_walk[df_walk['id']==subj_i]
    df_walk_i = df_walk_i.reset_index(drop=True)
    
    print(subj_i)
    
    for i in range(len(cols)):
        c = cols[i]
        
        peak_idx = detect_peaks(df_walk_i[c], show=True)    
        valley_idx = detect_peaks(df_walk_i[c], valley=True, show=True)

        peak_point = [df_walk_i.loc[i, c] for i in peak_idx]    
        valley_point = [df_walk_i.loc[i, c] for i in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx))

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        avg_diff_pv = np.mean(diff_peak_valley)

#         print(diff_peak_valley)
        print(c, avg_diff_pv)
        
        all_avg_diff[i].append(avg_diff_pv)
        
all_avg_diff = np.array(all_avg_diff)

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']
all_avg_diff_nw = [[],[],[]]

for subj_i in all_patients:
    df_nonwalk_i = df_nonwalk[df_nonwalk['id']==subj_i]
    df_nonwalk_i = df_nonwalk_i.reset_index(drop=True)
    
    print(subj_i)
    
    for i in range(len(cols)):
        c = cols[i]
        
        peak_idx = detect_peaks(df_nonwalk_i[c], show=True)    
        valley_idx = detect_peaks(df_nonwalk_i[c], valley=True, show=True)

        peak_point = [df_nonwalk_i.loc[i, c] for i in peak_idx]    
        valley_point = [df_nonwalk_i.loc[i, c] for i in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx)) - 1

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        avg_diff_pv = np.mean(diff_peak_valley)

    #     print(diff_peak_valley)
        print(c, avg_diff_pv)
        
        all_avg_diff_nw[i].append(avg_diff_pv)
        
all_avg_diff_nw = np.array(all_avg_diff_nw)

In [None]:
aad_t = all_avg_diff.transpose()

print('walking for patients')
for i in range(len(aad_t)):
    print(all_patients[i], aad_t[i])
    
aad_nw_t = all_avg_diff_nw.transpose()

print()
print('non-walking for patients')
for i in range(len(aad_nw_t)):
    print(all_patients[i], aad_nw_t[i])

In [None]:
three_sec = 20   # 3 sec/0.16 sec = 18.75 time point
one_sec = 6      # 1 sec/0.16 sec = 6.25 time point

cols = ['x_pca', 'y_pca', 'z_pca']
threshold = [0.15, 0.13, 0.11]

exceed_thres = [[],[],[]]

for cl in range(len(cols)):
    c = cols[cl]
    
    for i in range(0, len(df_walk)-three_sec, one_sec):
        df_walk_i = [df_walk.loc[j, c] for j in range(i,i+three_sec)]
        
        peak_idx = detect_peaks(df_walk_i)    
        valley_idx = detect_peaks(df_walk_i, valley=True)

        peak_point = [df_walk_i[j] for j in peak_idx]    
        valley_point = [df_walk_i[j] for j in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx))

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        diff_peak_valley = np.array(diff_peak_valley)
        
        exceed = len(diff_peak_valley[diff_peak_valley>=threshold[cl]])
        exceed_thres[cl].append(exceed)

In [None]:
three_sec = 20   # 3 sec/0.16 sec = 18.75 time point
one_sec = 6      # 1 sec/0.16 sec = 6.25 time point

cols = ['x_pca', 'y_pca', 'z_pca']
threshold = [0.15, 0.13, 0.11]

exceed_thres_nw = [[],[],[]]

for cl in range(len(cols)):
    c = cols[cl]
    
    for i in range(0, len(df_nonwalk)-three_sec, one_sec):
        df_nonwalk_i = [df_nonwalk.loc[j, c] for j in range(i,i+three_sec)]
        
        peak_idx = detect_peaks(df_nonwalk_i)    
        valley_idx = detect_peaks(df_nonwalk_i, valley=True)

        peak_point = [df_nonwalk_i[j] for j in peak_idx]    
        valley_point = [df_nonwalk_i[j] for j in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx))

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        diff_peak_valley = np.array(diff_peak_valley)
        
        exceed = len(diff_peak_valley[diff_peak_valley>=threshold[cl]])
        exceed_thres_nw[cl].append(exceed)

In [None]:
import seaborn as sns

In [None]:
for i in range(3):
    f, axis = plt.subplots(figsize=(10,6))
    ax = sns.distplot(exceed_thres[i], kde=False, ax=axis, label=cols[i])
    ax.legend()

In [None]:
for i in range(3):
    f, axis = plt.subplots(figsize=(10,6))
    ax = sns.distplot(exceed_thres_nw[i], kde=False, ax=axis, label=cols[i])
    ax.legend()
    ax.set_ylim(0,200)

## Normal People

In [None]:
X_dict_1 = {
    'id': subj_all,
    'x': [X_i[0] for X_i in X_all],
    'y': [X_i[1] for X_i in X_all],
    'z': [X_i[2] for X_i in X_all],
    'x_pca': [X_i_pca[0] for X_i_pca in X_pca],
    'y_pca': [X_i_pca[1] for X_i_pca in X_pca],
    'z_pca': [X_i_pca[2] for X_i_pca in X_pca],
    'roll': [rpy_pca_i[0] for rpy_pca_i in rpy_pca],
    'pitch': [rpy_pca_i[1] for rpy_pca_i in rpy_pca],
    'yaw': [rpy_pca_i[2] for rpy_pca_i in rpy_pca],
    'label': y_all
}

df_rpy_1 = pd.DataFrame(X_dict_1)

In [None]:
df_rpy_1

In [None]:
df_walk_1 = df_rpy_1[df_rpy_1['label']==label_dict['walk']]
df_walk_1 = df_walk_1.reset_index(drop=True)

In [None]:
df_nonwalk_1 = df_rpy_1[df_rpy_1['label']!=label_dict['walk']]
df_nonwalk_1 = df_nonwalk_1.reset_index(drop=True)

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']
all_avg_diff_1 = [[],[],[]]

for subj_i in all_subjects:
    df_walk_i = df_walk_1[df_walk_1['id']==subj_i]
    df_walk_i = df_walk_i.reset_index(drop=True)
    
    print(subj_i)
    
    for i in range(len(cols)):
        c = cols[i]
        
        peak_idx = detect_peaks(df_walk_i[c], show=True)    
        valley_idx = detect_peaks(df_walk_i[c], valley=True, show=True)

        peak_point = [df_walk_i.loc[i, c] for i in peak_idx]    
        valley_point = [df_walk_i.loc[i, c] for i in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx))

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        avg_diff_pv = np.mean(diff_peak_valley)

#         print(diff_peak_valley)
        print(c, avg_diff_pv)
        
        all_avg_diff_1[i].append(avg_diff_pv)
        
all_avg_diff_1 = np.array(all_avg_diff_1)

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']
all_avg_diff_nw_1 = [[],[],[]]

for subj_i in all_subjects:
    df_nonwalk_i = df_nonwalk_1[df_nonwalk_1['id']==subj_i]
    df_nonwalk_i = df_nonwalk_i.reset_index(drop=True)
    
    print(subj_i)
    
    for i in range(len(cols)):
        c = cols[i]
        
        peak_idx = detect_peaks(df_nonwalk_i[c], show=True)    
        valley_idx = detect_peaks(df_nonwalk_i[c], valley=True, show=True)

        peak_point = [df_nonwalk_i.loc[i, c] for i in peak_idx]    
        valley_point = [df_nonwalk_i.loc[i, c] for i in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx)) - 1

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        avg_diff_pv = np.mean(diff_peak_valley)

    #     print(diff_peak_valley)
        print(c, avg_diff_pv)
        
        all_avg_diff_nw_1[i].append(avg_diff_pv)
        
all_avg_diff_nw_1 = np.array(all_avg_diff_nw_1)

In [None]:
aad_t_1 = all_avg_diff_1.transpose()

print('walking for normal people')
for i in range(len(aad_t_1)):
    print(all_subjects[i], aad_t_1[i])
    
aad_t_nw_1 = all_avg_diff_nw_1.transpose()

print()
print('non-walking for normal people')
for i in range(len(aad_t_nw_1)):
    print(all_subjects[i], aad_t_nw_1[i])

In [None]:
three_sec = 20   # 3 sec/0.16 sec = 18.75 time point
one_sec = 6      # 1 sec/0.16 sec = 6.25 time point

cols = ['x_pca', 'y_pca', 'z_pca']
threshold_1 = [0.28, 0.18, 0.18]

exceed_thres_1 = [[],[],[]]

for cl in range(len(cols)):
    c = cols[cl]
    
    for i in range(0, len(df_walk_1)-three_sec, one_sec):
        df_walk_i = [df_walk_1.loc[j, c] for j in range(i,i+three_sec)]
        
        peak_idx = detect_peaks(df_walk_i)    
        valley_idx = detect_peaks(df_walk_i, valley=True)

        peak_point = [df_walk_i[j] for j in peak_idx]    
        valley_point = [df_walk_i[j] for j in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx))

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        diff_peak_valley = np.array(diff_peak_valley)
        
        exceed = len(diff_peak_valley[diff_peak_valley>=threshold_1[cl]])
        exceed_thres_1[cl].append(exceed)

In [None]:
three_sec = 20   # 3 sec/0.16 sec = 18.75 time point
one_sec = 6      # 1 sec/0.16 sec = 6.25 time point

cols = ['x_pca', 'y_pca', 'z_pca']
threshold_1 = [0.28, 0.18, 0.18]

exceed_thres_nw_1 = [[],[],[]]

for cl in range(len(cols)):
    c = cols[cl]
    
    for i in range(0, len(df_nonwalk_1)-three_sec, one_sec):
        df_nonwalk_i = [df_nonwalk_1.loc[j, c] for j in range(i,i+three_sec)]
        
        peak_idx = detect_peaks(df_nonwalk_i)    
        valley_idx = detect_peaks(df_nonwalk_i, valley=True)

        peak_point = [df_nonwalk_i[j] for j in peak_idx]    
        valley_point = [df_nonwalk_i[j] for j in valley_idx]

        min_length = min(len(peak_idx), len(valley_idx))

        diff_peak_valley = [np.abs(peak_point[i] - valley_point[i]) for i in range(min_length)]
        diff_peak_valley = np.array(diff_peak_valley)
        
        exceed = len(diff_peak_valley[diff_peak_valley>=threshold_1[cl]])
        exceed_thres_nw_1[cl].append(exceed)

In [None]:
for i in range(3):
    f, axis = plt.subplots(figsize=(10,6))
    ax = sns.distplot(exceed_thres_1[i], kde=False, ax=axis, label=cols[i])
    ax.legend()

In [None]:
for i in range(3):
    f, axis = plt.subplots(figsize=(10,6))
    ax = sns.distplot(exceed_thres_nw_1[i], kde=False, ax=axis, label=cols[i])
    ax.legend()
    ax.set_ylim(0,500)

# K-Nearest Neighbors

In [None]:
nn_model = nn_classifier(X_train, y_train)
print("Finished training")

In [None]:
filename = basepath + 'model/knn_model.pkl'
pickle.dump(nn_model, open(filename, 'wb'))

In [None]:
y_pred = nn_model.predict(X_test)

In [None]:
nn_model_2 = nn_classifier(X_tr, y_tr)
print("Finished training")

In [None]:
y_pred_2 = nn_model_2.predict(X_te)

## Evaluation

In [None]:
%run eval_score.ipynb

In [None]:
LABELS = ['sit','sleep','stand','walk']

In [None]:
acc = accuracy_score(y_test, y_pred)
print(acc)

show_conf_matrix(y_test, y_pred, LABELS)
show_clf_report(y_test, y_pred, LABELS)

In [None]:
acc = accuracy_score(y_te, y_pred_2)
print(acc)

show_conf_matrix(y_te, y_pred_2, LABELS)
show_clf_report(y_te, y_pred_2, LABELS)

# Walk Algorithm

In [None]:
%run classifier_alg.ipynb

In [None]:
cols = ['x_pca', 'y_pca', 'z_pca']
xyz_pca = df_rpy[cols].to_dict(orient='split')['data']

In [None]:
xyz_pca

In [None]:
# classify walking
walk = calc_walk(X_all)
walk_its = intersection_walk(walk)
walk_pred_p = calc_walk_periods(walk_its)

# walk_stairs_exact_p = get_exact_walk_stairs(y_all)
walk_exact_p = get_exact_walk(y_all)
    
walk_pred = binarize_walk_prd(walk_pred_p, y_all)
# walk_stairs_exact = binarize_walk_prd(walk_stairs_exact_p, y_all)
walk_exact = binarize_walk_prd(walk_exact_p, y_all)

## Walk Algorithm Evaluation

In [None]:
walk_lbl = ['NaN','walk']

In [None]:
acc = accuracy_score(walk_exact, walk_pred)
print(acc)

show_conf_matrix(walk_exact, walk_pred, walk_lbl)
show_clf_report(walk_exact, walk_pred, walk_lbl)

# Combine SVM and Walk Algorithm

In [None]:
y_pred_new = combine(X_test, y_pred)

In [None]:
acc = accuracy_score(y_test, y_pred_new)
print(acc)

show_conf_matrix(y_test, y_pred_new, LABELS)
show_clf_report(y_test, y_pred_new, LABELS)

# Test Model with Some Subjects

In [None]:
%run test_model.ipynb

In [None]:
filename = basepath + 'model/knn_model.pkl'

model = pickle.load(open(filename,'rb'))

In [None]:
all_subjects = [str(i) for i in range(1001,1009)]
all_subjects.append('2001')
all_subjects.append('2002')

In [None]:
call_functions(all_subjects)

In [None]:
all_subjects = [str(i) for i in range(3001,3006)]

call_functions(all_subjects)

In [None]:
s = '3004'
df_sid = load_actual_timer(s)
df_test = load_data(s, df_sid)

X_vis_imp, ts_list_imp = preprocess_data(df_test, pca)
df_y = predict(X_vis_imp, ts_list_imp)

df_test, df_y = prepare_actual_lb(df_test, df_y, df_sid)

actual_periods = get_actual_periods(df_test)
pred_periods = get_predicted_periods(df_y)
pp_all_run = postprocess_predicted(pred_periods, df_y)

df_y['y_pred'] = pd.Series(pp_all_run)
pp_periods = get_predicted_periods(df_y)
plot_highlighted(s, df_test, pred_periods, pp_periods, actual_periods)

evaluate(df_y)

In [None]:
LABELS = ['sit', 'sleep', 'stand', 'walk']

df_y_notnull = df_y.dropna()
df_y_notnull = df_y_notnull.reset_index(drop=True)

actual_y = list(df_y_notnull['y_actual'])
pred_y = list(df_y_notnull['y_pred'])

last = len(pred_y)

for i in range(len(pred_y)):
    if(pred_y[i]==-1):
        last = i
        break

pred_y = pred_y[:last]
actual_y = actual_y[:last]

acc = accuracy_score(actual_y, pred_y)
print(acc)

show_conf_matrix(actual_y, pred_y, LABELS)
# show_clf_report(actual_y, pred_y, LABELS)

labels_list = [0,1,2,3]
report = classification_report(actual_y, pred_y, labels_list, output_dict=True)
print(report['macro avg'])

In [None]:
df_test['x'][2200:].plot()

plt.show()

# Display Table

In [None]:
from prettytable import PrettyTable

In [None]:
print(s)
print(df_y)

In [None]:
label_period = []
period_list = [[] for i in range(len(LABELS))]

first = 0
keep = 0

for i in range(len(df_y)):
    if(calc_sec(df_y.loc[i]['timestamp'].split(' ')[1])>=calc_sec(df_sid.loc[0]['timestamp'].split(' ')[1]) and
       calc_sec(df_y.loc[i]['timestamp'].split(' ')[1])<=calc_sec(df_sid.loc[len(df_sid)-1]['timestamp'].split(' ')[1])):
        
        keep_lb = df_y.loc[keep]['y_pred']

        if(keep_lb!=df_y.loc[i]['y_pred']):
            label_period.append([df_y.loc[keep]['timestamp'], df_y.loc[i-1]['timestamp'], 
                                 df_y.loc[i-1]['y_pred']])

            period_list[df_y.loc[i-1]['y_pred']].append([df_y.loc[keep]['timestamp'], df_y.loc[i-1]['timestamp']])

            keep = i

In [None]:
labels_list = ['sit', 'sleep', 'stand', 'walk']
headers = ['start', 'end', 'pred']

t = PrettyTable(headers)

for row in label_period:
#     if(calc_sec(row[1].split(' ')[1])-calc_sec(row[0].split(' ')[1])>1):
    t.add_row([row[0], row[1], labels_list[row[2]]])

In [None]:
print(t)

In [None]:
print(len(label_period))

label_cnt_list = [0 for i in range(len(labels_list))]
for lb_p in label_period:
    label_i = lb_p[2]
    
    label_cnt_list[label_i] += 1

activity_changes = []
for i in range(len(labels_list)):
    activity_changes.append([labels_list[i], label_cnt_list[i]])
    
print(activity_changes)

In [None]:
headers = ['Label', 'Activities Count']
tabl_act_chng = PrettyTable(headers)

for ac in activity_changes:
    tabl_act_chng.add_row([ac[0], ac[1]])
    
tabl_act_chng.add_row(['', ''])
tabl_act_chng.add_row(['total changes', len(label_period)])

In [None]:
print(tabl_act_chng)

## Active Inactive AC (ALL)

In [None]:
headers = ['Label', 'Activities Count']
tabl_act = PrettyTable(headers)
inactive_table = []
active_table = []

sum = 0
sum_2 = 0
for ac in activity_changes:
    if(ac[0] == 'sit' or ac[0] == 'sleep'):
        sum += ac[1]
    else :
        sum_2 += ac[1]

tabl_act.add_row(['Inactive', sum])
tabl_act.add_row(['Active', sum_2])

In [None]:
print(tabl_act)

# Convert Time to String Method

In [None]:
def convert_time_to_string(sec):
    minute = math.floor(sec/60)
    sec = int(sec%60)

    time_string = str(minute) + ':' + str(sec)
    if(sec<10):
        time_string = str(minute) + ':0' + str(sec)
    
    return time_string

# Predicted Duration

In [None]:
total_secs = []
for i in range(len(period_list)):    
    secs = 0
    for p_i in period_list[i]:
        sec = calc_sec(p_i[1].split(' ')[1]) - calc_sec(p_i[0].split(' ')[1])
        secs += sec
    
    secs = round(secs, 3)
    total_secs.append(secs)
    
percent_secs = [round(t/np.sum(total_secs)*100, 3) for t in total_secs]

tb = PrettyTable(['Label', 'Minutes', 'Percentage', 'Activity Count'])

for i in range(len(LABELS)):
    tb.add_row([labels_list[i], convert_time_to_string(total_secs[i]), percent_secs[i], label_cnt_list[i]])

tb.add_row(['', '', '',''])
tb.add_row(['total', convert_time_to_string(round(np.sum(total_secs), 3)), 
            round(np.sum(percent_secs), 3), len(label_period)])

# Actual Duration

In [None]:
df_lb = df_sid.groupby('label')

dura_dict = {}
for lb in labels_list:
    dura_dict[lb] = 0

idx = list(df_sid.index)
for i in range(len(labels_list)):
    lb = labels_list[i]
    df_temp = df_lb.get_group(lb)
    df_temp = df_temp.reset_index(drop=True)
        
    if(lb=='downstairs' or lb=='upstairs'):
        lb = 'walk'
    
    for j in range(len(df_temp)):
        dura_dict[lb] += calc_sec(df_temp.loc[j]['duration'])
        
total_dura = np.sum([dura_dict[lb] for lb in labels_list])

percent_list = []
        
tabl = PrettyTable(['Label', 'Minutes', 'Percentage'])
for lb in labels_list:
    percent = round(dura_dict[lb]/total_dura*100, 3)
    tabl.add_row([lb, convert_time_to_string(dura_dict[lb]), round(dura_dict[lb]/total_dura*100, 3)])
    
    percent_list.append(percent)
    
tabl.add_row(['', '', ''])    
tabl.add_row(['total', convert_time_to_string(total_dura), round(np.sum(percent_list), 3)])

# Activity Durations Table

In [None]:
print('Prediction')
print(tb)

print('Actual')
print(tabl)

# Bar Chart for Every 5 Minutes

In [None]:
s_idx = 0
f_idx = 1
lb_idx = 2

## Separate Each 5 Minutes

In [None]:
fivemin = 60*5
new_label_period = []

start_time = calc_sec(label_period[0][s_idx].split(' ')[1])
finish_time = calc_sec(label_period[-1][f_idx].split(' ')[1])

floor_start = start_time - (start_time%fivemin)
ceil_finish = finish_time - (finish_time%fivemin) + fivemin

print(calc_ts(floor_start), calc_ts(ceil_finish))

tm_s = floor_start
tm_f = floor_start + fivemin
date = label_period[0][s_idx].split(' ')[0]

for prd in label_period:
    if(calc_sec(prd[f_idx].split(' ')[1])>=tm_f):
        new_prd = [prd[s_idx], date + ' ' + calc_ts(tm_f), prd[lb_idx]]
        new_label_period.append(new_prd)
        
        tm_s += fivemin
        tm_f += fivemin
    else:
        new_label_period.append(prd)
                
    if(calc_sec(prd[s_idx].split(' ')[1])<tm_s):
        new_prd = [date + ' ' + calc_ts(tm_s), prd[f_idx], prd[lb_idx]]
        new_label_period.append(new_prd)

In [None]:
all_periods_label = []

for t_i in range(int(floor_start), int(ceil_finish), fivemin):
    period_lb = [0 for i in range(len(LABELS))]
    for prd in new_label_period:
        if(calc_sec(prd[s_idx].split(' ')[1])>=t_i and calc_sec(prd[f_idx].split(' ')[1])<=t_i+fivemin):
            period_lb[prd[lb_idx]] += calc_sec(prd[f_idx].split(' ')[1])-calc_sec(prd[s_idx].split(' ')[1])
            period_lb[prd[lb_idx]] = round(period_lb[prd[lb_idx]], 3)
            
    all_periods_label.append(period_lb)

In [None]:
df_all = pd.DataFrame(all_periods_label, columns=labels_list)

## Plot Bar Graph

In [None]:
pos = list(range(len(df_all['sit'])))
width = 0.2
colors = ['crimson','gold','lime','dodgerblue']

fig, ax = plt.subplots(figsize=(10,5))

for i in range(len(LABELS)):
    plt.bar([p + i*width for p in pos],
            df_all[labels_list[i]],
            width,
            alpha=0.5,
            color=colors[i],
            label=labels_list[i])
    
ax.set_xticks([p + 1.5 * width for p in pos])

xtick_labels = [calc_ts(floor_start + i*fivemin) + '-' + calc_ts(floor_start + (i+1)*fivemin)
                for i in range(len(df_all))]
ax.set_xticklabels(xtick_labels)

ax.set_ylabel('Time (sec)')

plt.xlim(min(pos)-width, max(pos)+width*4)
plt.legend(loc='upper left')
plt.title('Activity Summary for Subject ID: ' + s)

plt.show()