In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from os import listdir

In [7]:
%run preprocess_for_SVM.ipynb

In [8]:
%run eval_score.ipynb

In [9]:
def calc_sec(time):
    """Convert a colon-separated ``'H:M:S'`` clock string into seconds.

    Every colon-separated field is parsed as a float; the first three are
    interpreted as hours, minutes and seconds.

    Parameters
    ----------
    time : str
        Clock string such as ``'13:05:07.250'``.

    Returns
    -------
    float
        Seconds since ``0:0:0``, rounded to three decimals.
    """
    fields = list(map(float, time.split(':')))
    # Same summation order as seconds + minutes + hours keeps results bit-identical.
    return round(fields[2] + fields[1]*60 + fields[0]*3600, 3)

In [10]:
def calc_ts(sec):
    """Format a number of seconds as an ``'h:m:s'`` string (no zero padding).

    Parameters
    ----------
    sec : int or float
        Seconds since ``0:0:0``.

    Returns
    -------
    str
        Hours and minutes as integers, seconds rounded to three decimals,
        joined with ``':'`` (for example ``'1:2:3.457'``).
    """
    hours = int(sec/3600)
    leftover = sec - (hours*3600)        # seconds remaining after whole hours
    minutes = int(leftover/60)
    seconds = round(leftover - (minutes*60), 3)
    return ':'.join(str(part) for part in (hours, minutes, seconds))

# Get the Actual Timestamp Labels

In [24]:
def load_actual_timer(test_subj):
    """Load the ground-truth activity timer log for one subject.

    Looks in ``DDC_Data/<test_subj>/`` for files whose name contains
    ``'history_amdtimer'`` and reads the alphabetically first one as a
    header-less CSV with columns ``sid, raw_label, timestamp, duration``.
    Only rows whose ``sid`` equals ``int(test_subj)`` are kept, and three
    derived columns are added:

    * ``label``  -- ``raw_label`` with ``'upstairs'`` and ``'downstairs'``
      replaced by ``'walk'``.
    * ``start``  -- the second space-separated token of ``timestamp``
      (the clock part of a ``'<date> <time>'`` string).
    * ``finish`` -- ``start`` plus ``duration``, formatted by ``calc_ts``.

    Parameters
    ----------
    test_subj : str
        Subject id; used both as the directory name and, converted with
        ``int``, as the value matched against the ``sid`` column.

    Returns
    -------
    pandas.DataFrame
        The subject's rows with a fresh 0..n-1 index and the extra columns
        ``label``, ``start`` and ``finish``.

    Raises
    ------
    FileNotFoundError
        If the subject directory contains no ``history_amdtimer`` file.
    """
    timer_dir = 'DDC_Data/' + test_subj + '/'
    # listdir() returns entries in arbitrary order; sort so the chosen file
    # is the same on every run when more than one matches.
    timer_files = sorted(f for f in listdir(timer_dir) if 'history_amdtimer' in f)
    if not timer_files:
        raise FileNotFoundError(
            "No 'history_amdtimer' file found in " + timer_dir)
    timer_path = timer_dir + timer_files[0]

    df_timer = pd.read_csv(timer_path, header=None,
                           names=['sid', 'raw_label', 'timestamp', 'duration'])

    # reset_index gives a new frame, so the column assignments below do not
    # write into a view of df_timer.
    df_sid = df_timer[df_timer['sid'] == int(test_subj)].reset_index(drop=True)

    # Stair climbing in either direction is treated as walking.
    df_sid['label'] = df_sid['raw_label'].map(
        lambda lb: 'walk' if lb in ('upstairs', 'downstairs') else lb)

    start_clock = [ts.split(' ')[1] for ts in df_sid['timestamp']]
    df_sid['start'] = start_clock
    df_sid['finish'] = [calc_ts(calc_sec(start) + calc_sec(duration))
                        for start, duration in zip(start_clock, df_sid['duration'])]

    return df_sid

# Load Data of the Subject

In [12]:
def load_data(test_subj, df_sid):
    """Load a subject's accelerometer log, restricted to the labelled window.

    Reads ``DDC_Data/<test_subj>/<test_subj>-log_acc.csv`` as a header-less
    CSV with columns ``x, y, z, timestamp`` and keeps only the rows whose
    clock time lies between the first interval's ``start`` and the last
    interval's ``finish`` in ``df_sid`` (both bounds inclusive).

    Only the clock part of each timestamp (second space-separated token) is
    compared; the date part is ignored here.

    Parameters
    ----------
    test_subj : str
        Subject id, used to build the file path.
    df_sid : pandas.DataFrame
        Timer intervals with ``start`` and ``finish`` clock-string columns,
        ordered so that the first row starts earliest and the last row
        finishes latest.

    Returns
    -------
    pandas.DataFrame
        The in-window samples with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``df_sid`` has no rows, so no window can be defined.
    """
    filepath = 'DDC_Data/' + test_subj + '/' + test_subj + '-log_acc.csv'

    df_test = pd.read_csv(filepath, header=None, names=['x','y','z','timestamp'])

    if len(df_sid) == 0:
        raise ValueError("df_sid is empty: no labelled interval to bound the data for "
                         + test_subj)

    # The window bounds do not depend on the sample, so parse them once.
    window_start = calc_sec(df_sid['start'].iloc[0])
    window_end = calc_sec(df_sid['finish'].iloc[-1])

    # One pass over the timestamp column instead of two .loc lookups per row.
    sample_sec = np.array(
        [calc_sec(ts.split(' ')[1]) for ts in df_test['timestamp']], dtype=float)
    in_window = (sample_sec >= window_start) & (sample_sec <= window_end)

    return df_test[in_window].reset_index(drop=True)

# Preprocess (PCA Projection and Impure-Label Preparation)

In [29]:
def preprocess_data(df_test, pca):
    """Scale the accelerations, project them with ``pca`` and prepare them for prediction.

    Each of ``x``, ``y``, ``z`` is divided by ``g = 9.8``; the resulting
    matrix is passed through ``pca.transform`` and then, together with the
    timestamps, through ``prepare_impure_label`` (defined outside this
    notebook section).

    Parameters
    ----------
    df_test : pandas.DataFrame
        Samples with ``x``, ``y``, ``z`` and ``timestamp`` columns, indexed
        0..n-1.
    pca : object
        Fitted transformer exposing ``transform``.

    Returns
    -------
    tuple
        ``(X_vis_imp, ts_list_imp)`` exactly as returned by
        ``prepare_impure_label``.
    """
    g = 9.8

    scaled_rows = []
    stamps = []
    for idx in range(len(df_test)):
        row = df_test.loc[idx]
        scaled_rows.append([row[axis]/g for axis in ('x', 'y', 'z')])
        stamps.append(row['timestamp'])

    X_visua = np.vstack(np.array(scaled_rows))
    ts_list = np.array(stamps)

    X_vis_imp, ts_list_imp = prepare_impure_label(pca.transform(X_visua), ts_list)

    return X_vis_imp, ts_list_imp

# Predict

In [14]:
def predict(X_vis_imp, ts_list_imp):
    """Run the global ``svm_model`` on the prepared features.

    The raw model output is passed, together with the features, through
    ``combine`` (defined outside this notebook section) before being
    tabulated.

    Parameters
    ----------
    X_vis_imp : array-like
        Feature matrix accepted by ``svm_model.predict``.
    ts_list_imp : array-like
        Timestamps aligned with the combined predictions.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``timestamp`` and ``y_pred``.
    """
    raw_pred = svm_model.predict(X_vis_imp)
    print("Finished prediction")

    combined_pred = combine(X_vis_imp, raw_pred)

    return pd.DataFrame({'timestamp': ts_list_imp, 'y_pred': combined_pred})

# Prepare Actual Labels

In [15]:
def prepare_actual_lb(df_test, df_y, df_sid):
    """Attach the ground-truth label of every sample as column ``y_actual``.

    The samples in ``df_test`` and the intervals in ``df_sid`` are walked in
    parallel. A sample gets ``label_dict[<interval label>]`` when its date
    equals the interval's date and its clock time lies within
    ``[start, finish]`` (inclusive); otherwise it gets ``None``.

    Exactly one entry is produced per sample, so the label column always has
    the same length as ``df_test``. When a sample lies past the current
    interval's finish, the walker advances through as many intervals as
    needed and then evaluates that same sample against the interval it
    landed on, so the first sample of an interval is not lost.

    Parameters
    ----------
    df_test : pandas.DataFrame
        Samples with a ``'<date> <time>'`` ``timestamp`` column, indexed
        0..n-1 and ordered by time. Modified in place.
    df_y : pandas.DataFrame
        Prediction table; receives the same label series, aligned on index.
        Modified in place.
    df_sid : pandas.DataFrame
        Intervals with ``timestamp``, ``start``, ``finish`` and ``label``
        columns, ordered by time.

    Returns
    -------
    tuple
        ``(df_test, df_y)`` with the new ``y_actual`` column.
    """
    # Parse every interval once instead of once per sample.
    intervals = [
        (stamp.split(' ')[0], calc_sec(start), calc_sec(finish), label)
        for stamp, start, finish, label in zip(
            df_sid['timestamp'], df_sid['start'], df_sid['finish'], df_sid['label'])
    ]
    last_idx = len(intervals) - 1

    sid_idx = 0
    label_arr = []

    for stamp in df_test['timestamp']:
        parts = stamp.split(' ')
        date = parts[0]
        ts_sec = calc_sec(parts[1])

        label = None
        if intervals:
            # Skip intervals that ended before this sample. Only advance while
            # the dates agree, and never move past the last interval.
            while (sid_idx < last_idx
                   and date == intervals[sid_idx][0]
                   and ts_sec > intervals[sid_idx][2]):
                sid_idx += 1

            iv_date, iv_start, iv_finish, iv_label = intervals[sid_idx]
            if date == iv_date and iv_start <= ts_sec <= iv_finish:
                label = label_dict[iv_label]

        # Always append, so label_arr stays aligned with df_test row for row.
        label_arr.append(label)

    df_test['y_actual'] = pd.Series(np.array(label_arr))
    df_y['y_actual'] = pd.Series(np.array(label_arr))

    return df_test, df_y

In [16]:
def get_actual_periods(df_test, n_labels=None):
    """Group consecutive equal ground-truth labels into index periods.

    Parameters
    ----------
    df_test : pandas.DataFrame
        Frame with a ``y_actual`` column of integer label ids, with
        ``None``/NaN marking unlabelled samples. Rows are addressed by
        position, which equals the index label for a 0..n-1 index.
    n_labels : int, optional
        Number of label classes. Defaults to ``len(LABELS)``, using the
        notebook-level ``LABELS`` list.

    Returns
    -------
    numpy.ndarray
        1-D object array of length ``n_labels``; element ``k`` is a list of
        ``[first_row, last_row]`` pairs (both inclusive) for every run of
        label ``k``. Unlabelled runs are not reported.
    """
    if n_labels is None:
        n_labels = len(LABELS)

    labels = list(df_test['y_actual'])
    periods = [[] for _ in range(n_labels)]

    run_start = 0
    for i, label in enumerate(labels):
        # The run continues while the next sample carries the same label.
        if i + 1 < len(labels) and labels[i + 1] == label:
            continue

        # The run ends at i, which also covers the final sample of the frame.
        if not pd.isna(label):
            periods[int(label)].append([run_start, i])
        run_start = i + 1

    # The per-label lists usually have different lengths; fill an object
    # array explicitly because np.array() rejects ragged input on recent NumPy.
    actual_periods = np.empty(n_labels, dtype=object)
    for k, spans in enumerate(periods):
        actual_periods[k] = spans

    return actual_periods

# Prepare Predicted Labels

In [17]:
def get_predicted_periods(df_y, n_labels=None):
    """Group consecutive equal predicted labels into index periods.

    Parameters
    ----------
    df_y : pandas.DataFrame
        Frame with a ``y_pred`` column of integer label ids. Rows are
        addressed by position, which equals the index label for a 0..n-1
        index.
    n_labels : int, optional
        Number of label classes. Defaults to ``len(LABELS)``, using the
        notebook-level ``LABELS`` list.

    Returns
    -------
    numpy.ndarray
        1-D object array of length ``n_labels``; element ``k`` is a list of
        ``[first_row, last_row]`` pairs (both inclusive) for every run of
        label ``k``. Runs whose label is missing or outside
        ``0..n_labels-1`` (such as the ``-1`` "no value" sentinel written by
        ``postprocess_predicted``) are not reported.
    """
    if n_labels is None:
        n_labels = len(LABELS)

    preds = list(df_y['y_pred'])
    periods = [[] for _ in range(n_labels)]

    run_start = 0
    for i, label in enumerate(preds):
        # The run continues while the next sample carries the same label.
        if i + 1 < len(preds) and preds[i + 1] == label:
            continue

        # The run ends at i. The range check keeps a negative sentinel from
        # being filed under the last class through negative list indexing.
        if not pd.isna(label) and 0 <= label < n_labels:
            periods[int(label)].append([run_start, i])
        run_start = i + 1

    # The per-label lists usually have different lengths; fill an object
    # array explicitly because np.array() rejects ragged input on recent NumPy.
    pred_periods = np.empty(n_labels, dtype=object)
    for k, spans in enumerate(periods):
        pred_periods[k] = spans

    return pred_periods

In [18]:
def postprocess_predicted(pred_periods, df_y, onesec=1, T=0.16):
    """Drop short predicted periods and rebuild a per-sample label list.

    A period ``[first, last]`` is kept only when ``last - first`` exceeds
    ``int(onesec * 2 / T)`` samples. Samples covered by a kept period get
    that period's label index; every other sample starts as ``-1``. A
    backward pass then gives each ``-1`` sample the value of the sample that
    follows it, so gaps take the label of the next kept period. Samples after
    the last kept period stay ``-1``.

    Parameters
    ----------
    pred_periods : sequence
        Per-label sequences of ``[first, last]`` row pairs, as produced by
        ``get_predicted_periods``.
    df_y : sized
        Only ``len(df_y)`` is used, to size the output.
    onesec : float, optional
        Threshold unit; the minimum span is ``2 * onesec / T`` samples.
    T : float, optional
        Divisor that turns the threshold into a sample count -- presumably
        the sampling period in seconds (TODO confirm).

    Returns
    -------
    list of int
        One label index (or ``-1``) per row of ``df_y``.
    """
    min_span = int(onesec*2/T)
    no_val = -1
    all_run = [no_val for _ in range(len(df_y))]

    # Work on plain lists: the per-label period lists are ragged, and
    # np.array() on ragged input raises on recent NumPy versions.
    for label, spans in enumerate(pred_periods):
        for p in spans:
            if p[1] - p[0] > min_span:
                for j in range(p[0], p[1] + 1):
                    all_run[j] = label

    # Walk from the end so a gap inherits the label of the period after it.
    for i in range(len(all_run) - 1, 0, -1):
        if all_run[i - 1] == no_val:
            all_run[i - 1] = all_run[i]

    return all_run

# Plot X, Y, Z-axis Accelerations with Labels Highlighted

In [19]:
def plot_highlighted(test_subj, df_test, pred_periods, actual_periods):
    """Plot the x/y/z traces twice, shaded by predicted and by actual periods.

    Two figures are drawn from ``df_test``: the first is shaded with
    ``pred_periods``, the second with ``actual_periods``. Each label index
    gets its own colour. Both figures are shown and then closed.

    Parameters
    ----------
    test_subj : str
        Subject id, used in the figure titles.
    df_test : pandas.DataFrame
        Samples with ``x``, ``y`` and ``z`` columns.
    pred_periods, actual_periods : sequence
        Per-label sequences of ``[first, last]`` row pairs to shade.
    """
    color_list = ['indianred','khaki','lightgreen','skyblue']   # sit, sleep, stand, walk

    def _plot_with_spans(periods):
        # One figure of the raw traces with a coloured band per period.
        axes = df_test.plot(y=['x','y','z'], figsize=(15,4), color=['r','g','b'])
        for label_idx, spans in enumerate(periods):
            for span in spans:
                axes.axvspan(span[0], span[1], color=color_list[label_idx])
        return axes

    ax_pred = _plot_with_spans(pred_periods)
    ax_actual = _plot_with_spans(actual_periods)

    ax_pred.set_title('Prediction for ' + test_subj)
    ax_actual.set_title('Actual for ' + test_subj)
    for axes in (ax_pred, ax_actual):
        axes.legend(loc='upper right')

    figures = [ax_pred.get_figure(), ax_actual.get_figure()]

    plt.show()
    # Close explicitly so figures do not pile up when called in a loop.
    for figure in figures:
        plt.close(figure)

# Evaluation

In [20]:
def evaluate(df_y):
    """Print accuracy, confusion matrix and classification report for one subject.

    Rows containing any missing value are dropped first. The comparison then
    stops at the first prediction equal to ``-1``; that row and everything
    after it are excluded.

    Parameters
    ----------
    df_y : pandas.DataFrame
        Frame with ``y_actual`` and ``y_pred`` columns.
    """
    LABELS = ['sit', 'sleep', 'stand', 'walk']

    labelled = df_y.dropna().reset_index(drop=True)

    actual_y = list(labelled['y_actual'])
    pred_y = list(labelled['y_pred'])

    # Position of the first -1 prediction, or the full length when there is none.
    last = next((idx for idx, value in enumerate(pred_y) if value == -1), len(pred_y))

    pred_y = pred_y[:last]
    actual_y = actual_y[:last]

    acc = accuracy_score(actual_y, pred_y)
    print(acc)

    show_conf_matrix(actual_y, pred_y, LABELS)
    show_clf_report(actual_y, pred_y, LABELS)

# Function Call

In [32]:
# Subject ids 3001..3005 as strings (they double as directory names).
all_subjects = list(map(str, range(3001, 3006)))

In [30]:
def call_functions(all_subjects):
    """Run the per-subject pipeline for every id in ``all_subjects``.

    For each subject: load the timer intervals and the accelerometer data,
    preprocess with the notebook-level ``pca``, predict, attach the actual
    labels, extract actual and predicted periods, post-process the
    predictions and write them back into ``df_y['y_pred']``.

    Parameters
    ----------
    all_subjects : iterable of str
        Subject ids, each passed to ``load_actual_timer`` and ``load_data``.
    """
    for s in all_subjects:
        print("Loading {0}'s data".format(s))

        # Ground-truth intervals first: load_data needs them to crop the signal.
        df_sid = load_actual_timer(s)
        df_test = load_data(s, df_sid)

        # `pca` is a notebook-level global, not defined in this function.
        X_vis_imp, ts_list_imp = preprocess_data(df_test, pca)
        df_y = predict(X_vis_imp, ts_list_imp)

        df_test, df_y = prepare_actual_lb(df_test, df_y, df_sid)

        actual_periods = get_actual_periods(df_test)
        pred_periods = get_predicted_periods(df_y)
        pp_all_run = postprocess_predicted(pred_periods, df_y)

        # Replace the raw predictions with the post-processed ones, then
        # recompute the periods from the cleaned labels.
        df_y['y_pred'] = pd.Series(pp_all_run)
        pp_periods = get_predicted_periods(df_y)
#         plot_highlighted(s, df_test, pp_periods, actual_periods)