**Qustion: Can IMU data of a frame be used to predict the active duty cycles (meaningful states detected)?** 

Here we treat it as a binary classification problem.  IMU data -> active cycle (T/F)

In [114]:
from __future__ import absolute_import, division, print_function

import collections
import glob
import itertools
import json
import matplotlib
rc_fonts = {
    "font.weight": 800,
    "font.family": "serif",
    "font.serif": ["Times"], # use latex's default
    "font.sans-serif": ["DejaVu Sans"],
    "text.usetex": True,
}
matplotlib.rcParams.update(rc_fonts)
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import re
import sqlalchemy as sa
from numpy import linalg as LA

from rmexp import dbutils, config
from rmexp import schema
from rmexp.schema import models

In [115]:
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from scipy.ndimage.filters import gaussian_filter1d

In [116]:
def insert_or_update_one(sess, model, keys_dict, vals_dict):
    record = sess.query(model).filter_by(**keys_dict).one_or_none()
    if record is not None:
        record.update(vals_dict)
    else:
        create_dict = {}
        create_dict.update(keys_dict)
        create_dict.update(vals_dict)
        record = model(**create_dict)
        sess.add(record)
    return record

In [143]:
APP = "pingpong"

if APP == 'lego':
    ## settings for Lego
    # 5 and 8 lack patterns
    TRAIN_TRACES = ['lego-tr' + str(i) for i in (1, 2, 3, 4, 6)]
    TEST_TRACES = ['lego-tr' + str(i) for i in (16, 18)]
    HANDLABELED_PASSIVE = {}
    HANDLABELED_ACTIVE = {
        'lego-tr1': [
            (230, 330),
            (430, 560),
            (730, 920),
            (1000, 1300),
            (1600, 1760),
            (2088, 2200),
            (2400, 2605),
        ],
        'lego-tr2': [
            (160, 390),
            (600, 850),
            (990, 1250),
            (1300, 1560),
            (1790, 2100),
            (2200, 2480),
            (2700, 2981),
            
        ],
        'lego-tr3': [
            (300, 500),
            (630, 860),
            (1090, 1290),
            (1360, 1660),
            (1800, 2000),
            (2200, 2480),
            (2660, 2858),
        ],
        'lego-tr4': [
            (200, 330),
            (560, 810),
            (1100, 1360),
            (1530, 1750),
            (1950, 2130),
            (2450, 2660),
            (2860, 3030),
            
        ],
        'lego-tr6': [
            (150, 330),
            (600, 800),
            (1400, 1560),
            (2200, 2440),
            (2700, 2900),
            (3240, 3500),
            (3850, 4083),
            
        ],
        'lego-tr16': [
            (300, 430),
            (700, 830),
            (1160, 1330),
            (1600, 1730),
            (2000, 2120),
            (2260, 2400),
            (2700, 2790),     
        ],
        'lego-tr18': [
            (210, 300),
            (490, 590),
            (730, 900),
            (1060, 1190),
            (1560, 1700),
            (1860, 2000),
            (2160, 2330),     
        ],
    }
#     HANDLABELED_ACTIVE = {}
#     HANDLABELED_PASSIVE = {
#         'lego-tr2': [
#             (13.5*30, 17.5*30),
#             (28.5*30, 32.5*30),
#             (43.5*30, 45.5*30),
#             (54.5*30, 60.5*30),
#             (73.5*30, 76.5*30),
#             (85.5*30, 92.5*30),
#         ],
#     }
    def filter_active(ss_df):
        return ss_df['val'].str.contains('\[\[')
    
elif APP == 'pingpong':
    ## settings for Ping Pong
    # 6-10 are longer traces Junjue and zf recorded
    TRAIN_TRACES = ['pingpong-tr' + str(i) for i in (6, 7, 8, 9, 10)]
    TEST_TRACES = ['pingpong-tr' + str(i) for i in [10]]
    HANDLABELED_PASSIVE = {
        'pingpong-tr10': [
            ( 8.5*30, 12.5*30),
            (33.5*30, 45.5*30),
            (57.5*30, 63.5*30),
            (64.5*30, 69.5*30),
        ]
    }
#     HANDLABELED_PASSIVE = {}
    HANDLABELED_ACTIVE = {}
    def filter_active(ss_df):
        return ~(ss_df['val'].str.contains('Cannot find table')
                | ss_df['val'].str.contains('Detected table too small')
                | ss_df['val'].str.contains('Table top line')
                | ss_df['val'].str.contains('Table doesn\'t occupy')
                | ss_df['val'].str.contains('Angle between two side')
                | ss_df['val'].str.contains('Valid area too small')
               )
elif APP == "pool":
    TRAIN_TRACES = ['pool-tr' + str(i) for i in range(1,5)]
    TEST_TRACES = ['pool-tr' + str(i) for i in range(1,5)]
    def filter_active(ss_df):
        return ~(ss_df['val'].str.contains('Cannot find'))
    #Thresholds used thresholds = [1.0,1.2,1.0,1.4] y_pred = 1 - X[:,0]>threshold
elif APP == "ikea":
    TRAIN_TRACES = ['ikea-tr' + str(i) for i in [11, 12]]
    TEST_TRACES = ['ikea-tr' + str(i) for i in [1,4,7,11,12]]
#     TEST_TRACES = ['ikea-tr' + str(i) for i in range(1,8)]
    HANDLABELED_PASSIVE = {}
    HANDLABELED_ACTIVE = {
        'ikea-tr1': [
            (184, 370),
            (670, 950),
            (1180, 1360),
            (2094, 2600),
            (2730, 2920),
            (3830, 3850),
            (4000, 4230),
            (4790, 5070),
        ],
        'ikea-tr3': [
            (180, 360),
            (980, 1190),
            (1460, 1610),
            (2450, 2890),
            (3180, 3250),
            (3900, 3960),
            (4200, 4440),
            (4700, 4980),
        ],
        'ikea-tr4': [
            (210, 360),
            (770, 980),
            (1140, 1320),
            (2000, 2270),
            (2800, 2880),
            (3656, 3700),
            (3790, 4060),
            (4440, 4650),
        ],
        'ikea-tr5': [
            (200, 350),
            (810, 1000),
            (1170, 1350),
            (1960, 2370),
            (2760, 2900),
            (3450, 3540),
            (3730, 4010),
            (4260, 4510),
        ], 
        'ikea-tr7': [
            (250, 600),
            (1076, 1130),
            (1820, 2130),
            (2600, 3060),
            (3230, 3500),
            (4200, 4340),
            (4880, 5200),
            (5700, 5960),
        ],  
        'ikea-tr11': [
            (260, 630),
            (1160, 1660),
            (1940, 2230),
            (3160, 4300),
            (4430, 4800),
            (5530, 5720),
            (6030, 6400),
            (6730, 6900),
        ], 
        'ikea-tr12': [
            (400, 730),
            (1460, 2200),
            (2480, 2800),
            (3400, 3760),
            (5000, 5330),
            (6090, 6200),
            (6560, 6900),
            (7290, 7500),
        ],    
        }
    
#     HANDLABELED_ACTIVE = {}
    def filter_active(ss_df):
        return ~(ss_df['val'].str.contains('No objects detected'))

        

In [49]:
# load IMU csv files into MySQL

def load_IMU_to_DB(trace_name, base_dir):
    trace_num = re.search('\d+', trace_name).group(0)
    csv_name = glob.glob(os.path.join(base_dir,
                                      trace_num, '*.csv'))[0]
    print("Using file as IMU data:", csv_name)
    df = pd.read_csv(csv_name,index_col='frame_id')
    df['sensor_timestamp'] = pd.to_datetime(df['sensor_timestamp'])
    
    new = 0
    for row in df.itertuples():
        keys_dict = {'name': trace_name,
                     'trace': trace_num,
                     'index': row.Index
                    }
        vals_dict = {'sensor_timestamp': row.sensor_timestamp.to_pydatetime(),
                     'rot_x': row.rot_x,
                     'rot_y': row.rot_y,
                     'rot_z': row.rot_z,
                     'acc_x': row.acc_x,
                     'acc_y': row.acc_y,
                     'acc_z': row.acc_z
                    }
        
        insert_or_update_one(sess, models.IMU, keys_dict, vals_dict)

    print("Updated: ", len(sess.dirty))
    print("New: ", len(sess.new))
    sess.commit()

# sess = dbutils.get_session()
# for trace_name in set(TEST_TRACES):
#     load_IMU_to_DB(trace_name, '/home/junjuew/work/resource-management/data/ikea-trace')
# sess.close()

Using file as IMU data: /home/junjuew/work/resource-management/data/ikea-trace/7/2019_05_23-06_36_28.csv




Updated:  0
New:  1
Using file as IMU data: /home/junjuew/work/resource-management/data/ikea-trace/12/2019_05_23-08_42_13.csv
Updated:  0
New:  1
Using file as IMU data: /home/junjuew/work/resource-management/data/ikea-trace/11/2019_05_23-08_51_20.csv
Updated:  0
New:  1


In [118]:
def train_clf(trace_names, 
              svm_kargs={}, 
              X_transform_func=lambda x: x, 
              param_grid = {'svm__C': [1., 10.], 'svm__kernel': ['linear',]}):
    Xs = []
    ys = []
    
    for trace in trace_names:
        X1, y1 = get_raw_Xy(trace)
        X1 = X_transform_func(X1)
        Xs.append(X1)
        ys.append(y1)
    
    X = np.vstack(Xs)
    y = np.concatenate(ys)
    print(X.shape)
    print("Total frames: ", X.shape[0], "Active frames: ", np.count_nonzero(y))
    
#     skf = StratifiedKFold(n_splits=5)
    
    clf = Pipeline([
        ('scaler', StandardScaler(with_mean=True, with_std=True)),
        ('svm', SVC(**svm_kargs)),
    ])
    
    clf = GridSearchCV(clf, param_grid=param_grid, cv=5, n_jobs=4)

    clf.fit(X, y)
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
#     print("Training confusion matrix:\n", cm)
    return clf, cm, (X, y, y_pred)

def eval_clf(clf, trace_names, X_transform_func=lambda x:x, threshold=0):
    Xs = []
    ys = []
    for trace in trace_names:
        X1, y1 = get_raw_Xy(trace)
        X1 = X_transform_func(X1)
        Xs.append(X1)
        ys.append(y1)
        
    X = np.vstack(Xs)
    y = np.concatenate(ys)
    
#     y_pred = np.ones_like(y) - (X[:,0]>threshold)
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    return cm, (X, y, y_pred)

In [119]:
# calibration data using zero-movement readings
#  select avg(rot_x), avg(rot_y), avg(rot_z), avg(acc_x), avg(acc_y), avg(acc_z) from IMU where name = "lego-tr0";
IMU_CALIBRATE = [0.0006005733204134366, -0.0010699289405684755, 0.0010396479328165374,
                -0.4246709463824289, 10.076913759689923, 0.31374152131782945]

# the -tr0 IMU readings from lego and pingpong are quite different ...

In [120]:
# Classifiction problem: 
# Features: 6 IMU readings per frame -> transform_func -> X
# Label y: 0 - passive, 1 - active.
# use SVM with GridSearch

def get_raw_Xy(trace, calibrate=True):
    imu_df = get_IMU_pd(trace)    
    use_imu_cols = ['rot_x','rot_y','rot_z','acc_x','acc_y','acc_z']
    X1 = imu_df[use_imu_cols].values
    if calibrate:
        # adjust for calibration
        X1 = X1 - np.array(IMU_CALIBRATE)
    
    # if we have hand labeled passive phase, use it.
    # otherwise use computer vision SS to proxy ground truth
    if trace in HANDLABELED_PASSIVE:
        print("Using hand labeled GT:", trace)
        y1 = np.ones_like(X1[:, 0])
        for start, end in HANDLABELED_PASSIVE[trace]:
            start = int(start)
            end = int(end)
            y1[start-1: end] = 0
    elif trace in HANDLABELED_ACTIVE:
        print("Using hand labeled GT:", trace)
        y1 = np.zeros_like(X1[:, 0])
        for start, end in HANDLABELED_ACTIVE[trace]:
            y1[start: end] = 1
    else:
        ss_df = get_SS_pd(trace)
        # SS sometimes has one more frame than IMU, drop it
        ss_df = ss_df[ss_df['index'] <= imu_df['index'].max()]
        y1 = np.zeros_like(X1[:, 0])
        active_inds = ss_df[filter_active(ss_df)]['index'].values
        y1[active_inds] = 1 

    
    print("%s has %d/%d active/total frames" % (trace, np.count_nonzero(y1), y1.shape[0]))
    return X1, y1

In [121]:
# reading from MySQL using Pandas API

def get_SS_pd(trace_name):
    df = pd.read_sql('SELECT * FROM SS WHERE name = %s', schema.engine, params=[trace_name,])
    df['index'] = df['index'].astype(int) - 1   # SS's index counts from 1
    df = df.sort_values('index')
    return df
    
def get_IMU_pd(trace_name):
    df = pd.read_sql('SELECT * FROM IMU WHERE name = %s', schema.engine, params=[trace_name,])
    df['index'] = df['index'].astype(int)
    df = df.sort_values('index')
    return df

In [100]:
# Retrain using a chosen trade-off
W = 1.
if APP == 'lego':
    W = 4.5
elif APP == 'pingpong':
    W = 2.
elif APP == 'pool':
    W = 1.
elif APP == 'ikea':
    W = 2.0
svm_kargs={'class_weight': {0: 1.0, 1: W}, 'random_state': 42, 'verbose': True},
# clf, cm, _ = train_clf(TRAIN_TRACES, 
#                         X_transform_func=transform_acc_x,
#                         svm_kargs={'class_weight': {0: 1.0, 1: W}, 'random_state': 42, 'verbose': True, 'probability': True},
#                         param_grid = {'svm__C': [0.1,1.,10.], 'svm__kernel': ['linear']})
clf, cm, _ = train_clf(TRAIN_TRACES, 
                        X_transform_func=transform_x,
                        svm_kargs={'class_weight': 'balanced', 'random_state': 42, 'verbose': True, 'probability': True},
                        param_grid = {'svm__C': [0.1,1.,10.], 'svm__kernel': ['linear']})
print(cm)
threshold = 0
# thresholds = [1.3,1.32,1.35,1.36,1.4]
# for threshold in thresholds:
#     print("Threshold: ",threshold)
# cm, _ = eval_clf(clf, TEST_TRACES, X_transform_func=transform_acc_x, threshold=threshold)
cm, _ = eval_clf(clf, TEST_TRACES, X_transform_func=transform_x, threshold=threshold)

print("Eval cm:\n", cm)

# clf = clf.best_estimator_
# thresholds = [0.5]
# for threshold in thresholds:
#     print("Threshold: ",threshold)
#     cm, _ = eval_clf(clf, TEST_TRACES, X_transform_func=transform_acc_x, threshold=threshold)
#     print("Eval cm:\n", cm)
with open('IMU_' + APP + '_clf.pkl', 'w') as f:
    pickle.dump(clf, f)
# print("OK")

Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames
Using hand labeled GT: ikea-tr12
ikea-tr12 has 2740/7819 active/total frames
(14900, 3)
Total frames:  14900 Active frames:  6140
[LibSVM][[ 597 8163]
 [   0 6140]]
Using hand labeled GT: ikea-tr1
ikea-tr1 has 1872/5365 active/total frames
Using hand labeled GT: ikea-tr4
ikea-tr4 has 1414/4911 active/total frames
Using hand labeled GT: ikea-tr7
ikea-tr7 has 2164/6125 active/total frames
Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames
Using hand labeled GT: ikea-tr12
ikea-tr12 has 2740/7819 active/total frames
Eval cm:
 [[ 1266 18445]
 [   20 11570]]


In [126]:
# manually engineered features
# select acc_x, acc_y, Gaussian smooth, and substract cummulative mean

def past_windowed_1d(x, size, func):
    # output[i] -> func(x[i-size+1:i+1])
    # aka windowed function on the past `size` frames
    # should only call on one trace
    assert x.ndim == 1
    rv = []
    for i in range(x.shape[0]):
        wind = x[max(0, i - size + 1): i+1]  # including i
        rv.append(func(wind))
        
    return np.array(rv)

def transform_acc_x(X_imu6):
    accx = X_imu6[:,3]  # acc_x
    accy = X_imu6[:,4] # acc_y
    accz = X_imu6[:,4] # acc_z
    
    accx_smooth_mean = past_windowed_1d(accx, accx.shape[0],
                                   lambda w: gaussian_filter1d(w, 10)[-1] - np.mean(w))
    accy_smooth_mean = past_windowed_1d(accy, accy.shape[0],
                                   lambda w: gaussian_filter1d(w, 10)[-1] - np.mean(w))
    accz_smooth_mean = past_windowed_1d(accz, accz.shape[0],
                                   lambda w: gaussian_filter1d(w, 10)[-1] - np.mean(w))
    
#     X_combined = np.stack([accx_smooth_mean, accy_smooth_mean, accz_smooth_mean], axis=1)
    X_combined = np.stack([accx_smooth_mean, accy_smooth_mean, accz_smooth_mean], axis=1)
    return X_combined

# def transform_x(X_imu6):
#     X_imu6 = X_imu6[:,:4]
#     X_combined = np.zeros_like(X_imu6)
#     for i in range(X_imu6.shape[1]):
#         x_in = X_imu6[:,i]
#         x = past_windowed_1d(x_in, x_in.shape[0],
#                                     lambda w: gaussian_filter1d(w, 10)[-1] - np.mean(w))
#         X_combined[:,i] = x
#     return X_combined

In [138]:
# Impact on latency of detecting state change compared to oracle
def get_state_change_frames(ss_df, k=5):
    # returns dict(ss -> first detection of change)
    # require k consecutive frames to declare state change

#     ss_df = ss_df.reset_index(drop=True)  # reset Index to row id
    state_change = dict()
    
    if APP == 'lego':
        cur_state = None
        cur_count = 0

        # only count "useful" SS
        for r in ss_df.itertuples():
            ss, frame_id = r.val, r.index
            if ss not in state_change and '[[' in ss:
                if ss == cur_state:
                    cur_count += 1
                    if cur_count >= k:
                        state_change[ss] = frame_id # commit
                else:
                    cur_state = ss # change tracking state
                    cur_count = 0
            else:
                cur_state = None
                cur_count = 0
                
    elif APP == 'pingpong':
        k_active = 5
        k_passive = 30
        cur_state_active = False
        cur_count = 0
        active_phase = 0
        prev_frame_id = -1
        
        for r in ss_df.itertuples():
            ss, frame_id = r.val, r.index
            
            if frame_id - prev_frame_id > 1:
                # IMU suppressed frames. as if they are all passive frames
                if cur_state_active:
                    if frame_id - 1 - prev_frame_id >= k_passive:
                        cur_state_active = False
                        cur_count = 0
                    else:
                        cur_count += frame_id - 1 - prev_frame_id
                else:
                    cur_count = 0  # just extending passive streak
            
            active = True
            for pat in ['Cannot find table', 'Detected table too small', 'Table top line', 
                        'Table doesn\'t occupy', 'Angle between two side', 'Valid area too small']:
                if pat in ss:
                    active = False
                    break
            
            if cur_state_active ^ active:
                cur_count += 1
                if active and cur_count >= k_active: # commit
                    cur_state_active = True
                    cur_count = 0
                    new_state = 'active' + str(active_phase)
                    active_phase += 1
                    state_change[new_state] = frame_id
                elif not active and cur_count >= k_passive:
                    cur_state_active = False
                    cur_count = 0
                else:
                    pass
            else:
                cur_count = 0
                
            prev_frame_id = frame_id
    elif APP=="lego":
        #LABELS = ["base", "pipe", "shade", "shadetop", "buckle", "blackcircle", "lamp", "bulb", "bulbtop"]
        STATES = ["base", "pipe", "shade", "buckle", "blackcircle", "bulbtop"]
        current_state = 0
        len_state = len(STATES)
        state_change = []
        for r in ss_df.itertuples():
            ss, frame_id = r.val, r.index
            if(frame_id==618):
                print(ss)
            if current_state == len_state:
                break
            if STATES[current_state] in ss:
                state_change.append(frame_id)
                current_state += 1
        if len(state_change)!= len_state:
            print("Error! {} {}".format(len(state_change),len_state))

    return state_change


def compare_state_change_latency(gt_dict, test_dict):
    if APP == 'lego':
        return dict([(k, test_dict.get(k, float('inf')) - gt_dict[k]) for k in gt_dict])
    elif APP == 'pingpong':
        last_gt_change = None
        ret = {}
        gt_points = np.sort(np.array(gt_dict.values()))
        for k, v in test_dict.iteritems():
            gt_change = gt_points[gt_points <= v][-1]
            if gt_change > last_gt_change:
                ret[k] = v - gt_change
                last_gt_change = gt_change
            else:
                pass # false change
            
        return ret
            
    
def eval_load_and_latency(traces, clf, threshold=0):
    df = pd.DataFrame()
#     thresholds = [1.0,1.2,1.0,1.4]
    for i, trace_name in enumerate(traces):
        print(trace_name)
        

        cm, (_, y, y_pred) = eval_clf(clf, [trace_name,], X_transform_func=transform_acc_x)#, threshold=thresholds[i])
        tn, fp, fn, tp = cm.ravel()
        
        r = {
            'trace_name': trace_name,
            'dropped_total': 1. * (tn + fn) / (tn+fp+fn+tp),
            'dropped_active': 1. * (fn) / (tp + fn),
            'dropped_passive': 1. * (tn) / (tn + fp)
        }
    
        # plot
        print(y_pred.shape)
        x = np.arange(y_pred.shape[0])
        plt.figure(figsize=(4.5, 1.75))
#         plt.plot(y[::], 'b.', label='GT frames', markersize=2)
        plt.plot(y[::], 'b-', label='Ground Truth', markersize=4)
        plt.plot(x[y_pred==0],y_pred[y_pred==0] - .1, 'r.', label='Suppressed frames by IMU', markersize=4)
        
        
        if APP == 'ikea':
            ss_df = get_SS_pd(trace_name)
            gt_state_change = get_state_change_frames(ss_df)
            imu_filtered_df = ss_df.iloc[np.nonzero(y_pred)]
            imu_filtered_state_change = get_state_change_frames(imu_filtered_df)
            latency = compare_state_change_latency(gt_state_change, imu_filtered_state_change)
            
#             print(json.dumps(gt_state_change, indent=4))
#             print(json.dumps(imu_filtered_state_change, indent=4))
#             print(json.dumps(latency, indent=4))
            
#             plt.vlines(gt_state_change.values(), .6, 1.2, 'k', label='GT State change')
            plt.vlines(gt_state_change, .6, 1.2, 'k', label='GT State change')
            #plt.vlines(imu_filtered_state_change.values(), -.2, .5, 'k', linestyle='dotted',
            #           label='IMU-based State change')
#             r.update({
#                 'change_delay_frames_mean': np.mean(latency.values()),
#                 'change_delay_frames_max': np.max(latency.values())
#             })

        plt.yticks([0, 1], ['Passive', 'Active'])
        plt.xlabel('Frame Sequence')
        plt.legend(loc='lower center', bbox_to_anchor=(.5, 1.0), ncol=2)
        plt.tight_layout()
        plt.savefig('fig-imu-%s.pdf' % trace_name, bbox_inches='tight')
#         plt.title("Ground truth vs. IMU-based Suppression (trace: %s)" % trace_name)

        df = df.append(r, ignore_index=True)
    return df

def eval_thresholding(traces, threshold=1):
    df = pd.DataFrame()
    for trace in traces:
        X, y = get_raw_Xy(trace)
        X1 = transform_x(X)
        E = np.sum((X1**2),axis=1)
        y_pred = np.ones_like(y)
        y_pred[E>threshold] = 0
        cm = confusion_matrix(y, y_pred)
        tn, fp, fn, tp = cm.ravel()
        
        r = {
            'trace_name': trace,
            'dropped_total': 1. * (tn + fn) / (tn+fp+fn+tp),
            'dropped_active': 1. * (fn) / (tp + fn),
            'dropped_passive': 1. * (tn) / (tn + fp)
        }
        print(y_pred.shape)
        x = np.arange(y_pred.shape[0])
        plt.figure(figsize=(5, 2.5))
#         plt.plot(y[::], 'b.', label='GT frames', markersize=2)
        plt.plot(y[::], 'b-', label='Ground Truth', markersize=4)
        plt.plot(x[y_pred==0],y_pred[y_pred==0] - .1, 'r.', label='Suppressed frames by IMU', markersize=4)
        plt.yticks([0, 1], ['Passive', 'Active'])
        plt.xlabel('Frame Sequence')
        plt.legend(loc='lower center', bbox_to_anchor=(.5, 1.0), ncol=2)
        plt.tight_layout()
        plt.savefig('fig-imu-%s.pdf' % trace_name, bbox_inches='tight')
        df = df.append(r, ignore_index=True)
    return df, cm, y, y_pred
        
    

In [98]:
# tune weight of label 1 to trade off FN and FP
# on average 30% frames are active for Lego

weights = np.arange(2., 12., 2.)
result_df = pd.DataFrame()

for w in weights:
    print("weight:", w)
    svm_kargs = {'class_weight': {0: 1.0, 1: w}, 'random_state': 42}
#     clf, cm, _ = train_clf(TRAIN_TRACES, X_transform_func=transform_acc_x, svm_kargs=svm_kargs)
#     cm, _ = eval_clf(clf, TEST_TRACES, X_transform_func=transform_acc_x)
    clf, cm, _ = train_clf(TRAIN_TRACES, svm_kargs=svm_kargs, X_transform_func=transform_x)
    cm, _ = eval_clf(clf, TEST_TRACES, X_transform_func=transform_x)
    print("Eval cm:\n", cm)
    result_df = result_df.append({'weight': w, 'confusion_matrix': cm, 'params': clf.best_params_}, ignore_index=True)

print(result_df)

weight: 1.0
Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames
(7081, 3)
(7081,)
(7081,)
(7081,)
Using hand labeled GT: ikea-tr12
ikea-tr12 has 2740/7819 active/total frames
(7819, 3)
(7819,)
(7819,)
(7819,)
(14900, 3)
Total frames:  14900 Active frames:  6140
Using hand labeled GT: ikea-tr1
ikea-tr1 has 1872/5365 active/total frames
(5365, 3)
(5365,)
(5365,)
(5365,)
Using hand labeled GT: ikea-tr4
ikea-tr4 has 1414/4911 active/total frames
(4911, 3)
(4911,)
(4911,)
(4911,)
Using hand labeled GT: ikea-tr7
ikea-tr7 has 2164/6125 active/total frames
(6125, 3)
(6125,)
(6125,)
(6125,)
Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames
(7081, 3)
(7081,)
(7081,)
(7081,)
Using hand labeled GT: ikea-tr12
ikea-tr12 has 2740/7819 active/total frames
(7819, 3)
(7819,)
(7819,)
(7819,)
Eval cm:
 [[19711     0]
 [11590     0]]
weight: 1.5
Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames
(7081, 3)
(7081,)
(7081,)
(7081,)


In [144]:
%matplotlib notebook

clf = pickle.load(open('IMU_' + APP + '_clf.pkl', 'r'))
    
test_set_result = eval_load_and_latency(TEST_TRACES, clf)
# test_set_result,_,_,_ = eval_thresholding(TEST_TRACES) # for ikea thresholding Energy(X[:4])
test_set_result.to_csv('IMU_result.txt')
print(test_set_result)

pingpong-tr10
Using hand labeled GT: pingpong-tr10
pingpong-tr10 has 1419/2233 active/total frames
(2233,)


<IPython.core.display.Javascript object>

   dropped_active  dropped_passive  dropped_total     trace_name
0        0.035941         0.558968       0.226601  pingpong-tr10


In [None]:
# dirty playground
%matplotlib notebook

base_dir = '/home/junjuew/work/resource-management/data/ikea-trace'


def get_gt(trace,X1):
    ss_df = get_SS_pd(trace)
    ss_df = ss_df[ss_df['index'] < X1.shape[0]]
    y1 = np.zeros_like(X1[:, 0])
    active_inds = ss_df[filter_active(ss_df)]['index'].values
    y1[active_inds] = 1
    return y1

def get_raw_Xy(trace_name):
    trace_num = re.search('\d+', trace_name).group(0)
    csv_name = glob.glob(os.path.join(base_dir,
                                      trace_num, '*.csv'))[0]
    print("Using file as IMU data:", csv_name)
    df = pd.read_csv(csv_name,index_col='frame_id')
    use_imu_cols = ['rot_x','rot_y','rot_z','acc_x','acc_y','acc_z']
    X = df[use_imu_cols].values
    X = X - np.array(IMU_CALIBRATE)
    y = get_gt(trace_name,X)
    return X, y 

In [104]:
for trace_name in sorted(set(TRAIN_TRACES + TEST_TRACES)):
    X, y = get_raw_Xy(trace_name)
#     X, y = get_X_y_csv(trace_name, base_dir)
#     X_uncalibrated, _ = get_raw_Xy(trace_name, calibrate=False)

    X_accx = X[:,3]

    X_smooth = past_windowed_1d(X_accx, 90, lambda w: np.mean(w))

    X_cummin = np.minimum.accumulate(X_smooth)
    X_cummax = np.maximum.accumulate(X_smooth)
    X_cummean = np.cumsum(X_smooth) / (np.arange(X_smooth.shape[0]) + 1)

    # X_star = X_smooth - np.cumsum(X_accx) / (np.arange(X_accx.shape[0]) + 1)
    # X_star = gaussian_filter1d(X_accx,10) - np.mean(X_accx)
    X_star = past_windowed_1d(X_accx, X_accx.shape[0], lambda w: gaussian_filter1d(w, 10)[-1] - np.mean(w))

    y_pred = np.array(X_star < 0.1, dtype=np.int8)

    # hot fix: always transmit first 90 frames (~3 sec)
    y_pred[:90] = 1

    fn_inds = np.nonzero(np.logical_and(y == 1, y_pred == 0))[0]
    fp_inds = np.nonzero(np.logical_and(y == 0, y_pred == 1))[0]

#     X_used = transform_acc_x(X)
    X_used = transform_x(X)
#     X_used = X_used[:,[0,2]] #removing y for ikea
    energy = np.sum((X_used**2),axis=1)
    if True:
        plt.figure(figsize=(5,3))

        for i in range(X_used.shape[1]):
            plt.plot(X_used[:,i], label=str(i)+' calibrated')
        plt.plot(energy, label='energy')

        plt.plot(y*5, 'bo', label='GT board')
    #     plt.plot(X_accx, 'g', label ='acc x')

    #     plt.plot(X_smooth , 'k', label ='X smooth')
    #     plt.plot(X_cummean, 'c', label='cum mean')
    #     plt.plot(X_smooth < np.mean(X_smooth), label='offset mean')
    #     plt.plot(X_cummin, 'c', label ='X cummax')
    #     plt.plot(X_cummax, 'm', label ='X cummin')
    #     plt.plot(X_star, 'r', label ='acc x (smoothed&normalized)')
    #     plt.plot(gaussian_filter1d(X_accx,10), label='Gaussian smooth')
    #     plt.plot(fn_inds, np.ones_like(fn_inds) , 'yx', label = 'FN')
    #     plt.plot(fp_inds, np.zeros_like(fp_inds) , 'mx', label = 'FP')
        plt.legend(loc='lower right')
        plt.title(trace_name)

    print("Corr coef:\n", np.corrcoef(X_star, y))

Using hand labeled GT: ikea-tr1
ikea-tr1 has 1872/5365 active/total frames


<IPython.core.display.Javascript object>

Corr coef:
 [[ 1.         -0.33163484]
 [-0.33163484  1.        ]]
Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames


<IPython.core.display.Javascript object>

Corr coef:
 [[ 1.         -0.10624561]
 [-0.10624561  1.        ]]
Using hand labeled GT: ikea-tr12
ikea-tr12 has 2740/7819 active/total frames


<IPython.core.display.Javascript object>

Corr coef:
 [[1.00000000e+00 7.15754808e-04]
 [7.15754808e-04 1.00000000e+00]]
Using hand labeled GT: ikea-tr4
ikea-tr4 has 1414/4911 active/total frames


<IPython.core.display.Javascript object>

Corr coef:
 [[ 1.         -0.11720252]
 [-0.11720252  1.        ]]
Using hand labeled GT: ikea-tr7
ikea-tr7 has 2164/6125 active/total frames


<IPython.core.display.Javascript object>

Corr coef:
 [[ 1.         -0.06844078]
 [-0.06844078  1.        ]]


In [112]:
def load_IMU_Suppression_to_DB(trace_name):
#     clf = pickle.load(open('IMU_' + APP + '_clf.pkl', 'r'))
#     cm, (_, y, y_pred) = eval_clf(clf, [trace_name,], X_transform_func=transform_acc_x)
    _, cm, y, y_pred = eval_thresholding([trace_name,]) 
    print(trace_name)
    print(cm)
    trace_num = re.search('\d+', trace_name).group(0)
    for idx in range(y_pred.shape[0]):
        keys_dict = {'name': trace_name,
                     'trace': trace_num,
                     'index': idx
                    }
        vals_dict = {'suppression': 1 - int(y_pred[idx]),
                    }
        
        insert_or_update_one(sess, models.IMUSuppression, keys_dict, vals_dict)

    print("Updated: ", len(sess.dirty))
    print("New: ", len(sess.new))
    sess.commit()



sess = dbutils.get_session()
for i, trace_name in enumerate(TEST_TRACES):
    print(trace_name)
    load_IMU_Suppression_to_DB(trace_name)
sess.close()

ikea-tr1
Using hand labeled GT: ikea-tr1
ikea-tr1 has 1872/5365 active/total frames
(5365,)


<IPython.core.display.Javascript object>

ikea-tr1
[[ 637 2856]
 [   5 1867]]
Updated:  0
New:  1
ikea-tr4
Using hand labeled GT: ikea-tr4
ikea-tr4 has 1414/4911 active/total frames
(4911,)


<IPython.core.display.Javascript object>

ikea-tr4
[[ 168 3329]
 [   0 1414]]
Updated:  0
New:  1
ikea-tr7
Using hand labeled GT: ikea-tr7
ikea-tr7 has 2164/6125 active/total frames
(6125,)


<IPython.core.display.Javascript object>

ikea-tr7
[[ 368 3593]
 [   0 2164]]
Updated:  0
New:  1
ikea-tr11
Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames
(7081,)


<IPython.core.display.Javascript object>

ikea-tr11
[[ 883 2798]
 [ 102 3298]]
Updated:  0
New:  1
ikea-tr12
Using hand labeled GT: ikea-tr12
ikea-tr12 has 2740/7819 active/total frames
(7819,)


<IPython.core.display.Javascript object>

ikea-tr12
[[ 769 4310]
 [  28 2712]]
Updated:  0
New:  1


In [113]:
def load_DutyCycleGT_to_DB(trace_name):
    print(trace_name)
    active_gt = []
    for trace in [trace_name]:
        _, y1 = get_raw_Xy(trace)
        active_gt.append(y1)
    active_gt = np.array(active_gt).ravel()
    print(active_gt.shape)
#     print(active_gt[0])
#     active_gt = np.array(active_gt)
    print(len(np.where(active_gt==1)[0]))
#     print(active_gt.shape)
#     plt.plot(active_gt.ravel(), 'b-', markersize=4)
#     plt.yticks([0, 1], ['Passive', 'Active'])
#     plt.xlabel('Frame Sequence')
#     plt.show()
    trace_num = re.search('\d+', trace_name).group(0)
    print(trace_name)
    for idx in range(len(active_gt)):
        keys_dict = {'name': APP,
                     'trace': trace_num,
                     'index': idx
                    }
        vals_dict = {'active': int(active_gt[idx]),
                    }

        
        insert_or_update_one(sess, models.DutyCycleGT, keys_dict, vals_dict)

    print("Updated: ", len(sess.dirty))
    print("New: ", len(sess.new))
    sess.commit()
    return active_gt



sess = dbutils.get_session()
for i, trace_name in enumerate(TEST_TRACES):
    print(trace_name)
    load_DutyCycleGT_to_DB(trace_name)
sess.close()

ikea-tr1
ikea-tr1
Using hand labeled GT: ikea-tr1
ikea-tr1 has 1872/5365 active/total frames
(5365,)
1872
ikea-tr1
Updated:  0
New:  1
ikea-tr4
ikea-tr4
Using hand labeled GT: ikea-tr4
ikea-tr4 has 1414/4911 active/total frames
(4911,)
1414
ikea-tr4
Updated:  0
New:  1
ikea-tr7
ikea-tr7
Using hand labeled GT: ikea-tr7
ikea-tr7 has 2164/6125 active/total frames
(6125,)
2164
ikea-tr7
Updated:  0
New:  1
ikea-tr11
ikea-tr11
Using hand labeled GT: ikea-tr11
ikea-tr11 has 3400/7081 active/total frames
(7081,)
3400
ikea-tr11
Updated:  0
New:  1
ikea-tr12
ikea-tr12
Using hand labeled GT: ikea-tr12
ikea-tr12 has 2740/7819 active/total frames
(7819,)
2740
ikea-tr12
Updated:  0
New:  1
