# Going deep into Human Activity Recognition

**Elia Bonetto, Filippo Rigotto.**

## Part 5 - Area 51

This _Discarded Material_ part collects reference methods and code that may be useful in the future but are not used in the main parts of this work.

In [0]:
import logging
# Disable the logger named 'tensorflow' so that records sent to it are dropped.
logging.getLogger('tensorflow').disabled = True

In [0]:
def per_class_accuracy(y_true, y_preds, class_labels):
    """Return one accuracy value per entry of ``class_labels``.

    Kept for reference only: the confusion matrix diagonal is used instead.

    For each label (converted with ``int``), the samples whose true value
    equals that label are selected, and the fraction of them whose rounded
    prediction (``np.round``) equals the true value is computed.

    Args:
        y_true: true class values, indexable by sample position.
        y_preds: predictions, iterated in the same order as ``y_true``.
        class_labels: iterable of labels accepted by ``int()``.

    Returns:
        A list with one mean per label, in the order of ``class_labels``.
        A label without any sample yields NumPy's mean of an empty list (nan).
    """
    accuracies = []
    for class_label in class_labels:
        wanted = int(class_label)
        hits = []
        for pred_idx, y_pred in enumerate(y_preds):
            # only samples that truly belong to the current class count
            if y_true[pred_idx] == wanted:
                hits.append(y_true[pred_idx] == np.round(y_pred))
        accuracies.append(np.mean(hits))
    return accuracies

def halfLRafterEpoch(epoch):
    """Step-decay learning-rate schedule: halve the rate every 10 epochs.

    Kept for reference only: a lambda function is used instead.

    Args:
        epoch: zero-based epoch index.

    Returns:
        ``0.1 * 0.5 ** floor((1 + epoch) / 10)`` as a float.
    """
    initial_lrate = 0.1
    drop_rate = 0.5
    epochs_drop = 10.0
    # the original referenced an undefined name `drop`; the factor is `drop_rate`
    return initial_lrate * math.pow(drop_rate, math.floor((1+epoch)/epochs_drop))

In [0]:
# Write the training history to <out_folder>/history.json, indented by 2.
# The native json module can't handle float32 objects, so a direct
#   json.dump(history.history, hfile, indent=2)
# is not used. pandas can handle them and acts as a preprocessor:
# DataFrame -> JSON string -> plain Python objects -> json.dump.
with open(os.path.join(out_folder, 'history.json'), 'w') as hfile:
    hpd = pd.DataFrame(history.history)
    json.dump(json.loads(hpd.to_json()), hfile, indent=2)

Checkpoint saving

In [0]:
# Checkpoint 1: store imu_sensor, attitude_mat and activities in ARS-raw.h5.
# Note that attitude_mat is saved under the dataset name 'attitudes'.
with h5py.File('dataset/ARS-raw.h5','w') as h5f:
    h5f.create_dataset('imu_sensor', data=imu_sensor)
    h5f.create_dataset('attitudes',  data=attitude_mat)
    h5f.create_dataset('activities', data=activities)

# optional reload if messing up below
# ([:] reads each dataset fully into memory before the file is closed)
with h5py.File('dataset/ARS-raw.h5','r') as h5f:
    imu_sensor = h5f['imu_sensor'][:]
    attitude_mat = h5f['attitudes'][:]
    activities = h5f['activities'][:]

# Checkpoint 2: same content as checkpoint 1 plus imu_body, in ARS.h5.
with h5py.File('dataset/ARS.h5','w') as h5f:
    h5f.create_dataset('imu_sensor', data=imu_sensor)
    h5f.create_dataset('imu_body', data=imu_body)
    h5f.create_dataset('attitudes', data=attitude_mat)
    h5f.create_dataset('activities', data=activities)

# Checkpoint 3: the framed arrays, in ARS-framed.h5.
# Only one set of activity labels is stored (activities_sensor_framed).
with h5py.File('dataset/ARS-framed.h5','w') as h5f:
    h5f.create_dataset('imu_sensor', data=imu_sensor_framed)
    h5f.create_dataset('imu_body',   data=imu_body_framed)
    h5f.create_dataset('activities', data=activities_sensor_framed)
    #h5f.create_dataset('activities_body', data=activities_body_framed) # useless duplicate of prev item

# optional reload if messing up below
with h5py.File('dataset/ARS-framed.h5','r') as h5f:
    imu_sensor_framed = h5f['imu_sensor'][:]
    imu_body_framed = h5f['imu_body'][:]
    activities_sensor_framed = h5f['activities'][:]
    # body labels are not stored separately: rebuild them as a copy of the sensor ones
    activities_body_framed = activities_sensor_framed.copy()

## Computing weighted accuracies

In [0]:
# Maps each dataset file name (inside the dataset/ folder) to its short code.
# Reading the codes against the file names:
#   B... / S...  -> file name contains 'body' / 'sensor'
#   .AHC         -> 'aug-onlytrain-rot-per' files (manual augmentation)
#   .ADA         -> 'aug-onlytrain' files without 'rot-per' (adasyn)
#   ...NN suffix -> same augmented file without the '-norm' suffix
#   .NOR         -> '-norm' file without augmentation
#   .FRA         -> plain framed file (no augmentation, no '-norm' suffix)
dataset_names = {
    'ARS-train-test-body-framed-aug-onlytrain-rot-per-norm.h5' : 'BAHC',   # manual aug
    'ARS-train-test-body-framed-aug-onlytrain-rot-per.h5' : 'BAHCNN',
    'ARS-train-test-body-framed-aug-onlytrain-norm.h5' : 'BADA',           # adasyn
    'ARS-train-test-body-framed-aug-onlytrain.h5' : 'BADANN',
    'ARS-train-test-body-framed-norm.h5' : 'BNOR',                         # not augmented
    'ARS-train-test-body-framed.h5' : 'BFRA',                              # not normalized
    'ARS-train-test-sensor-framed-aug-onlytrain-rot-per-norm.h5' : 'SAHC',
    'ARS-train-test-sensor-framed-aug-onlytrain-rot-per.h5' : 'SAHCNN',
    'ARS-train-test-sensor-framed-aug-onlytrain-norm.h5' : 'SADA',
    'ARS-train-test-sensor-framed-aug-onlytrain.h5' : 'SADANN',
    'ARS-train-test-sensor-framed-norm.h5' : 'SNOR',
    'ARS-train-test-sensor-framed.h5' : 'SFRA'
}

In [0]:
def get_folders(odir='output'):
    """Collect the directories located exactly three levels below ``odir``.

    Args:
        odir: root directory to scan (defaults to ``'output'``).

    Returns:
        A list of paths ``odir/d1/d2/d3`` where every component is a
        directory. Files found at any level are ignored; the order follows
        ``os.listdir``.
    """
    def subdirs(parent):
        # names of the entries of `parent` that are directories
        return [entry for entry in os.listdir(parent)
                if os.path.isdir(os.path.join(parent, entry))]

    found = []
    for first in subdirs(odir):
        level1 = os.path.join(odir, first)
        for second in subdirs(level1):
            level2 = os.path.join(level1, second)
            for third in subdirs(level2):
                found.append(os.path.join(level2, third))
    return found

In [0]:
# For every dataset, recompute the sample-weighted accuracy of each stored
# evaluation from its per-class accuracies and print it next to the 'acc'
# value found in the evaluation json.
num_classes = 7
folders = get_folders()
for ds, tag in dataset_names.items():
    print(f"Doing {tag}")
    # only the test split is read; the train split is not needed here
    with h5py.File(f"dataset/{ds}",'r') as h5f:
        X_test = h5f['X_test'][:]
        Y_test = h5f['Y_test'][:]

    # number of test samples per class
    samples = [len(X_test[Y_test == activity]) for activity in range(num_classes)]
    print(f"Samples: {samples}")
    #samples_w = [ val/sum(samples) for val in samples ]
    #pprint(samples_w)

    # folders of this dataset: the code must appear in the path, while the
    # 'NN' variant of the same code and any path containing 'train' are skipped
    int_folders = [
        f for f in folders
        if tag in f and tag+'NN' not in f and 'train' not in f
    ]
    for fld in int_folders:
        print(f"  Doing {fld}")
        eval_files = [f for f in os.listdir(fld) if 'evaluation' in f]
        for eval_file in eval_files:
            print(f"    Doing {eval_file}:\t",end='')
            with open(os.path.join(fld,eval_file),'r') as ef:
                efj = json.load(ef)
            acc = efj['acc-class']
            # per-class accuracy weighted by the class sample count
            summed = sum(acc[i]*samples[i] for i in range(num_classes))
            weighted_a = summed / sum(samples)

            print(f"{weighted_a} while acc in json is {efj['acc']}")
            # optional write-back of the recomputed value, left disabled:
            #efj['weighted-averages']['accuracy'] = weighted_a
            #with open(os.path.join(fld,eval_file+'2'),'w') as ef:
            #    json.dump(efj, ef, indent=2)

## LSTM in pure Tensorflow

In [0]:
# model definition
# Trainable weights (w) and biases (b) used by LSTM(): an input projection
# ('h') applied before the recurrent cells and an output layer ('o').

features = 32 # number of hidden layer's features

#batch = 1500 # TODO unused vars
#n_iters = 300
#tot_iters = Y_train.shape[0] * n_iters
#disp_iter = 1000

w = {
    # input projection: last axis of X_train -> `features`
    'h' : tf.Variable(tf.random_normal([X_train.shape[2], features])),
    # output layer: `features` -> Y_train.shape[1] outputs, drawn with mean 1.0
    'o' : tf.Variable(tf.random_normal([features, Y_train.shape[1]], mean=1.0))
}
b = {
    'h' : tf.Variable(tf.random_normal([features])),
    'o' : tf.Variable(tf.random_normal([Y_train.shape[1]]))
}

def LSTM(X, w, b):
    """Build the graph of a two-layer LSTM classifier and return its logits.

    Args:
        X: 3-D input tensor; assumed (batch_size, steps, input) as in the
            original comment - TODO confirm against the caller.
        w: dict with weight variables under the keys 'h' and 'o'.
        b: dict with bias variables under the keys 'h' and 'o'.

    Returns:
        ``out[-1] @ w['o'] + b['o']``, where ``out`` is the list of outputs
        produced by the static RNN (one per split of the input).

    Reads the module-level names ``features`` and ``X_train``.
    """
    # input processing: swap the first two axes, then flatten them into one
    swapped = tf.transpose(X, [1, 0, 2])
    flat = tf.reshape(swapped, [-1, swapped.shape[2]])

    # ReLU projection of every row, then X_train.shape[1] equal splits along axis 0
    projected = tf.nn.relu(tf.matmul(flat, w['h']) + b['h'])
    step_inputs = tf.split(projected, X_train.shape[1])

    # model: two stacked basic LSTM cells
    stacked_cells = [
        tf.contrib.rnn.BasicLSTMCell(features, forget_bias=1.0, state_is_tuple=True)
        for _ in range(2)
    ]
    lstm = tf.contrib.rnn.MultiRNNCell(stacked_cells, state_is_tuple=True)

    # output: only the last element of the RNN outputs feeds the output layer
    out, _ = tf.contrib.rnn.static_rnn(lstm, step_inputs, dtype=tf.float32)
    last_output = out[-1]
    return tf.matmul(last_output, w['o']) + b['o']

In [0]:
# define a dataset object on input: the training arrays cast to float32,
# repeated indefinitely and served in batches of 300
ds_obj = tf.data.Dataset.from_tensor_slices((X_train.astype(np.float32), Y_train.astype(np.float32))).repeat().batch(300)
# not named `iter`: that would shadow the builtin for the rest of the notebook
train_iterator = ds_obj.make_one_shot_iterator()
x, y = train_iterator.get_next()

prediction = LSTM(x, w, b)
# fraction of samples whose arg-max prediction matches the arg-max of the label
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(prediction,1), tf.argmax(y,1)),tf.float32))

# losses, optimizer
lr = 0.0025
lambda_l = 0.0015

# L2 penalty over all trainable variables, added to the mean cross-entropy
l2_norm = lambda_l * sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
softmax_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction)) + l2_norm
adam = tf.train.AdamOptimizer(learning_rate=lr).minimize(softmax_cost)

# run training
test_log  = {'loss':[], 'acc':[]}
train_log = {'loss':[], 'acc':[]}
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000): # training steps: each iteration consumes one batch of 300
        _, loss_val, acc_val = sess.run([adam, softmax_cost, accuracy])
        train_log['loss'].append(loss_val)
        train_log['acc'].append(acc_val)

        # feeding x and y replaces the iterator output with the whole test set;
        # the optimizer op is not run here, so this step only evaluates
        loss_val, acc_val = sess.run([softmax_cost, accuracy], feed_dict={x:X_test.astype(np.float32), y:Y_test.astype(np.float32)})
        test_log['loss'].append(loss_val)
        test_log['acc'].append(acc_val)
        #print("PERFORMANCE ON TEST SET: " + \
        #      "Batch Loss = {}".format(loss_val) + \
        #      ", Accuracy = {}".format(acc_val))
print('Reached {}'.format(max(test_log['acc'])))

# save stuff and plots
# Results go to output/<yymmdd-HHMMSS>_LSTM-TF, timestamped in the Europe/Rome time zone.
out_folder = os.path.join('output', datetime.now(pytz.timezone('Europe/Rome')).strftime('%y%m%d-%H%M%S')+'_LSTM-TF')
# makedirs also creates a missing 'output' parent and accepts an existing folder
os.makedirs(out_folder, exist_ok=True)

with open(os.path.join(out_folder, 'history.json'),'w') as hfile:
    # The logs hold the float32 scalars returned by sess.run, which the native
    # json module cannot serialize; default=float converts them on the fly.
    json.dump({'training':train_log, 'validation':test_log}, hfile, indent=2, default=float)

# One figure per logged metric, saved as both PNG and PDF.
for metric, ylabel, stem in (('loss', 'Loss', 'plot-loss'),
                             ('acc', 'Accuracy', 'plot-accuracy')):
    plt.figure()
    plt.plot(train_log[metric], label='Training')
    plt.plot( test_log[metric], label='Validation')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.tight_layout()
    fname = os.path.join(out_folder, stem)
    plt.savefig(fname+'.png')
    plt.savefig(fname+'.pdf', format='pdf')
    plt.close()

## Plots

In [0]:
# For every test in `container`: print one metrics line per evaluation,
# then show the stored plots (accuracy, loss and the confusion matrices).
names = ['F   ','AL  ','AOL ','APR ','APRL']
for t in container:
    entry = container[t]
    metric_lines = [
        f"{names[i]} L {ef['loss']:.4f} A {ef['acc']:.4f} "
        f"P {ef['precision']:.4f} R {ef['recall']:.4f} F1 {ef['f1']:.4f}\n"
        for i, ef in enumerate(entry['eval'])
    ]
    print(f"{t}\n" + ''.join(metric_lines))

    # narrow images first ('plot-metr' is intentionally left out)
    for key in ('plot-accu', 'plot-loss', 'plot-conf1'):
        display(Image(entry[key],width=420))
    print('\n')
    for key in ('plot-conf2', 'plot-conf3'):
        display(Image(entry[key],width=600))
    print()
    for key in ('plot-conf4', 'plot-conf5'):
        display(Image(entry[key],width=600))
    print('\n')
# very good for side-to-side images in colab
HTML('<style>.display_data { display: inline; } .output_image { display: inline; }</style>')

Classwise for best dataset

In [0]:
# Class-wise accuracy and F1 bar charts for every best evaluation that
# belongs to the chosen dataset.
best_dataset = 'SADA' # sensor, adasyn, normalized

best_evals = np.array(best_evals)
# keep the rows whose first column equals the chosen dataset code
is_best_dataset = best_evals[:,0] == best_dataset
selected = best_evals[is_best_dataset]
pprint(selected.tolist())
print()

for ds, net, _, metric, _, _ in selected:
    corr = container_single[f"{ds}_{net}"]

    # looked up as in the original cell, although not used further below
    cm = corr['plots'][f"confusion-best-{metric}.png"]
    corr_eval = corr['eval'][f"best-{metric}"]
    cwa = corr_eval['acc-class']
    cwf = corr_eval['f1-class']

    print(f"{ds} {net} {metric}\n{cwa}\n{cwf}\n")

    # one bar chart per class-wise metric
    for title, values in (('Accuracy', cwa), ('F1', cwf)):
        plt.figure()
        plt.bar(range(len(values)),values)
        plt.title(title)
        plt.tight_layout()
    print()
HTML('<style>.display_data { display: inline; } .output_image { display: inline; }</style>')

In [0]:
# Three column selections of dfAccRev, each shown as its own table.

# norm vs not norm
df4 = dfAccRev[[
    'BAHC', 'BAHCNN',
    'BADA', 'BADANN',
    'BNOR', 'BFRA',
    'SAHC', 'SAHCNN',
    'SADA', 'SADANN',
    'SNOR', 'SFRA',
]].copy()
display(df4)

# manual vs adasyn vs not aug.
df5 = dfAccRev[[
    'BAHC', 'BADA', 'BNOR',
    'SAHC', 'SADA', 'SNOR',
]].copy()
display(df5)

# body vs sensor
# NOTE: this rebinds df4, so the "norm vs not norm" table above is no longer
# reachable under that name after this point.
df4 = dfAccRev[[
    'BAHC', 'SAHC',
    'BADA', 'SADA',
    'BNOR', 'SNOR',
    'BFRA', 'SFRA',
]].copy()
display(df4)

In [0]:
# Work on a copy of dfAcc (presumably the accuracy table - confirm where it
# is built) and append the per-column means as an extra row labelled 'mean'.
df2 = dfAcc.copy()
df2.loc['mean'] = df2.mean() # adds mean row at the end
display(df2)

# Same treatment for dfF1s (presumably the F1-score table).
df3 = dfF1s.copy()
df3.loc['mean'] = df3.mean()
display(df3)

In [0]:
# Side-by-side comparison of one body dataset and one sensor dataset:
# for every network, the class-wise accuracies and the summary metrics of the
# 'best-a' evaluation are collected and pivoted into a single table.
body, sensor = 'BADA','SADA' # ['BADA','BAHC'],['SADA','SAHC']
body_tests, sensor_tests = {},{}
for test in container_single:
    # keys of container_single have the form '<dataset>_<network>'
    ds, net = test.split('_')
    if ds == body:
        body_tests[net] = container_single[test]
    elif ds == sensor:
        sensor_tests[net] = container_single[test]
# both datasets must have been evaluated on the same set of networks
assert body_tests.keys() == sensor_tests.keys(), "body and sensor tests cover different networks"
print(body_tests.keys())

# value columns: seven class-wise accuracies followed by the summary metrics
value_cols = ['c1','c2','c3','c4','c5','c6','c7','a','p','wp','r','wr','f1']

def _summary_row(ds_name, net_name, best):
    """Build one table row from the evaluation dict ``best``."""
    return [
        ds_name, net_name,
        *best['acc-class'],
        best['acc'],
        best['precision'],
        best['averages']['precision'],
        best['recall'],
        best['averages']['recall'],
        best['f1'],
    ]

arr = []
for key in body_tests:
    # rows get their own names: the original temporaries `b` and `s`
    # overwrote the module-level bias dict `b` of the LSTM section
    body_row = _summary_row(body, key, body_tests[key]['eval']['best-a'])
    sensor_row = _summary_row(sensor, key, sensor_tests[key]['eval']['best-a'])
    arr.append(body_row)
    arr.append(sensor_row)
df = pd.DataFrame(arr, columns=['ds','net'] + value_cols)
df.pivot(index='ds', columns='net', values=value_cols)