# Going deep into Human Activity Recognition

**Elia Bonetto, Filippo Rigotto.**

Department of Information Engineering, University of Padova, Italy.

Human Data Analytics, a.y. 2018/2019

## Part 3 - SVM

In [0]:
import os

In [0]:
# --- Standard library / general utilities ---
from pprint import pprint
import json
from datetime import datetime
import pytz

# --- Numerics and data containers ---
import math
import h5py
import numpy as np
import scipy as sp
import scipy.io

import pandas as pd
# Show floats in DataFrames with three decimal places.
pd.set_option('display.precision',3)
pd.set_option('display.float_format', '{:0.3f}'.format)

# --- Evaluation metrics ---
from sklearn.metrics import classification_report, confusion_matrix

# --- Plotting ---
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Render figures inside the notebook output cells.
%matplotlib inline
# Default figure size and grid for every plot created afterwards.
mpl.rcParams['figure.figsize'] = (10,6)
mpl.rcParams['axes.grid'] = True

# --- Deep learning (Keras API bundled with TensorFlow) ---
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Dropout
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, GRU
from tensorflow.keras.layers import TimeDistributed, RepeatVector, UpSampling1D, UpSampling2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical, plot_model

# Optional: uncomment the two lines below to disable the 'tensorflow' logger.
#import logging
#logging.getLogger('tensorflow').disabled = True

from tensorflow.keras import backend as K
# Set the Keras image data format to 'channels_last'.
K.set_image_data_format('channels_last')

## Data loading

Start from previously preprocessed data, already split into train and test parts.

In [0]:
# Activity names listed in class-index order: position k is the label of class k.
_activity_names = [
    'running',
    'walking',
    'jumping',
    'standing',
    'sitting',
    'lying',
    'falling',
]

# Lookup table from integer class index to human-readable activity name.
map_decode = dict(enumerate(_activity_names))

# Total number of activity classes handled by the notebook.
num_classes = len(map_decode)

In [0]:
def eval(model, x_test, y_test, y_test_orig, model_suffix, out_folder='./output', num_classes=7):
    """Evaluate a fitted classifier on the test set and persist the results.

    NOTE: the name shadows the built-in ``eval``; it is kept because the
    notebook calls the function under this name.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(x_test)``. The predictions are
        used as class labels directly (no argmax is applied).
    x_test : array-like
        Test features handed to ``model.predict``.
    y_test : array-like
        Unused; kept so existing call sites keep working.
    y_test_orig : array-like
        Ground-truth class labels; assumed to be the integers
        ``0 .. num_classes-1`` (TODO confirm against the dataset files).
    model_suffix : str
        Tag used for the output sub-folder and the file names.
    out_folder : str
        Root folder under which ``<model_suffix>/`` is created.
    num_classes : int
        Number of classes; fixes the size of the confusion matrix.

    Side effects
    ------------
    Prints the metrics and writes, inside ``out_folder/model_suffix``:
    ``evaluation-<suffix>.json``, ``confusion-<suffix>.npy`` and
    ``plot-confusion-<suffix>.png`` / ``.pdf``.
    """
    print(f"\nEvaluation of {model_suffix}")
    metrics = {}

    # get predictions (already class labels for the scikit-learn classifiers used here)
    y_pred = model.predict(x_test)

    label_ids = list(range(num_classes))     # classes as integers
    classes_num = list(map(str, label_ids))  # classes as the str keys used by classification_report
    classes = list(map_decode.values())
    metrics['classes'] = classes

    # Build per-class metrics and confusion matrix. Passing `labels` pins both
    # outputs to exactly `num_classes` entries even when a class never shows up
    # in the ground truth or in the predictions; without it the matrix shrinks
    # and the diagonal lookup / tick labels below no longer line up.
    cr = classification_report(y_test_orig, y_pred, labels=label_ids, output_dict=True)
    cm = confusion_matrix(y_test_orig, y_pred, labels=label_ids)

    # Row normalization; rows of classes with no test samples stay at 0
    # instead of becoming NaN through a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm = np.divide(cm.astype('float'), row_totals,
                   out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)

    # diagonal of the row-normalized matrix: fraction of each true class predicted correctly
    acc_class = [cm[i, i] for i in label_ids]
    prc_class = [cr[cl]['precision'] for cl in classes_num]
    rec_class = [cr[cl]['recall']    for cl in classes_num]
    f1_class  = [cr[cl]['f1-score']  for cl in classes_num]

    metrics['acc-class'] = acc_class
    metrics['precision-class'] = prc_class
    metrics['recall-class'] = rec_class
    metrics['f1-class'] = f1_class
    # the averaged entries are stored without their 'support' counts
    metrics['averages'] = {k: v for k, v in cr['macro avg'].items() if k != 'support'}
    metrics['weighted-averages'] = {k: v for k, v in cr['weighted avg'].items() if k != 'support'}

    def _to_builtin(value):
        """Recursively replace NumPy scalars/arrays with plain Python objects."""
        if isinstance(value, dict):
            return {key: _to_builtin(val) for key, val in value.items()}
        if isinstance(value, (list, tuple)):
            return [_to_builtin(val) for val in value]
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        return value

    # Conversion to pure python types before printing and saving to json. The
    # values are nested inside lists and dicts, so the conversion must recurse;
    # a check on the top-level values alone never matches anything.
    metrics = _to_builtin(metrics)
    print()
    pprint(metrics)

    # save evaluation dict, confusion matrix and its plot
    target_dir = os.path.join(out_folder, model_suffix)
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, f"evaluation-{model_suffix}.json"), 'w') as efile:
        json.dump(metrics, efile, indent=2)

    np.save(os.path.join(target_dir, f"confusion-{model_suffix}.npy"), cm)

    plt.figure()
    sns.heatmap(cm, annot=True, cmap='Blues', xticklabels=classes, yticklabels=classes)
    plt.xlabel('Predicted class')
    plt.ylabel('True class')
    plt.tight_layout()
    fname = os.path.join(target_dir, f"plot-confusion-{model_suffix}")
    plt.savefig(fname+'.png')
    plt.savefig(fname+'.pdf', format='pdf')
    plt.close()

In [0]:
from sklearn.model_selection import ParameterGrid
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score, hinge_loss,confusion_matrix
import parfit.parfit as pf

# Imported once here instead of on every iteration of the dataset loop.
from tensorflow.keras.models import Model, Sequential, load_model

# Dataset file name -> short tag; the tag is also the name of the folder
# that holds the pre-trained deep models for that dataset.
dic = {
    'ARS-train-test-sensor-framed-aug-onlytrain-rot-per-norm.h5':'SAHC', # manual aug
    'ARS-train-test-body-framed-aug-onlytrain-rot-per-norm.h5':'BAHC',   # manual aug
    'ARS-train-test-sensor-framed-aug-onlytrain-norm.h5':'SADA',         # adasyn
    'ARS-train-test-sensor-framed-norm.h5':'SNOR',                       # not augmented
    'ARS-train-test-body-framed-norm.h5':'BNOR',                         # not augmented
    'ARS-train-test-sensor-framed.h5':'SFRA'                             # not normalized
}

# Root folder of the pre-trained models. The default is the location used when
# the notebook was written; set HAR_MODELS_DIR to run it on another machine.
models_dir = os.environ.get('HAR_MODELS_DIR', '/home/eliab/Desktop')

# (name, loss, penalty) of every linear classifier trained on the deep features.
# All other SGDClassifier hyper-parameters are shared between the variants.
# NOTE(review): recent scikit-learn releases spell the logistic loss 'log_loss'
# instead of 'log' - confirm against the installed version.
sgd_variants = [
    ('SVM-l2',         'hinge', 'l2'),
    ('SVM-l1',         'hinge', 'l1'),
    ('SVM-elasticnet', 'hinge', 'elasticnet'),
    ('Log-l2',         'log',   'l2'),
    ('Log-l1',         'log',   'l1'),
    ('Log-elasticnet', 'log',   'elasticnet'),
]

for i in dic.keys():
    print("---------------------------------")
    print(i)
    print("---------------------------------")
    with h5py.File(f'dataset/{i}','r') as h5f:
        X_train = h5f['X_train'][:] # training features
        X_test  = h5f['X_test'][:]  # test features
        Y_train = h5f['Y_train'][:] # training labels (class indices)
        Y_test  = h5f['Y_test'][:]  # test labels (class indices)

    num_data = len(X_train)

    # Keep the original integer labels: the linear classifiers are fitted and
    # scored on them. The one-hot versions are only forwarded to eval().
    Y_test_orig  = Y_test.copy()
    Y_train_orig = Y_train.copy()
    Y_train = to_categorical(Y_train, num_classes=num_classes, dtype=np.uint8)
    Y_test  = to_categorical(Y_test,  num_classes=num_classes, dtype=np.uint8)

    # Flatten every sample, then regroup the values into rows of 9 so the deep
    # model receives inputs of shape (1, 9).
    x_tr = X_train.reshape((X_train.shape[0], X_train.shape[2]*X_train.shape[1]))
    x_te = X_test.reshape((X_test.shape[0], X_test.shape[2]*X_test.shape[1]))
    x_tr = x_tr.reshape(-1,1,9)
    x_te = x_te.reshape(-1,1,9)
    input_shape = (x_tr.shape[1], x_tr.shape[2])

    for j in ['cnn','lstm','mixed']:
        print(f"-----------{j}------------")
        model_path = f'{models_dir}/{dic[i]}/model-best-a-{j}.h5'
        if not os.path.exists(model_path):
            # no pre-trained model of this kind for this dataset
            continue
        model = load_model(model_path)

        # Feature extractor: the first two layers of the trained model,
        # rewired onto a fresh input and frozen.
        DL_input = Input(input_shape)
        DL_model = DL_input
        for layer in model.layers[:2]:
            DL_model = layer(DL_model)
        DL_model = Model(inputs=DL_input, outputs=DL_model)
        for layer in DL_model.layers:
            layer.trainable = False

        # Deep features, regrouped into one flat vector per original sample.
        data = DL_model.predict(x_tr, verbose = 0)
        data = data.reshape(X_train.shape[0],-1)
        data_te = DL_model.predict(x_te, verbose = 0)
        data_te = data_te.reshape(X_test.shape[0], -1)

        for clf_name, clf_loss, clf_penalty in sgd_variants:
            print(clf_name)
            clf = SGDClassifier(alpha=0.0001, average=False, class_weight=None,
                  early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
                  l1_ratio=0.15, learning_rate='optimal', loss=clf_loss, max_iter=1000,
                  n_iter_no_change=5, n_jobs=-1, penalty=clf_penalty, power_t=0.5,
                  random_state=42, shuffle=True, tol=0.001, validation_fraction=0.1,
                  verbose=0, warm_start=False)
            clf.fit(data, Y_train_orig)
            print(clf.score(data, Y_train_orig))    # train accuracy
            print(clf.score(data_te, Y_test_orig))  # test accuracy
            eval(clf, data_te, Y_test, Y_test_orig, model_suffix=f'{clf_name}-{dic[i]}-{j}')
            print("\\\\\\\\\\\\\\\\\\\\\\\\\\\\")

    print("---------------------------------")
    print("-------------END-----------------")
    print("---------------------------------")


---------------------------------
ARS-train-test-sensor-framed-aug-onlytrain-rot-per-norm.h5
---------------------------------
-----------cnn------------
SVM-l2
0.8287638984462714
0.7880663241475946

Evaluation of SVM-l2-SAHC-cnn

{'acc-class': [0.8226950354609929,
               0.9127649088220798,
               0.25821596244131456,
               0.917550058892815,
               0.35468277945619336,
               0.9886506935687264,
               0.12727272727272726],
 'averages': {'f1-score': 0.6326900784993125,
              'precision': 0.6874029248656205,
              'recall': 0.6259760237021214},
 'classes': ['running',
             'walking',
             'jumping',
             'standing',
             'sitting',
             'lying',
             'falling'],
 'f1-class': [0.760655737704918,
              0.8717345257707696,
              0.35143769968051125,
              0.82086406743941,
              0.4970364098221846,
              0.9781659388646288,
             