In [66]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# local modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [67]:
# Mount Google Drive into the Colab file system at /content/drive so the
# project folder stored on Drive can be accessed like a local directory.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [68]:
# Make the shared project folder the working directory so that the relative
# './Data/...' and 'Models/...' paths used by the following cells resolve inside it.
cd '/content/drive/Shared drives/Rahul_Mohit/ECE202A/WALG/'

/content/drive/Shared drives/Rahul_Mohit/ECE202A/WALG


Create a new folder in your Drive (I created a folder named HAR). Copy the data files and the two Python files (utils.py and existing_models.py) into that folder, then change the working directory to that folder.

In [69]:
from collections import Counter
import sys
import time
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.models import load_model
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, accuracy_score
import matplotlib.pyplot as plt
import itertools
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization, Permute, Reshape

In [70]:
## Function to get details of the dataset used for training.
def get_details(name):
    """Return the static description of one of the supported datasets.

    Parameters
    ----------
    name : str
        Dataset identifier: 'PAM2', 'WISDM' or 'WISDM_PAMAP2'.

    Returns
    -------
    tuple
        ``(num_classes, sensors, locations, label_names, f_hz, dimensions, path)``
        where ``num_classes`` is the number of activity labels, ``sensors`` and
        ``locations`` are lists of names, ``label_names`` lists the activity
        names, ``f_hz`` is the sampling frequency, ``dimensions`` names the
        data dimensions and ``path`` is the relative directory of the data.

    Raises
    ------
    ValueError
        If ``name`` is not a supported dataset. (Previously this case printed
        a message and then failed with an UnboundLocalError at the return.)
    """
    if name == 'PAM2':
        num_classes = 12
        sensors = ['acc', 'gyr', 'mag']
        locations = ['wrist', 'ankle', 'chest']
        label_names = ['Lying', 'Sitting', 'Standing', 'Walking',
                       'Running', 'Cycling', 'Nordic_walking', 'Ascending_stairs',
                       'Descending_stairs', 'Vacuum_cleaning', 'Ironing', 'Rope_jumping']
        f_hz = 100
        dimensions = ['sensor', 'location', 'frequency']
        path = './Data/' + name + '_test'
    elif name == 'WISDM':
        num_classes = 18
        sensors = ['acc', 'gyr']
        locations = ['wrist']
        label_names = ['Walking', 'Jogging', 'Stairs', 'Sitting', 'Standing', 'Typing',
                       'Brushing Teeth', 'Eating Soups', 'Eating Chips', 'Eating Pasta',
                       'Drinking from Cup', 'Eating Sandwich', 'Kicking Soccer Ball',
                       'Playing catch with Tennis Ball',
                       'Dribbling', 'Writing', 'Clapping', 'Folding Clothes']
        f_hz = 20
        dimensions = ['sensor', 'location', 'frequency']
        path = './Data/' + name + '/wisdm-dataset/processed'
    elif name == 'WISDM_PAMAP2':
        num_classes = 7
        sensors = ['Accelerometer', 'Gyroscope']
        locations = ['Wrist Watch']
        label_names = ['Walking', 'Stairs', 'Sitting', 'Brushing Teeth', 'Eating', 'Jogging', 'Clapping']
        f_hz = 20
        dimensions = ['sensor', 'location', 'frequency']
        path = './Data/' + name + '/'
    else:
        # Fail loudly with a meaningful error instead of falling through to
        # the return statement with unbound local variables.
        raise ValueError("No such dataset: {!r}".format(name))

    return num_classes, sensors, locations, label_names, f_hz, dimensions, path

## Function to load the dataset for training the models.
def load_dataset(name, path, num_classes):
    """Load the train/validation/test arrays of a dataset from ``.npy`` files.

    Parameters
    ----------
    name : str
        Dataset identifier: 'PAM2', 'WISDM' or 'WISDM_PAMAP2'.
    path : str
        Directory that contains the ``.npy`` files.
    num_classes : int
        Unused; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(X_train0, y_train_binary, X_val0, y_val_binary, X_test0,
        y_test_binary)`` exactly as returned by ``np.load``.

    Raises
    ------
    ValueError
        If ``name`` is not a supported dataset. (Previously this case printed
        a message and then failed with an UnboundLocalError at the return.)
    """
    # File-name templates (inputs, labels) per dataset. PAM2 puts the split
    # before the dataset name and uses a lower-case 'y'; the WISDM variants
    # put the dataset name first and use an upper-case 'Y'.
    wisdm_style = ('X_{name}_{split}.npy', 'Y_{name}_{split}.npy')
    templates = {
        'PAM2': ('X_{split}_{name}.npy', 'y_{split}_{name}.npy'),
        'WISDM': wisdm_style,
        'WISDM_PAMAP2': wisdm_style,
    }
    if name not in templates:
        raise ValueError("No such dataset: {!r}".format(name))

    arrays = []
    for split in ('train', 'val', 'test'):
        for template in templates[name]:
            file_name = template.format(name=name, split=split)
            # np.load is looked up at call time on purpose: callers may
            # temporarily wrap it (e.g. to pass allow_pickle=True).
            arrays.append(np.load(os.path.join(path, file_name)))

    return tuple(arrays)

## Function to reshape data as required by the model.
def reshape_data(X_tr, X_va, X_tst, network_type):
    """Bring the three data splits into the layout a network type expects.

    The window length and feature dimension are taken from ``X_tr`` (which
    must be 3-D: frames x window x features) and applied to all three splits.

    * 'CNN' / 'ConvLSTM': each split becomes (frames, dim, win_len, 1).
    * 'MLP': each split is flattened to (frames, dim * win_len).
    * any other value: the splits are returned unchanged.

    Returns the tuple ``(X_tr, X_va, X_tst)`` in the new layout.
    """
    win_len, dim = X_tr.shape[1:]
    splits = (X_tr, X_va, X_tst)

    if network_type in ('CNN', 'ConvLSTM'):
        # Put the feature axis before the time axis, then append a
        # single-channel axis for the 2-D convolutions.
        conv_shape = (-1, dim, win_len, 1)
        splits = tuple(
            np.reshape(np.swapaxes(split, 1, 2), conv_shape) for split in splits
        )
    elif network_type == 'MLP':
        flat_shape = (-1, dim * win_len)
        splits = tuple(np.reshape(split, flat_shape) for split in splits)

    train_out, val_out, test_out = splits
    return train_out, val_out, test_out

## Function to define CNN model.
def model_CNN(dim, win_len, num_classes, num_feat_map=64, p=0., batchnorm=True, dropout=True):
    """Build the (uncompiled) Keras ``Sequential`` CNN classifier.

    The network consists of three identical stages -- Conv2D with a (1, 3)
    kernel and 'same' padding, optional batch normalisation, (1, 2) max
    pooling, optional dropout -- followed by a flatten layer, a 32-unit ReLU
    dense layer (again with optional batch normalisation and dropout) and a
    softmax output layer.

    Parameters
    ----------
    dim, win_len : int
        Input height and width; the model input shape is (dim, win_len, 1).
    num_classes : int
        Number of units of the softmax output layer.
    num_feat_map : int
        Number of filters of every convolution layer.
    p : float
        Rate passed to every Dropout layer.
    batchnorm, dropout : bool
        Whether the BatchNormalization / Dropout layers are added.

    Returns
    -------
    The assembled ``Sequential`` model named 'CNN'.
    """
    net = Sequential(name='CNN')

    for stage in (1, 2, 3):
        conv_options = dict(kernel_size=(1, 3), activation='relu',
                            padding='same', name='Conv_{}'.format(stage))
        if stage == 1:
            # Only the first layer declares the input shape.
            conv_options['input_shape'] = (dim, win_len, 1)
        net.add(Conv2D(num_feat_map, **conv_options))
        if batchnorm:
            net.add(BatchNormalization(name='Bn_{}'.format(stage)))
        net.add(MaxPooling2D(pool_size=(1, 2), name='Max_pool_{}'.format(stage)))
        if dropout:
            net.add(Dropout(p, name='Drop_{}'.format(stage)))

    # Classification head.
    net.add(Flatten(name='Flatten_1'))
    net.add(Dense(32, activation='relu'))
    if batchnorm:
        net.add(BatchNormalization(name='Bn_4'))
    if dropout:
        net.add(Dropout(p, name='Drop_4'))
    net.add(Dense(num_classes, activation='softmax', name='dense_out'))
    return net


In [71]:
# Experiment configuration: dataset, network architecture and training
# hyper-parameters.
d_name = 'WISDM_PAMAP2'
network_type = 'CNN'
batch_size = 256
epochs = 50

# Where trained models are stored. The model name carries the current Unix
# time (whole seconds), so re-running this cell yields new file names.
model_dir = 'Models/' + d_name
model_name = f'{network_type}_{int(time.time())}'
filepath = 'best_' + model_name + '.hdf5'
chk_path = os.path.join(model_dir, filepath)

# Class index -> activity name.
ACTIVITIES_MAP = dict(enumerate([
    'Walking',
    'Stairs',
    'Sitting',
    'Brushing Teeth',
    'Eating',
    'Jogging',
    'Clapping',
]))

## Load the Dataset

Load the preprocessed data stored in NumPy (`.npy`) files. Please note that the data has already been split into training (train), validation (val), and test subsets.

In [72]:
# Load the dataset for training
num_classes, sensors, locations, label_names, f_hz, dimensions, path = get_details(d_name)
print("Number of classes:  ", num_classes)
print("Sensors:            ", sensors)
print("Devices:            ",locations)
print("Sampling frequency: ",f_hz)

# load_dataset() calls np.load() with default arguments, so np.load is
# temporarily wrapped to pass allow_pickle=True.
# NOTE(review): allow_pickle=True can execute arbitrary code while loading;
# only use it on trusted .npy files.
np_load_old = np.load
np.load = lambda *a, **k: np_load_old(*a, allow_pickle=True, **k)
try:
    X_train0, y_train_binary, X_val0, y_val_binary, X_test0, y_test_binary = load_dataset(d_name, path, num_classes)
finally:
    # Always restore the original function, even if loading fails; otherwise
    # np.load would stay patched for the rest of the kernel session.
    np.load = np_load_old

print ("Dataset Shapes:")
print("  Train inputs:      ",X_train0.shape)
print("  Test inputs:       ",X_test0.shape)
print("  Validation inputs: ",X_val0.shape)
print("  Train labels:      ", y_train_binary.shape)
print("  Test labels:       ",y_test_binary.shape)
print("  Validation labels: ",y_val_binary.shape)

# Counting data for different activities: the label arrays hold one row per
# sample, and argmax over axis 1 turns each row into a class index.
y_train = np.argmax(y_train_binary, axis=1)
y_test = np.argmax(y_test_binary, axis=1)
y_val = np.argmax(y_val_binary, axis=1)

train_count = Counter(y_train)
val_count = Counter(y_val)
test_count = Counter(y_test)

print ("Amount of data for each activity: ")
for split_title, split_count in ((" Training Data:", train_count),
                                 (" Validation Data:", val_count),
                                 (" Testing Data:", test_count)):
    print(split_title)
    for activity, activity_name in ACTIVITIES_MAP.items():
        print ("    {} = {}".format(activity_name, split_count[activity]))

# Converting all the data to float32.
X_train0        = np.asarray(X_train0).astype('float32')
X_test0         = np.asarray(X_test0).astype('float32')
X_val0          = np.asarray(X_val0).astype('float32')
y_train_binary  = np.asarray(y_train_binary).astype('float32')
y_test_binary   = np.asarray(y_test_binary).astype('float32')
y_val_binary    = np.asarray(y_val_binary).astype('float32')

Number of classes:   7
Sensors:             ['Accelerometer', 'Gyroscope']
Devices:             ['Wrist Watch']
Sampling frequency:  20
Dataset Shapes:
  Train inputs:       (31045, 202, 6)
  Test inputs:        (6469, 202, 6)
  Validation inputs:  (4151, 202, 6)
  Train labels:       (31045, 7)
  Test labels:        (6469, 7)
  Validation labels:  (4151, 7)
Amount of data for each activity: 
 Training Data:
    Walking = 4095
    Stairs = 3278
    Sitting = 7252
    Brushing Teeth = 3279
    Eating = 6671
    Jogging = 3191
    Clapping = 3279
 Validation Data:
    Walking = 582
    Stairs = 362
    Sitting = 917
    Brushing Teeth = 474
    Eating = 911
    Jogging = 438
    Clapping = 467
 Testing Data:
    Walking = 841
    Stairs = 737
    Sitting = 1465
    Brushing Teeth = 682
    Eating = 1364
    Jogging = 698
    Clapping = 682


## My Models

In [73]:
print('Reshaping data for different models ...')
X_train, X_val, X_test = reshape_data(X_train0, X_val0, X_test0, network_type)
# Window length and feature dimension come from the un-reshaped data.
_, win_len, dim = X_train0.shape

print('Building the model ...')
model = model_CNN(dim, win_len, num_classes, num_feat_map=64, p=0.3)
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (which would append a stray "None" line).
model.summary()

print('Training the model ...')
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer='adam',
              metrics=['accuracy'])

# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(model_dir, exist_ok=True)

tensorboard = TensorBoard(log_dir=os.path.join('logs', '{}'.format(model_name)))

# Keep only the weights of the epoch with the highest validation accuracy.
checkpoint = ModelCheckpoint(chk_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

# Use the batch size from the configuration cell rather than a hard-coded
# value, so the setting declared there is the one actually applied.
model.fit(X_train, y_train_binary,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          shuffle=True,
          validation_data=(X_val, y_val_binary),
          callbacks=[tensorboard, checkpoint])

# Also store the model as it is after the last epoch.
model.save(os.path.join(model_dir, f'final_{model_name}.hdf5'))

Reshaping data for different models ...
Building the model ...
Model: "CNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Conv_1 (Conv2D)              (None, 6, 202, 64)        256       
_________________________________________________________________
Bn_1 (BatchNormalization)    (None, 6, 202, 64)        256       
_________________________________________________________________
Max_pool_1 (MaxPooling2D)    (None, 6, 101, 64)        0         
_________________________________________________________________
Drop_1 (Dropout)             (None, 6, 101, 64)        0         
_________________________________________________________________
Conv_2 (Conv2D)              (None, 6, 101, 64)        12352     
_________________________________________________________________
Bn_2 (BatchNormalization)    (None, 6, 101, 64)        256       
_________________________________________________________________


In [74]:
## Reloading saved model
# Replace the in-memory model with the one stored at the checkpoint path.
print(f"Loading the model: {chk_path}")
model = load_model(chk_path)

Loading the model: Models/WISDM_PAMAP2/best_CNN_1607824644.hdf5


In [75]:
## Testing the trained model accuracy with test data set.
# Both the label rows and the predicted class scores are reduced to class
# indices with argmax before the metrics are computed.
y_true = np.argmax(y_test_binary, axis=1)
y_pred = np.argmax(model.predict(X_test), axis=1)

cf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix: ", cf_matrix, sep="\n")

# average=None yields one F1 value per class; their plain mean is reported.
class_wise_f1 = f1_score(y_true, y_pred, average=None)
accuracy = accuracy_score(y_true, y_pred)
print(f'The mean-f1 score: {np.mean(class_wise_f1):.4f}')
print(f'Accuracy is: {accuracy:.4f}')

Confusion Matrix: 
[[ 639  163   14   12    5    8    0]
 [  62  627   10    1   32    5    0]
 [   0    0 1254    0  186    0   25]
 [   0    0    0  671   11    0    0]
 [   1    0  136    1 1207    0   19]
 [   6   13    0    0    4  675    0]
 [   0    0    0    4    0    0  678]]
The mean-f1 score: 0.8984
Accuracy is: 0.8890


Perform Inference on live data collected from Apple Watch

In [76]:
# Live data collected from Apple Watch
file_list = [
  '2020_11_22_Rahul_0_1', '2020_11_22_Mohit_0_1',
  '2020_11_22_Mohit_2_1',
  '2020_11_22_Rahul_3', '2020_11_21_Rahul_3', '2020_11_23_Rahul_3', '2020_11_24_Rahul_3', '2020_11_26_Rahul_3', '2020_11_29_Mohit_3',
  '2020_11_20_Rahul_4', '2020_11_29_Mohit_4',
  '2020_11_22_Rahul_1_1', '2020_11_22_Mohit_1_1', '2020_11_22_Rahul_1_2', '2020_11_22_Mohit_1_2',
  '2020_12_04_Mohit_up_1', '2020_12_04_Mohit_down_1', '2020_12_04_Rahul_up_1', '2020_12_04_Rahul_down_1',
  '2020_12_04_Mohit_5',
  '2020_11_30_Rahul_5'
]
# Expected activity index (key of ACTIVITIES_MAP) for each entry of file_list.
# NOTE(review): '2020_11_30_Rahul_5' is labelled 6 (Clapping) although every
# other numeric file suffix equals its label -- confirm this is intended.
exp_activities = [0,0,2,3,3,3,3,3,3,4,4,1,1,1,1,1,1,1,1,5,6]

# zip() silently drops unmatched trailing items, so fail loudly if the two
# lists ever get out of step.
if len(file_list) != len(exp_activities):
    raise ValueError("file_list has {} entries but exp_activities has {}".format(
        len(file_list), len(exp_activities)))

inference_dir = './Data/WALG_inference'

for (file_name, exp_act) in zip(file_list, exp_activities):
  X_data0 = np.load(os.path.join(inference_dir, 'X_' + file_name + '.npy'))
  Y_true = np.load(os.path.join(inference_dir, 'Y_' + file_name + '.npy'))

  # reshape_data() works on three splits at once; only one array is needed
  # here, so the same recording is passed three times and the first result kept.
  X_data = reshape_data(X_data0, X_data0, X_data0, network_type)[0]
  Y_pred = np.argmax(model.predict(X_data), axis=1)
  # Only the accuracy is reported per recording. The unused per-file
  # confusion matrix and F1 scores were removed because they overwrote the
  # test-set results (cf_matrix, class_wise_f1) of the evaluation cell.
  accuracy = accuracy_score(Y_true, Y_pred)
  # The detected activity is the class predicted for the most windows.
  counts = np.bincount(Y_pred)
  activity_id = np.argmax(counts)
  print("===========================================================")
  # Start/end timestamps are optional: not every recording has a *_dt.csv.
  dt_path = os.path.join(inference_dir, file_name + '_dt.csv')
  if os.path.exists(dt_path):
    dt = pd.read_csv(dt_path)
    # Row 0 is printed as the start and row 1 as the end; columns 1 and 2
    # are joined with a space (presumably date and time -- verify against the CSV).
    print ("Activity Start Time: " + str(dt.iloc[0,1]) + ' ' +str(dt.iloc[0,2]) )
    print ("Activity End Time:   " + str(dt.iloc[1,1]) + ' ' +str(dt.iloc[1,2]) )
  print('Accuracy:            {:.4f}'.format(accuracy))
  print("Expected Activity:   {}".format(ACTIVITIES_MAP[exp_act]))
  print("Detected Activity:   {}".format(ACTIVITIES_MAP[activity_id]))
  print("===========================================================")

Activity Start Time: 2020-11-22 10:49:11.637
Activity End Time:   2020-11-22 10:59:22.238
Accuracy:            0.9900
Expected Activity:   Walking
Detected Activity:   Walking
Activity Start Time: 2020-11-22 11:25:36.540
Activity End Time:   2020-11-22 11:32:01.269
Accuracy:            0.9521
Expected Activity:   Walking
Detected Activity:   Walking
Activity Start Time: 2020-11-22 12:25:24.437
Activity End Time:   2020-11-22 12:27:07.375
Accuracy:            0.8085
Expected Activity:   Sitting
Detected Activity:   Sitting
Activity Start Time: 2020-11-22 09:37:32.630
Activity End Time:   2020-11-22 09:40:18.282
Accuracy:            1.0000
Expected Activity:   Brushing Teeth
Detected Activity:   Brushing Teeth
Accuracy:            1.0000
Expected Activity:   Brushing Teeth
Detected Activity:   Brushing Teeth
Activity Start Time: 2020-11-23 08:25:43.287
Activity End Time:   2020-11-23 08:28:32.150
Accuracy:            0.9875
Expected Activity:   Brushing Teeth
Detected Activity:   Brushin