In [41]:
import pandas as pd
import numpy as np
import tsfresh
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import tensorflow as tf

import os

import matplotlib.pyplot as plt

# keras goodies
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv1D, Dropout, MaxPooling1D, BatchNormalization
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import metrics as kmetrics
import tensorflow.keras.backend as K

In [52]:
import os
clean_data_folder = "./Data/Clean"
for dirpath, dirnames, filenames in os.walk(clean_data_folder):
    print(dirpath)
    print(filenames)

./Data/Clean
[]
./Data/Clean\s1541031
['Respeck_s1541031_Climbing stairs_06-10-2021_11-25-43.csv', 'Respeck_s1541031_Descending stairs_06-10-2021_11-25-05.csv', 'Respeck_s1541031_Desk work_06-10-2021_11-04-49.csv', 'Respeck_s1541031_Falling on knees_06-10-2021_15-48-40.csv.csv', 'Respeck_s1541031_Falling on the back_06-10-2021_15-55-24.csv.csv', 'Respeck_s1541031_Falling on the left_06-10-2021_16-04-18.csv.csv', 'Respeck_s1541031_Falling on the right_06-10-2021_16-10-42.csv.csv', 'Respeck_s1541031_Lying down left_06-10-2021_14-30-04.csv', 'Respeck_s1541031_Lying down on back_06-10-2021_11-15-13.csv', 'Respeck_s1541031_Lying down on stomach_06-10-2021_14-30-50.csv', 'Respeck_s1541031_Lying down right_06-10-2021_14-27-40.csv', 'Respeck_s1541031_Movement_06-10-2021_11-11-18.csv', 'Respeck_s1541031_Running_08-10-2021_12-10-37.csv', 'Respeck_s1541031_Sitting bent backward_06-10-2021_14-33-45.csv', 'Respeck_s1541031_Sitting bent forward_06-10-2021_14-32-55.csv', 'Respeck_s1541031_Sitting_06-

In [60]:
base_df = pd.DataFrame()

clean_data_folder = "./Data/Clean"

for dirpath, dirnames, filenames in os.walk(clean_data_folder):
    if len(filenames) != 0:
        for f in filenames:
            full_path = f"{dirpath}/{f}"
            #Only load data from respeck
            if "Respeck" in full_path: 
                # load data into a DataFrame
                new_df = pd.read_csv(full_path)
                # merge into the base DataFrame
                base_df = pd.concat([base_df, new_df])

In [63]:
len(base_df)

646319

In [64]:
base_df.reset_index(drop=True, inplace=True)
base_df

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,sensor_type,activity_type,activity_code,subject_id,notes,recording_id
0,1.633516e+12,-0.763184,-0.014465,0.079773,22.062500,9.484375,15.671875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
1,1.633516e+12,-0.794678,0.042175,0.153503,7.578125,-11.687500,6.468750,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
2,1.633516e+12,-0.748291,-0.076233,0.016296,15.781250,-9.375000,4.890625,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
3,1.633516e+12,-0.949707,-0.011536,0.012390,6.046875,-7.031250,1.531250,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
4,1.633516e+12,-1.209961,0.216492,0.016296,-19.218750,-6.078125,1.421875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
646314,1.632940e+12,-0.304199,-1.083313,0.414978,-0.578125,18.859375,-18.781250,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
646315,1.632940e+12,-0.057129,-1.235901,0.121765,1.593750,14.609375,-2.328125,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
646316,1.632940e+12,0.239258,-1.449768,-0.063782,-6.281250,22.671875,18.984375,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
646317,1.632940e+12,0.305908,-1.131653,-0.114075,-3.718750,11.906250,13.140625,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...


## Formatting recording_id (Assign a groupId based on subject_id and activity_type)
There are some ill formated recording_id egg, same activity should have same recording_id, the following line will create unique id out of (subject_id,activity_type), thereby fixing the ill formated recording_id

In [79]:
base_df['grpId']=base_df.groupby(['activity_type','subject_id']).ngroup().add(1)

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,sensor_type,activity_type,activity_code,subject_id,notes,recording_id,grpId
0,1.633516e+12,-0.763184,-0.014465,0.079773,22.062500,9.484375,15.671875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...,1
1,1.633516e+12,-0.794678,0.042175,0.153503,7.578125,-11.687500,6.468750,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...,1
2,1.633516e+12,-0.748291,-0.076233,0.016296,15.781250,-9.375000,4.890625,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...,1
3,1.633516e+12,-0.949707,-0.011536,0.012390,6.046875,-7.031250,1.531250,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...,1
4,1.633516e+12,-1.209961,0.216492,0.016296,-19.218750,-6.078125,1.421875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13866,1.633541e+12,0.197021,-1.142151,-0.129456,-1.328125,11.562500,8.671875,Respeck,Walking at normal speed,1,s1541031,,Respeck_s1541031_Walking at normal speed_06-10...,794
13867,1.633541e+12,0.353027,-1.001770,0.024597,-1.625000,13.328125,1.156250,Respeck,Walking at normal speed,1,s1541031,,Respeck_s1541031_Walking at normal speed_06-10...,794
13868,1.633541e+12,0.076172,-0.880432,0.091980,3.171875,13.406250,-6.765625,Respeck,Walking at normal speed,1,s1541031,,Respeck_s1541031_Walking at normal speed_06-10...,794
13869,1.633541e+12,0.097656,-0.638245,0.061707,5.921875,6.000000,-4.562500,Respeck,Walking at normal speed,1,s1541031,,Respeck_s1541031_Walking at normal speed_06-10...,794


## create window id for each activity/student_id
each window_id is a training sample for a specific activity.

each window_id represent a training input for an activity, one activity can have multiple window_id.

In [81]:
window_size = 50 # 50 datapoints for the window size, which, at 25Hz, means 2 seconds
step_size = 25 # this is 50% overlap

window_number = 0 # start a counter at 0 to keep track of the window number

all_overlapping_windows = []

for gid, group in base_df.groupby("grpId"):
    
    large_enough_windows = [window for window in group.rolling(window=window_size, min_periods=window_size) if len(window) == window_size]
    
    overlapping_windows = large_enough_windows[::step_size] 
    
    # then we will append a window ID to each window
    for window in overlapping_windows:
        window.loc[:, 'window_id'] = window_number
        window_number += 1


    all_overlapping_windows.append(pd.concat(overlapping_windows).reset_index(drop=True))

In [89]:
final_sliding_windows = pd.concat(all_overlapping_windows).reset_index(drop=True)

Taking a look at activity for each activity

In [90]:
final_sliding_windows.groupby("window_id")[['activity_type']].agg(np.min)

Unnamed: 0_level_0,activity_type
window_id,Unnamed: 1_level_1
0,Climbing stairs
1,Climbing stairs
2,Climbing stairs
3,Climbing stairs
4,Climbing stairs
...,...
24655,Walking at normal speed
24656,Walking at normal speed
24657,Walking at normal speed
24658,Walking at normal speed


In [91]:
final_sliding_windows.groupby("window_id")[['activity_type']].agg(np.min)['activity_type'].unique()

array(['Climbing stairs', 'Descending stairs', 'Desk work',
       'Falling on knees', 'Falling on the back', 'Falling on the left',
       'Falling on the right', 'Lying down left', 'Lying down on back',
       'Lying down on stomach', 'Lying down right', 'Movement', 'Running',
       'Sitting', 'Sitting bent backward', 'Sitting bent forward',
       'Standing', 'Walking at normal speed'], dtype=object)

Assign class label for every activity, later we will convert them to one-hot encoding

In [95]:
class_labels = {
    'Desk work': 0,
    'Walking at normal speed': 1,
    'Climbing stairs': 2,
    'Descending stairs':3,
    'Falling on knees': 4,
    'Falling on the back': 5,
    'Falling on the left': 6,
    'Falling on the right': 7,
    'Lying down left': 8,
    'Lying down on back': 9,
    'Lying down on stomach': 10,
    'Lying down right': 11,
    'Movement': 12,
    'Running': 13,
        'Sitting': 14,
        'Sitting bent backward': 15,
        'Sitting bent forward': 16,
    'Standing': 17
}

In [96]:
window_id_class_labels = final_sliding_windows.groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)
window_id_class_labels

Unnamed: 0_level_0,activity_type
window_id,Unnamed: 1_level_1
0,2
1,2
2,2
3,2
4,2
...,...
24655,1
24656,1
24657,1
24658,1


In [97]:
window_id_class_labels.groupby("window_id")[['activity_type']].agg(np.min)['activity_type'].unique()

array([ 2,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        1], dtype=int64)

Lastly, we are only interested in the following columns

In [None]:
columns_of_interest = ['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']

## Training a simple CNN

In [98]:
filters = 64
kernel_size = 3
n_features = 6
activation='relu'
n_classes = 18

model = Sequential()

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear', 
                 input_shape=(window_size, n_features)))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 48, 64)            1216      
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 64)            256       
_________________________________________________________________
activation (Activation)      (None, 48, 64)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 46, 64)            12352     
_________________________________________________________________
batch_normalization_1 (Batch (None, 46, 64)            256       
_________________________________________________________________
activation_1 (Activation)    (None, 46, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 44, 64)            1

In [100]:
X = []
y = []

for window_id, group in final_sliding_windows.groupby('window_id'):
    
    shape = group[columns_of_interest].values.shape
    
    X.append(group[columns_of_interest].values)
    y.append(class_labels[group["activity_type"].values[0]])

In [101]:
X = np.asarray(X)
y = np.asarray(y)

print(f"X shape = {X.shape}")
print(f"y shape = {y.shape}")

X shape = (24660, 50, 6)
y shape = (24660,)


One-hot encode y_values

In [104]:
def indices_to_one_hot(data, nb_classes):
    """Convert an iterable of indices to one-hot encoded labels."""
    targets = np.array(data).reshape(-1)
    return np.eye(nb_classes)[targets]

In [107]:
y= indices_to_one_hot(y,18)

In [108]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, train_size=0.8)

y_train = np.asarray(pd.get_dummies(y_train), dtype=np.float32)
y_test = np.asarray(pd.get_dummies(y_test), dtype=np.float32)

ValueError: Data must be 1-dimensional

In [None]:
def indices_to_one_hot(data, nb_classes):
    """Convert an iterable of indices to one-hot encoded labels."""
    targets = np.array(data).reshape(-1)
    return np.eye(nb_classes)[targets]

y= indices_to_one_hot(y,18)