In [53]:
import pandas as pd
import numpy as np
import os
import tsfresh
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import tensorflow as tf

import os

import matplotlib.pyplot as plt

# keras goodies
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv1D, Dropout, MaxPooling1D, BatchNormalization
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import metrics as kmetrics
import tensorflow.keras.backend as K

## Getting the data

In [18]:
data_path = '../pdiot-data/2021/'

In [57]:
base_df = pd.DataFrame()

# Get a list of all respeck files in all the student folders
all_files = []
for f in os.listdir(data_path):
    if os.path.isdir(data_path + '/' + f):
        files = os.listdir(data_path + '/' + f)
        files = [data_path + '/' + f + '/' + f2 for f2 in files if 'Respeck' in f2]
        all_files += files
        
# Load them all into a df
for filename in all_files:
    #full_path = f"{clean_data_folder}/{filename}"
    #print(full_path)
    
    # load data into a DataFrame
    new_df = pd.read_csv(filename)
    
    # merge into the base DataFrame
    base_df = pd.concat([base_df, new_df])

In [58]:
base_df

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,sensor_type,activity_type,activity_code,subject_id,notes,recording_id
0,1.633516e+12,-0.763184,-0.014465,0.079773,22.062500,9.484375,15.671875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
1,1.633516e+12,-0.794678,0.042175,0.153503,7.578125,-11.687500,6.468750,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
2,1.633516e+12,-0.748291,-0.076233,0.016296,15.781250,-9.375000,4.890625,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
3,1.633516e+12,-0.949707,-0.011536,0.012390,6.046875,-7.031250,1.531250,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
4,1.633516e+12,-1.209961,0.216492,0.016296,-19.218750,-6.078125,1.421875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
770,1.632940e+12,-0.304199,-1.083313,0.414978,-0.578125,18.859375,-18.781250,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
771,1.632940e+12,-0.057129,-1.235901,0.121765,1.593750,14.609375,-2.328125,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
772,1.632940e+12,0.239258,-1.449768,-0.063782,-6.281250,22.671875,18.984375,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
773,1.632940e+12,0.305908,-1.131653,-0.114075,-3.718750,11.906250,13.140625,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...


## Preparing the sliding windows

TODO: Figure out how to handle falling in sliding window

In [188]:
window_size = 50

In [100]:
base_df['activity_type'].unique()

array(['Climbing stairs', 'Descending stairs', 'Desk work',
       'Falling on knees', 'Falling on the back', 'Falling on the left',
       'Falling on the right', 'Lying down left', 'Lying down on back',
       'Lying down on stomach', 'Lying down right', 'Movement', 'Running',
       'Sitting bent backward', 'Sitting bent forward', 'Sitting',
       'Standing', 'Walking at normal speed'], dtype=object)

In [101]:
activities_of_interest = ['Standing', 'Walking at normal speed']

In [136]:
window_size = 50 # 50 datapoints for the window size, which, at 25Hz, means 2 seconds
step_size = 50 # 25 is 50% overlap

window_number = 0 # start a counter at 0 to keep track of the window number

all_overlapping_windows = []

for rid, group in base_df.groupby("recording_id"):
    if group['activity_type'].iloc[0] in activities_of_interest:
    
        print(f"Processing rid = {rid}")

        large_enough_windows = [window for window in group.rolling(window=window_size, min_periods=window_size) if len(window) == window_size]

        overlapping_windows = large_enough_windows[::step_size] 

        # then we will append a window ID to each window
        for window in overlapping_windows:
            window.loc[:, 'window_id'] = window_number
            window_number += 1

        #print(overlapping_windows)

        all_overlapping_windows.append(pd.concat(overlapping_windows).reset_index(drop=True))

Processing rid = Respeck_s1541031_Standing_06-10-2021_11-10-10
Processing rid = Respeck_s1541031_Walking at normal speed_06-10-2021_18-30-14
Processing rid = Respeck_s1704145_Standing_23-09-2021_11-40-05


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Processing rid = Respeck_s1704145_Walking at normal speed_23-09-2021_11-32-29
Processing rid = Respeck_s1706154_Standing_05-10-2021_12-24-10
Processing rid = Respeck_s1706154_Walking at normal speed_05-10-2021_12-28-35
Processing rid = Respeck_s1711661_Standing_23-09-2021_19-58-56
Processing rid = Respeck_s1711661_Walking at normal speed_23-09-2021_20-23-44
Processing rid = Respeck_s1718069_Standing_29-09-2021_12-26-43
Processing rid = Respeck_s1718069_Walking at normal speed_29-09-2021_12-27-51
Processing rid = Respeck_s1721256_Standing_01-10-2021_19-00-30
Processing rid = Respeck_s1721256_Walking at normal speed_02-10-2021_22-12-45
Processing rid = Respeck_s1724067_Standing_03-10-2021_16-29-48
Processing rid = Respeck_s1724067_Walking at normal speed_06-10-2021_19-54-01
Processing rid = Respeck_s1724279_Standing_30-09-2021_12-34-44
Processing rid = Respeck_s1724279_Walking at normal speed_30-09-2021_12-39-50
Processing rid = Respeck_s1727780_Standing_29-09-2021_21-40-57
Processing ri

In [137]:
final_sliding_windows = pd.concat(all_overlapping_windows).reset_index(drop=True)

## Split into training and test sets

Make sure to split by subject!

In [135]:
np.shape(X)

(1358, 50, 6)

In [75]:
columns_of_interest = ['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']

In [171]:
# Simple for now
subjects = final_sliding_windows['subject_id'].unique()

train_subjects, test_subjects = train_test_split(subjects, test_size=0.2, train_size=0.8)

X_train_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(train_subjects)][['window_id'] + columns_of_interest]
y_train_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(train_subjects)][['window_id'] + ['activity_type']]

X_test_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(test_subjects)][['window_id'] + columns_of_interest]
y_test_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(test_subjects)][['window_id'] + ['activity_type']]


In [172]:
X_train_full

Unnamed: 0,window_id,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z
0,0,-1.014648,-0.066467,-0.175598,0.375000,-3.109375,0.281250
1,0,-0.983643,-0.058411,-0.158020,1.187500,-0.796875,-0.609375
2,0,-1.000000,-0.060608,-0.181458,0.890625,-0.828125,-0.343750
3,0,-0.992432,-0.058655,-0.176086,0.890625,0.140625,-0.531250
4,0,-1.000244,-0.057678,-0.176086,0.343750,-0.828125,-0.890625
...,...,...,...,...,...,...,...
67895,1357,0.127197,-1.350159,-0.629700,27.984375,22.046875,37.703125
67896,1357,0.708984,-1.684631,0.001892,19.843750,1.375000,17.234375
67897,1357,0.162109,-1.151672,0.287048,-20.468750,-14.328125,-46.437500
67898,1357,-0.223145,-0.688293,0.276550,8.140625,-5.718750,-19.796875


## Format the input

Format data into windows - make (50,6) inputs

In [142]:
class_labels = {
    'Standing': 0,
    'Walking at normal speed': 1
}

In [178]:
y_train = y_train_full.groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)['activity_type'].values
y_test = y_test_full.groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)['activity_type'].values

y_train = np.asarray(pd.get_dummies(y_train), dtype=np.float32)
y_test = np.asarray(pd.get_dummies(y_test), dtype=np.float32)

In [175]:
X_train = []
X_test = []

for window_id, group in X_train_full.groupby('window_id'):
    #print(f"window_id = {window_id}")
    
    shape = group[columns_of_interest].values.shape
    #print(f"shape = {shape}")
    
    X_train.append(group[columns_of_interest].values)
    
for window_id, group in X_test_full.groupby('window_id'):
    #print(f"window_id = {window_id}")
    
    shape = group[columns_of_interest].values.shape
    #print(f"shape = {shape}")
    
    X_test.append(group[columns_of_interest].values)
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [179]:
print(np.shape(X_train))
print(np.shape(X_test))

print(np.shape(y_train))
print(np.shape(y_test))

(1058, 50, 6)
(300, 50, 6)
(1058, 2)
(300, 2)


## Prepping the model

In [184]:
filters = 64
kernel_size = 3
n_features = 6
activation='relu'
n_classes = 2

In [185]:
model = Sequential()

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear', 
                 input_shape=(window_size, n_features)))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 48, 64)            1216      
_________________________________________________________________
batch_normalization_6 (Batch (None, 48, 64)            256       
_________________________________________________________________
activation_6 (Activation)    (None, 48, 64)            0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 46, 64)            12352     
_________________________________________________________________
batch_normalization_7 (Batch (None, 46, 64)            256       
_________________________________________________________________
activation_7 (Activation)    (None, 46, 64)            0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 44, 64)           

## Training the model

In [186]:
model.compile(
    optimizer=optimizers.SGD(lr=0.001),
    loss='binary_crossentropy',
    metrics = ['accuracy'])

In [190]:
model.fit(X_train, y_train, batch_size=2, epochs=10)

Train on 1058 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x164b6d94408>

## Testing the model

In [192]:
# stats
y_pred_ohe = model.predict(X_test)
y_pred_labels = np.argmax(y_pred_ohe, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

In [193]:
y_pred_ohe

array([[9.9752873e-01, 2.4712498e-03],
       [9.9587810e-01, 4.1218596e-03],
       [9.9756014e-01, 2.4399003e-03],
       [9.9743134e-01, 2.5686042e-03],
       [9.9550879e-01, 4.4911569e-03],
       [9.9787760e-01, 2.1224320e-03],
       [9.9719548e-01, 2.8045545e-03],
       [9.9597675e-01, 4.0232325e-03],
       [9.9770832e-01, 2.2916519e-03],
       [9.9693727e-01, 3.0627472e-03],
       [9.9637467e-01, 3.6253699e-03],
       [9.9757904e-01, 2.4210373e-03],
       [9.9657059e-01, 3.4293788e-03],
       [9.9804795e-01, 1.9520925e-03],
       [9.9658513e-01, 3.4148211e-03],
       [1.0139002e-01, 8.9861000e-01],
       [2.3555653e-02, 9.7644436e-01],
       [3.1897718e-01, 6.8102282e-01],
       [3.4324178e-01, 6.5675825e-01],
       [1.5478685e-01, 8.4521306e-01],
       [8.1362873e-01, 1.8637134e-01],
       [2.3512240e-01, 7.6487762e-01],
       [3.7916947e-02, 9.6208304e-01],
       [4.7195899e-01, 5.2804106e-01],
       [4.0569406e-02, 9.5943063e-01],
       [4.8296607e-01, 5.

In [194]:
print("*" * 80)
print("Classification report")
print("*" * 80)
print(classification_report(y_true_labels, y_pred_labels))

********************************************************************************
Classification report
********************************************************************************
              precision    recall  f1-score   support

           0       0.91      1.00      0.95       150
           1       1.00      0.90      0.95       150

    accuracy                           0.95       300
   macro avg       0.95      0.95      0.95       300
weighted avg       0.95      0.95      0.95       300



### Save a test intance

In [203]:
np.savetxt('data/test_instance.txt', X_test[0])

## Save to file and convert to tflite

In [195]:
# Saving the model to file
export_dir = 'models\cnn_walking_standing'
tf.saved_model.save(model,export_dir)

# Convert the saved tensorflow model to tensorflow lite
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
tflite_model = converter.convert()

# Save the tensorflow lite model to file
import pathlib
tflite_model_file = pathlib.Path('models/cnn_walking_standing.tflite')
tflite_model_file.write_bytes(tflite_model)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: models\cnn_walking_standing\assets


1234856