In [1]:
import pandas as pd
import numpy as np
import os
import tsfresh
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import tensorflow as tf

import os

import matplotlib.pyplot as plt

# keras goodies
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv1D, Dropout, MaxPooling1D, BatchNormalization
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import metrics as kmetrics
import tensorflow.keras.backend as K

## Getting the data

In [2]:
data_path = '../pdiot-data/2021/'

In [3]:
base_df = pd.DataFrame()

# Get a list of all respeck files in all the student folders
all_files = []
for f in os.listdir(data_path):
    if os.path.isdir(data_path + '/' + f):
        files = os.listdir(data_path + '/' + f)
        files = [data_path + '/' + f + '/' + f2 for f2 in files if 'Respeck' in f2]
        all_files += files
        
# Load them all into a df
for filename in all_files:
    #full_path = f"{clean_data_folder}/{filename}"
    #print(full_path)
    
    # load data into a DataFrame
    new_df = pd.read_csv(filename)
    
    # merge into the base DataFrame
    base_df = pd.concat([base_df, new_df])

In [4]:
base_df

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,sensor_type,activity_type,activity_code,subject_id,notes,recording_id
0,1.633516e+12,-0.763184,-0.014465,0.079773,22.062500,9.484375,15.671875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
1,1.633516e+12,-0.794678,0.042175,0.153503,7.578125,-11.687500,6.468750,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
2,1.633516e+12,-0.748291,-0.076233,0.016296,15.781250,-9.375000,4.890625,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
3,1.633516e+12,-0.949707,-0.011536,0.012390,6.046875,-7.031250,1.531250,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
4,1.633516e+12,-1.209961,0.216492,0.016296,-19.218750,-6.078125,1.421875,Respeck,Climbing stairs,12,s1541031,,Respeck_s1541031_Climbing stairs_06-10-2021_11...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
770,1.632940e+12,-0.304199,-1.083313,0.414978,-0.578125,18.859375,-18.781250,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
771,1.632940e+12,-0.057129,-1.235901,0.121765,1.593750,14.609375,-2.328125,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
772,1.632940e+12,0.239258,-1.449768,-0.063782,-6.281250,22.671875,18.984375,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...
773,1.632940e+12,0.305908,-1.131653,-0.114075,-3.718750,11.906250,13.140625,Respeck,Walking at normal speed,1,s2254050,,Respeck_s2254050_Walking at normal speed_29-09...


## Preparing the sliding windows

TODO: Figure out how to handle falling in sliding window

In [6]:
base_df['activity_type'].unique()

array(['Climbing stairs', 'Descending stairs', 'Desk work',
       'Falling on knees', 'Falling on the back', 'Falling on the left',
       'Falling on the right', 'Lying down left', 'Lying down on back',
       'Lying down on stomach', 'Lying down right', 'Movement', 'Running',
       'Sitting bent backward', 'Sitting bent forward', 'Sitting',
       'Standing', 'Walking at normal speed'], dtype=object)

In [7]:
activities_of_interest = ['Lying down on back', 'Running']

In [8]:
window_size = 50 # 50 datapoints for the window size, which, at 25Hz, means 2 seconds
step_size = 50 # 25 is 50% overlap

window_number = 0 # start a counter at 0 to keep track of the window number

all_overlapping_windows = []

for rid, group in base_df.groupby("recording_id"):
    if group['activity_type'].iloc[0] in activities_of_interest:
    
        print(f"Processing rid = {rid}")

        large_enough_windows = [window for window in group.rolling(window=window_size, min_periods=window_size) if len(window) == window_size]

        overlapping_windows = large_enough_windows[::step_size] 

        # then we will append a window ID to each window
        for window in overlapping_windows:
            window.loc[:, 'window_id'] = window_number
            window_number += 1

        #print(overlapping_windows)

        all_overlapping_windows.append(pd.concat(overlapping_windows).reset_index(drop=True))

Processing rid = Respeck_s1541031_Lying down on back_06-10-2021_11-15-13
Processing rid = Respeck_s1541031_Running_08-10-2021_12-10-37
Processing rid = Respeck_s1702583_Lying down on back_07-10-2021_20-34-35
Processing rid = Respeck_s1702583_Lying down on stomach_07-10-2021_20-37-05


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Processing rid = Respeck_s1704145_Lying down on back_23-09-2021_11-42-59
Processing rid = Respeck_s1704145_Running_23-09-2021_11-31-45
Processing rid = Respeck_s1706154_Lying down on back_05-10-2021_12-25-07
Processing rid = Respeck_s1706154_Running_05-10-2021_12-29-15
Processing rid = Respeck_s1711661_Lying down on back_23-09-2021_20-06-21
Processing rid = Respeck_s1711661_Running_23-09-2021_20-44-06
Processing rid = Respeck_s1718069_Lying down on back_04-10-2021_14-26-49
Processing rid = Respeck_s1718069_Running_04-10-2021_14-36-00
Processing rid = Respeck_s1721256_Lying down on back_01-10-2021_19-03-04
Processing rid = Respeck_s1721256_Running_02-10-2021_17-26-10
Processing rid = Respeck_s1724067_Lying down on back_03-10-2021_16-39-39
Processing rid = Respeck_s1724067_Running_06-10-2021_19-55-17
Processing rid = Respeck_s1724279_Lying down on back_30-09-2021_12-35-42
Processing rid = Respeck_s1724279_Running_30-09-2021_12-40-35
Processing rid = Respeck_s1727780_Lying down on back_29

In [9]:
final_sliding_windows = pd.concat(all_overlapping_windows).reset_index(drop=True)

## Split into training and test sets

Make sure to split by subject!

In [10]:
columns_of_interest = ['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']

In [11]:
# Simple for now
subjects = final_sliding_windows['subject_id'].unique()

train_subjects, test_subjects = train_test_split(subjects, test_size=0.2, train_size=0.8)

X_train_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(train_subjects)][['window_id'] + columns_of_interest]
y_train_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(train_subjects)][['window_id'] + ['activity_type']]

X_test_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(test_subjects)][['window_id'] + columns_of_interest]
y_test_full = final_sliding_windows[final_sliding_windows['subject_id'].isin(test_subjects)][['window_id'] + ['activity_type']]


In [12]:
X_train_full

Unnamed: 0,window_id,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z
1500,30,-0.577637,0.004089,0.829041,0.734375,-0.734375,-0.062500
1501,30,-0.580322,-0.008118,0.810242,-1.750000,-0.125000,-0.078125
1502,30,-0.596680,-0.015442,0.811951,1.468750,-0.265625,0.156250
1503,30,-0.582520,-0.026184,0.809265,0.171875,-0.171875,0.250000
1504,30,-0.582764,-0.017639,0.806335,-0.015625,-0.453125,-0.250000
...,...,...,...,...,...,...,...
69445,1388,-0.538086,-0.213196,0.878601,0.222656,0.089844,0.140625
69446,1388,-0.536133,-0.215393,0.880554,0.265625,0.156250,0.113281
69447,1388,-0.532715,-0.212952,0.882019,0.152344,0.101562,0.105469
69448,1388,-0.535156,-0.212708,0.876648,0.078125,0.058594,0.121094


## Format the input

Format data into windows - make (50,6) inputs

In [14]:
class_labels = {
    'Lying down on back': 0,
    'Running': 1
}

In [15]:
y_train = y_train_full.groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)['activity_type'].values
y_test = y_test_full.groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)['activity_type'].values

y_train = np.asarray(pd.get_dummies(y_train), dtype=np.float32)
y_test = np.asarray(pd.get_dummies(y_test), dtype=np.float32)

In [16]:
X_train = []
X_test = []

for window_id, group in X_train_full.groupby('window_id'):
    #print(f"window_id = {window_id}")
    
    shape = group[columns_of_interest].values.shape
    #print(f"shape = {shape}")
    
    X_train.append(group[columns_of_interest].values)
    
for window_id, group in X_test_full.groupby('window_id'):
    #print(f"window_id = {window_id}")
    
    shape = group[columns_of_interest].values.shape
    #print(f"shape = {shape}")
    
    X_test.append(group[columns_of_interest].values)
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [17]:
print(np.shape(X_train))
print(np.shape(X_test))

print(np.shape(y_train))
print(np.shape(y_test))

(1089, 50, 6)
(300, 50, 6)
(1089, 2)
(300, 2)


## Prepping the model

In [18]:
filters = 64
kernel_size = 3
n_features = 6
activation='relu'
n_classes = 2

In [19]:
model = Sequential()

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear', 
                 input_shape=(window_size, n_features)))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='linear'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 48, 64)            1216      
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 64)            256       
_________________________________________________________________
activation (Activation)      (None, 48, 64)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 46, 64)            12352     
_________________________________________________________________
batch_normalization_1 (Batch (None, 46, 64)            256       
_________________________________________________________________
activation_1 (Activation)    (None, 46, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 44, 64)            1

## Training the model

In [20]:
model.compile(
    optimizer=optimizers.SGD(lr=0.001),
    loss='binary_crossentropy',
    metrics = ['accuracy'])

In [21]:
model.fit(X_train, y_train, batch_size=2, epochs=10)

Train on 1089 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2139b3d1608>

## Testing the model

In [22]:
# stats
y_pred_ohe = model.predict(X_test)
y_pred_labels = np.argmax(y_pred_ohe, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

In [23]:
y_pred_ohe

array([[9.95861769e-01, 4.13822243e-03],
       [9.95947301e-01, 4.05266136e-03],
       [9.95842636e-01, 4.15735133e-03],
       [9.96129990e-01, 3.86995310e-03],
       [9.95804369e-01, 4.19561425e-03],
       [9.95920062e-01, 4.07992024e-03],
       [9.95925307e-01, 4.07465547e-03],
       [9.95910168e-01, 4.08983091e-03],
       [9.95979905e-01, 4.02010232e-03],
       [9.95709896e-01, 4.29012626e-03],
       [9.95912850e-01, 4.08710074e-03],
       [9.95870888e-01, 4.12910245e-03],
       [9.95879531e-01, 4.12044115e-03],
       [9.95913446e-01, 4.08652052e-03],
       [9.95750070e-01, 4.24985774e-03],
       [3.56851295e-02, 9.64314878e-01],
       [5.23309968e-02, 9.47669029e-01],
       [4.04647589e-02, 9.59535182e-01],
       [2.67979559e-02, 9.73201990e-01],
       [1.11257732e-02, 9.88874197e-01],
       [4.63168398e-02, 9.53683078e-01],
       [2.20316246e-01, 7.79683769e-01],
       [1.37011930e-02, 9.86298740e-01],
       [1.59190390e-02, 9.84080911e-01],
       [1.268786

In [24]:
print("*" * 80)
print("Classification report")
print("*" * 80)
print(classification_report(y_true_labels, y_pred_labels))

********************************************************************************
Classification report
********************************************************************************
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       150
           1       1.00      0.95      0.97       150

    accuracy                           0.97       300
   macro avg       0.97      0.97      0.97       300
weighted avg       0.97      0.97      0.97       300



### Save a test intance

In [33]:
for i in range(5):
    np.savetxt('data/test_instance' + str(i) + '.txt', X_test[i])

In [34]:
for i in np.arange(15,20):
    print(y_test[i])
    np.savetxt('data/test_instance' + str(i) + '.txt', X_test[i])

[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]


## Save to file and convert to tflite

In [25]:
# Saving the model to file
model_name = 'cnn_lyingback_running'

export_dir = 'models\' + model_name
tf.saved_model.save(model,export_dir)

# Convert the saved tensorflow model to tensorflow lite
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
tflite_model = converter.convert()

# Save the tensorflow lite model to file
import pathlib
tflite_model_file = pathlib.Path('models/' + model_name + '.tflite')
tflite_model_file.write_bytes(tflite_model)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: models\cnn_walking_standing\assets


1234796

## Testing Sonia's clips (ignore this) 

In [49]:
sonia = pd.DataFrame()
data_path = 'data/sonia_tests/'

# Get a list of all respeck files in all the student folders
sonia_files = []
for f in os.listdir(data_path):
    files = [data_path + '/' + f]
    sonia_files += files
        
# Load them all into a df
df_sonia_0 = pd.read_csv(sonia_files[0], header=header_size)
df_sonia_0 = add_cols(sonia_files[0], df_sonia_0)
df_sonia_1 = pd.read_csv(sonia_files[1], header=header_size)
df_sonia_1 = add_cols(sonia_files[1], df_sonia_1)

Sensor type: Respeck
Activity type: Lying down on back
Activity code: 2
Subject id: s1704145
Notes: test
Sensor type: Respeck
Activity type: Running
Activity code: 11
Subject id: s1704145
Notes: test


In [40]:
header_size = 5
from typing import Tuple

def extract_header_info(filename: str, header_size: int = 5) -> Tuple[str, str, int, str, str]:
    """
    :param filename: Path to recording file.
    :param header_size: The size of the header, defaults to 5.
    :returns: A 5-tuple containing the sensor type, activity type, activity code, subject id and any notes.
    """
    sensor_type = ""
    activity_type = ""
    activity_code = -1
    subject_id = ""
    notes = ""

    with open(filename) as f:
        head = [next(f).rstrip().split('# ')[1] for x in range(header_size)]
        for l in head:
            print(l)

            title, value = l.split(":")

            if title == "Sensor type":
                sensor_type = value.strip()
            elif title == "Activity type":
                activity_type = value.strip()
            elif title == "Activity code":
                activity_code = int(value.strip())
            elif title == "Subject id":
                subject_id = value.strip()
            elif title == "Notes":
                notes = value.strip()
    
    return sensor_type, activity_type, activity_code, subject_id, notes

In [47]:
def add_cols(filename, df_respeck):
    sensor_type, activity_type, activity_code, subject_id, notes = extract_header_info(filename)
    df_respeck['sensor_type'] = sensor_type
    df_respeck['activity_type'] = activity_type
    df_respeck['activity_code'] = activity_code
    df_respeck['subject_id'] = subject_id
    df_respeck['notes'] = notes
    return df_respeck

In [51]:
df_sonia_0

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,sensor_type,activity_type,activity_code,subject_id,notes
0,1634656108567,-0.408203,-0.072815,1.030701,1.875000,-0.390625,0.265625,Respeck,Lying down on back,2,s1704145,test
1,1634656108613,-0.401855,-0.066467,1.018494,2.359375,-0.609375,0.343750,Respeck,Lying down on back,2,s1704145,test
2,1634656108660,-0.401855,-0.081116,1.023132,2.718750,-0.718750,0.156250,Respeck,Lying down on back,2,s1704145,test
3,1634656108678,-0.403076,-0.075500,1.020203,3.281250,-1.328125,-0.187500,Respeck,Lying down on back,2,s1704145,test
4,1634656108729,-0.400879,-0.077209,1.031433,3.546875,-1.281250,-0.109375,Respeck,Lying down on back,2,s1704145,test
...,...,...,...,...,...,...,...,...,...,...,...,...
166,1634656115146,-0.401367,-0.060852,1.024109,1.718750,-0.312500,-0.015625,Respeck,Lying down on back,2,s1704145,test
167,1634656115180,-0.383545,-0.064270,0.998962,0.609375,0.234375,-0.171875,Respeck,Lying down on back,2,s1704145,test
168,1634656115228,-0.403809,-0.058411,1.026062,1.312500,-0.140625,-0.125000,Respeck,Lying down on back,2,s1704145,test
169,1634656115270,-0.403564,-0.062561,1.028503,1.921875,-0.500000,-0.296875,Respeck,Lying down on back,2,s1704145,test


In [58]:
window_size = 50 # 50 datapoints for the window size, which, at 25Hz, means 2 seconds
step_size = 50 # 25 is 50% overlap

window_number = 0 # start a counter at 0 to keep track of the window number

all_overlapping_windows = []

# Grouping by activity rather than recording_id because I only have sonia's here
for rid, group in df_sonia_0.groupby("activity_type"):
    large_enough_windows = [window for window in group.rolling(window=window_size, min_periods=window_size) if len(window) == window_size]

    overlapping_windows = large_enough_windows[::step_size] 

    # then we will append a window ID to each window
    for window in overlapping_windows:
        window.loc[:, 'window_id'] = window_number
        window_number += 1

    #print(overlapping_windows)

    all_overlapping_windows.append(pd.concat(overlapping_windows).reset_index(drop=True))
    
for rid, group in df_sonia_1.groupby("activity_type"):
    large_enough_windows = [window for window in group.rolling(window=window_size, min_periods=window_size) if len(window) == window_size]

    overlapping_windows = large_enough_windows[::step_size] 

    # then we will append a window ID to each window
    for window in overlapping_windows:
        window.loc[:, 'window_id'] = window_number
        window_number += 1

    #print(overlapping_windows)

    all_overlapping_windows.append(pd.concat(overlapping_windows).reset_index(drop=True))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [60]:
final_sliding_windows_sonia = pd.concat(all_overlapping_windows).reset_index(drop=True)
final_sliding_windows_sonia

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,sensor_type,activity_type,activity_code,subject_id,notes,window_id
0,1634656108567,-0.408203,-0.072815,1.030701,1.875000,-0.390625,0.265625,Respeck,Lying down on back,2,s1704145,test,0
1,1634656108613,-0.401855,-0.066467,1.018494,2.359375,-0.609375,0.343750,Respeck,Lying down on back,2,s1704145,test,0
2,1634656108660,-0.401855,-0.081116,1.023132,2.718750,-0.718750,0.156250,Respeck,Lying down on back,2,s1704145,test,0
3,1634656108678,-0.403076,-0.075500,1.020203,3.281250,-1.328125,-0.187500,Respeck,Lying down on back,2,s1704145,test,0
4,1634656108729,-0.400879,-0.077209,1.031433,3.546875,-1.281250,-0.109375,Respeck,Lying down on back,2,s1704145,test,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,1634656134598,0.220703,-0.528137,0.454529,-8.796875,21.234375,71.750000,Respeck,Running,11,s1704145,test,5
296,1634656134630,-0.621338,0.170349,0.033875,-22.546875,-20.468750,14.312500,Respeck,Running,11,s1704145,test,5
297,1634656134679,-0.301514,0.491638,-0.423889,-11.250000,31.093750,-59.859375,Respeck,Running,11,s1704145,test,5
298,1634656134729,0.607910,0.160339,-0.207581,-9.000000,19.640625,-15.203125,Respeck,Running,11,s1704145,test,5


In [63]:
X_test = []
for window_id, group in final_sliding_windows_sonia.groupby('window_id'):
    #print(f"window_id = {window_id}")
    
    shape = group[columns_of_interest].values.shape
    #print(f"shape = {shape}")
    
    X_test.append(group[columns_of_interest].values)
    
X_test = np.asarray(X_test)

In [67]:
y_test = final_sliding_windows_sonia.groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)['activity_type'].values
y_test = np.asarray(pd.get_dummies(y_test), dtype=np.float32)
y_test

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)

In [64]:
X_test.shape

(6, 50, 6)

In [68]:
y_pred_ohe = model.predict(X_test)
y_pred_labels = np.argmax(y_pred_ohe, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

print(classification_report(y_true_labels, y_pred_labels))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6

