In [2]:
import pandas as pd
import numpy as np
import tsfresh
from sklearn.model_selection import train_test_split, LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf

In [3]:
import os

import matplotlib.pyplot as plt

# keras goodies
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv1D, Dropout, MaxPooling1D, BatchNormalization
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import metrics as kmetrics
import tensorflow.keras.backend as K

In [4]:
import itertools

In [5]:
base_df = pd.read_csv('Respeck_recordings_clean.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [6]:
len(base_df)

492676

In [8]:
columns_of_interest = ['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']

In [9]:
nan_indices = set()
for i in range(len(base_df)):
    for col in columns_of_interest:
        if pd.isnull(base_df.loc[i, col]):
            nan_indices.add(i)

In [10]:
base_df.drop(list(nan_indices), inplace=True)

In [11]:
subject_dataframes = []

In [12]:
for rid, group in base_df.groupby("subject_id"):
    subject_dataframes.append(group)

In [13]:
n = len(subject_dataframes)
n

46

In [15]:
window_size = 50 # 50 datapoints for the window size, which, at 25Hz, means 2 seconds
step_size = 25 # this is 50% overlap

window_number = 0 # start a counter at 0 to keep track of the window number

all_overlapping_windows = []

for rid, group in base_df.groupby("recording_id"):
    # print(f"Processing rid = {rid}")
    
    large_enough_windows = [window for window in group.rolling(window=window_size, min_periods=window_size) if len(window) == window_size]
    
    overlapping_windows = large_enough_windows[::step_size] 
    
    # then we will append a window ID to each window
    for window in overlapping_windows:
        window.loc[:, 'window_id'] = window_number
        window_number += 1
    
    if len(overlapping_windows) == 0:
            continue
    
    all_overlapping_windows.append(pd.concat(overlapping_windows).reset_index(drop=True))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [16]:
final_sliding_windows = pd.concat(all_overlapping_windows).reset_index(drop=True)

In [17]:
final_sliding_windows.columns

Index(['timestamp', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z', 'sensor_type', 'activity_type', 'activity_code', 'subject_id',
       'notes', 'recording_id', 'window_id'],
      dtype='object')

In [18]:
subject_ids = list(set(base_df['subject_id']))

In [19]:
training_partitions = [final_sliding_windows[final_sliding_windows.subject_id != s] for s in subject_ids]

In [20]:
testing_partitions = [final_sliding_windows[final_sliding_windows.subject_id == s] for s in subject_ids]

In [21]:
print(f"The data was collected using the sensors: {base_df.sensor_type.unique()}")
print(f"The data was collected for the activities: {base_df.activity_type.unique()}")
print(f"The number of unique recordings is: {len(base_df.recording_id.unique())}")
print(f"The subject IDs in the recordings are: {len(base_df.subject_id.unique())}")

The data was collected using the sensors: ['Respeck']
The data was collected for the activities: ['Climbing stairs' 'Standing' 'Movement' 'Lying down on stomach'
 'Sitting bent backward' 'Lying down left' 'Lying down on back'
 'Descending stairs' 'Sitting bent forward' 'Walking at normal speed'
 'Running' 'Lying down right' 'Desk work' 'Sitting' 'Falling on knees'
 'Falling on the back' 'Falling on the right' 'Falling on the left']
The number of unique recordings is: 876
The subject IDs in the recordings are: 46


In [24]:
class_labels = {
    'Falling on the left':0,
    'Falling on knees':1,
    'Falling on the back':2,
    'Descending stairs':3,
    'Standing':4,
    'Lying down right':5,
    'Walking at normal speed':6,
    'Lying down on back':7,
    'Desk work':8,
    'Running':9,
    'Climbing stairs':10,
    'Falling on the right':11,
    'Sitting bent backward':12,
    'Sitting bent forward':13,
    'Lying down left':14,
    'Movement':15,
    'Lying down on stomach':16,
    'Sitting':17
}

In [25]:
len(final_sliding_windows)

922350

In [26]:
n

46

In [28]:
# randomly select five partitions and train the model on them
random_partitions = [4, 17, 26, 31, 40]

In [46]:
# get the training and testing features
feature_lists_train = []
feature_lists_test = []

In [47]:
for i in random_partitions:
    print(i)
    # now extract all features
    feature_list = []

    for col in columns_of_interest:
        new_features = tsfresh.extract_features(timeseries_container=training_partitions[i], column_id='window_id',
                            column_value=col, default_fc_parameters=tsfresh.feature_extraction.MinimalFCParameters())
        feature_list.append(new_features)

    feature_list = pd.concat(feature_list, axis=1)
    
    feature_lists_train.append(feature_list)

4


Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.70it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.48it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.73it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.41it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.75it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.41it/s]

17



Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.75it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.49it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.78it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.61it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.58it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.73it/s]


26


Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.57it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.59it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.74it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.58it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.72it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.48it/s]

31



Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.70it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.59it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.77it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.59it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.82it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.73it/s]


40


Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.60it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.79it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.45it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.76it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.48it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.73it/s]


In [48]:
for i in random_partitions:
    print(i)
    # now extract all features
    feature_list = []

    for col in columns_of_interest:
        new_features = tsfresh.extract_features(timeseries_container=testing_partitions[i], column_id='window_id',
                            column_value=col, default_fc_parameters=tsfresh.feature_extraction.MinimalFCParameters())
        feature_list.append(new_features)

    feature_list = pd.concat(feature_list, axis=1)
    
    feature_lists_test.append(feature_list)

4


Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 235.69it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 241.09it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 220.32it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 239.99it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 244.83it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 237.91it/s]

17



Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 210.84it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 209.10it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 207.43it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 214.80it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 215.25it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 214.25it/s]

26



Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 242.14it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 240.04it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 246.19it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 240.77it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 238.91it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 244.66it/s]

31



Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 208.13it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 205.64it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 203.93it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 183.54it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 203.94it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 203.88it/s]

40



Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 208.44it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 210.32it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 212.97it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 209.76it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 216.67it/s]
Feature Extraction: 100%|██████████| 15/15 [00:00<00:00, 208.78it/s]


In [50]:
training_partitions[4].groupby("window_id")[['activity_type']].agg(np.min)
# testing_partitions[4].groupby("window_id")[['activity_type']].agg(np.min)

Unnamed: 0_level_0,activity_type
window_id,Unnamed: 1_level_1
0,Climbing stairs
1,Climbing stairs
2,Climbing stairs
3,Climbing stairs
4,Climbing stairs
...,...
18442,Lying down on back
18443,Lying down on back
18444,Lying down on back
18445,Lying down on back


In [51]:
window_id_class_labels_train_4 = training_partitions[4].groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)
window_id_class_labels_train_4

Unnamed: 0_level_0,activity_type
window_id,Unnamed: 1_level_1
0,10
1,10
2,10
3,10
4,10
...,...
18442,7
18443,7
18444,7
18445,7


In [55]:
window_id_class_labels_test_4 = testing_partitions[4].groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels)
window_id_class_labels_test_4

Unnamed: 0_level_0,activity_type
window_id,Unnamed: 1_level_1
5991,8
5992,8
5993,8
5994,8
5995,8
...,...
6335,6
6336,6
6337,6
6338,6


In [54]:
set(window_id_class_labels_train_4['activity_type'])

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}

In [58]:
X_train_4 = feature_lists_train[0].to_numpy()
X_test_4 = feature_lists_test[0].to_numpy()

In [63]:
y_train_4 = window_id_class_labels_train_4.to_numpy()
y_test_4 = window_id_class_labels_test_4.to_numpy()

In [66]:
len(y_train_4) + len(y_test_4)

18447

In [77]:
accuracy_scores = []

rfc = RandomForestClassifier()

for i in range(len(random_partitions)):
    print(i, end=' ')
    X_train = feature_lists_train[i].to_numpy()
    X_test = feature_lists_test[i].to_numpy()
    y_train = training_partitions[random_partitions[i]].groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels).to_numpy()
    y_test = testing_partitions[random_partitions[i]].groupby("window_id")[['activity_type']].agg(np.min).replace(class_labels).to_numpy()
    rfc.fit(X_train, y_train.ravel())
    y_pred = rfc.predict(X_test)
    accuracy_scores.append(accuracy_score(y_test, y_pred))

0 1 2 3 4 

In [81]:
[100*x for x in accuracy_scores]

[35.816618911174785,
 60.591133004926114,
 83.62068965517241,
 32.098765432098766,
 82.01970443349754]

In [82]:
sum(accuracy_scores)/len(accuracy_scores)

0.5882938228737393

In [145]:
filters = 64
kernel_size = 3
n_features = 6
activation='sigmoid'
n_classes = 18

In [146]:
window_size

50

In [150]:
model = Sequential()

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='sigmoid', 
                 input_shape=(window_size, n_features)))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Dropout(0.4))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='sigmoid'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Dropout(0.4))

model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='sigmoid'))
model.add(BatchNormalization())
model.add(Activation(activation))

model.add(Flatten())
model.add(Dense(100, activation='sigmoid'))
model.add(Dense(n_classes, activation='softmax'))

model.compile(
    optimizer=optimizers.SGD(lr=0.1, momentum=0.9),
    loss='binary_crossentropy',
    metrics = ['accuracy'])

model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_16 (Conv1D)           (None, 48, 64)            1216      
_________________________________________________________________
batch_normalization_16 (Batc (None, 48, 64)            256       
_________________________________________________________________
activation_16 (Activation)   (None, 48, 64)            0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 48, 64)            0         
_________________________________________________________________
conv1d_17 (Conv1D)           (None, 46, 64)            12352     
_________________________________________________________________
batch_normalization_17 (Batc (None, 46, 64)            256       
_________________________________________________________________
activation_17 (Activation)   (None, 46, 64)           

In [151]:
X_train = []
y_train = []

for window_id, group in training_partitions[40].groupby('window_id'):
    # print(f"window_id = {window_id}")
    shape = group[columns_of_interest].values.shape
    # print(f"shape = {shape}")    
    X_train.append(group[columns_of_interest].values)
    y_train.append(class_labels[group["activity_type"].values[0]])

In [152]:
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

print(f"X train shape = {X_train.shape}")
print(f"y train shape = {y_train.shape}")

X train shape = (18041, 50, 6)
y train shape = (18041,)


In [153]:
X_test = []
y_test = []

for window_id, group in testing_partitions[40].groupby('window_id'):
    # print(f"window_id = {window_id}")
    shape = group[columns_of_interest].values.shape
    # print(f"shape = {shape}")    
    X_test.append(group[columns_of_interest].values)
    y_test.append(class_labels[group["activity_type"].values[0]])

In [154]:
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)

print(f"X test shape = {X_test.shape}")
print(f"y test shape = {y_test.shape}")

X test shape = (406, 50, 6)
y test shape = (406,)


In [155]:
y_train = np.asarray(pd.get_dummies(y_train), dtype=np.float32)
y_test = np.asarray(pd.get_dummies(y_test), dtype=np.float32)

In [156]:
print(f"X_train shape = {X_train.shape}")
print(f"y_train shape = {y_train.shape}")

print(f"X_test shape = {X_test.shape}")
print(f"y_test shape = {y_test.shape}")

X_train shape = (18041, 50, 6)
y_train shape = (18041, 18)
X_test shape = (406, 50, 6)
y_test shape = (406, 14)


In [157]:
model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f24bc9324c0>

In [158]:
# stats
y_pred_ohe = model.predict(X_test)
y_pred_labels = np.argmax(y_pred_ohe, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

In [159]:
print("*" * 80)
print("Classification report")
print("*" * 80)
print(classification_report(y_true_labels, y_pred_labels))

********************************************************************************
Classification report
********************************************************************************
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      29.0
           1       0.00      0.00      0.00      29.0
           2       0.00      0.00      0.00      29.0
           3       0.00      0.00      0.00      29.0
           4       0.00      0.00      0.00      29.0
           5       0.00      0.00      0.00      29.0
           6       0.00      0.00      0.00      29.0
           7       0.00      0.00      0.00      29.0
           8       0.00      0.00      0.00      29.0
           9       0.00      0.00      0.00      29.0
          10       0.00      0.00      0.00      29.0
          11       0.00      0.00      0.00      29.0
          12       0.00      0.00      0.00      29.0
          13       0.00      0.00      0.00      29.0
     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
