In [2]:
!pip install pandas

Collecting pandas
  Downloading https://files.pythonhosted.org/packages/6b/88/672fcbab1fda7c3a2af192daf32885e065ff4046649247cebdc5cf7383a4/pandas-0.25.2-cp37-cp37m-win_amd64.whl (9.2MB)
Collecting pytz>=2017.2
  Downloading https://files.pythonhosted.org/packages/e7/f9/f0b53f88060247251bf481fa6ea62cd0d25bf1b11a87888e53ce5b7c8ad2/pytz-2019.3-py2.py3-none-any.whl (509kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-0.25.2 pytz-2019.3


In [3]:
import os
import scipy.io
import numpy as np
import pandas as pd

# Set data folder path
DATA_FOLDER = r'D:/NUS_TERM2_CA3/MAREA_dataset'

ACTIVITY_FOLDER = os.path.join(DATA_FOLDER, 'Activity Timings')
SUBJECT_FOLDER = os.path.join(DATA_FOLDER, 'Subject_Data_txt_format')


###########################################################################
# treadWalk       = indoor_time['indoorTime'][:,1:2]
# treadIncline    = indoor_time['indoorTime'][:,4:5]
# treadWalknRun   = indoor_time['indoorTime'][:,1:3]
# indoorWalk      = indoor_time['indoorTime'][:,6:7]
# indoorRun       = indoor_time['indoorTime'][:,6:8]
# outdoorWalk     = outdoor_time['outdoorTime'][:,1:2]
# outdoorWalknRun = outdoor_time['outdoorTime'][:,1:3]
###########################################################################

###########################################################################
# Subject numbers 1 to 11 are involved in Indoor Experiments
# Subject numbers 12 to 20 are involved in Outdoor Experiments
###########################################################################

###########################################################################
# There are five Indoor Activity labels:
# actIndex = 1 -> treadWalk
# actIndex = 2 -> treadIncline
# actIndex = 3 -> treadWalknRun
# actIndex = 4 -> indoorWalk
# actIndex = 5 -> indoorWalknRun

# There are two Outdoor Activity labels:
# actIndex = 1 -> outdoorWalk
# actIndex = 2 -> outdoorWalknRun
###########################################################################

###########################################################################
# There are four accelerometer positions (accPos) to choose from:
# accPos = 1 -> Left Foot
# accPos = 2 -> Right Foot
# accPos = 3 -> Waist
# accPos = 4 -> Wrist
###########################################################################

In [3]:
# Define Activity Labels
indoor_label = ['tread_flat_walk_start', 
                'tread_flat_walk_end',
                'tread_flat_run_end',
                'tread_slope_walk_start',
                'tread_slope_walk_end',
                'indoor_flat_walk_start',
                'indoor_flat_walk_end',
                'indoor_flat_run_end'
               ]

outdoor_label = ['outdoor_walk_start',
                 'outdoor_walk_end',
                 'outdoor_run_end']

indoor_time_df = pd.read_csv(os.path.join(ACTIVITY_FOLDER, 'Indoor Experiment Timings.txt')
                            , names= indoor_label)

outdoor_time_df = pd.read_csv(os.path.join(ACTIVITY_FOLDER, 'Outdoor Experiment Timings.txt')
                            , names=outdoor_label)

indoor_time_df["subject"] = ["Sub" + str(i) for i in range(1, 12)]
outdoor_time_df["subject"] = ["Sub" + str(j) for j in range(12, 21)]

print(indoor_time_df)
print(outdoor_time_df)

    tread_flat_walk_start  tread_flat_walk_end  tread_flat_run_end  \
0                       1                55931               85681   
1                       1                40911               84991   
2                       1                62261               83961   
3                       1                45781               84551   
4                       1                63971               85121   
5                       1                69381               84781   
6                       1                46331               83761   
7                       1                53801               84811   
8                       1                69131               84661   
9                       1                70041               84941   
10                      1                77641               84741   

    tread_slope_walk_start  tread_slope_walk_end  indoor_flat_walk_start  \
0                   102181                180281                  223681   
1      

In [4]:
pos_list = ['LF','RF','Waist','Wrist']
sub_list = ["Sub" + str(i) for i in range(1, 21)]
sub_list.remove('Sub4')


In [7]:
from scipy import sparse
from scipy.sparse.linalg import spsolve
import numpy as np
import pywt

def alsbase(y, lam, p, niter=10):
    L = len(y)
    D = sparse.diags([1,-2,1],[0,-1,-2], shape=(L,L-2))
    w = np.ones(L)
    for i in range(niter):
        W = sparse.spdiags(w, 0, L, L)
        Z = W + lam * D.dot(D.transpose())
        z = spsolve(Z, w*y)
        w = p * (y > z) + (1-p) * (y < z)
    return z

def denoise(signal_orig):
    coeffs_orig = pywt.wavedec(signal_orig, 'db4', level=2)
    coeffs_filter = coeffs_orig.copy()

    threshold = 0.8

    for i in range(1, len(coeffs_orig)):
        coeffs_filter[i] = pywt.threshold(coeffs_orig[i], threshold*max(coeffs_orig[i]))

    signal_denoised = pywt.waverec(coeffs_filter, 'db4')
    
    return signal_denoised

In [12]:

new_names = ['accX_LF', 'accY_LF', 'accZ_LF', 
            'accX_RF', 'accY_RF', 'accZ_RF', 
             'accX_Waist', 'accY_Waist', 'accZ_Waist', 
             'accX_Wrist', 'accY_Wrist', 'accZ_Wrist'            
            ]

sub_df = None

for sub in sub_list:
    lf_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'LF.txt'))
    rf_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'RF.txt'))
    waist_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'Waist.txt'))
    wrist_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'Wrist.txt'))
    sub_df = pd.concat([lf_df, rf_df, waist_df, wrist_df], axis=1)
    sub_df.columns = new_names
    print(sub_df.head())

    sub_df_new = sub_df.copy()
    sub_df_new = denoise(sub_df_new.values)
    sub_df_new.columns = new_names
    print(sub_df_new.head())

    for column in new_names:
        sub_df_new[column] = sub_df_new[column] - alsbase(sub_df_new[column], 10 ^ 5, 0.000005, niter=10)

    n = int(sub[3:])
    if n > 11:
        sub_row = outdoor_time_df[outdoor_time_df['subject'] == sub]
        tmp = sub_row.iloc[0]
        sub_df_new.loc[0:tmp['outdoor_walk_end'], 'label'] = 'outdoor_walk'
        sub_df_new.loc[tmp['outdoor_walk_end']: tmp['outdoor_run_end'], 'label'] = 'outdoor_run'
    else:
        sub_row = indoor_time_df[indoor_time_df['subject'] == sub]
        tmp = sub_row.iloc[0]
        sub_df_new.loc[0:tmp['tread_flat_walk_end'], 'label'] = 'tread_flat_walk'
        sub_df_new.loc[tmp['tread_flat_walk_end']: tmp['tread_flat_run_end'], 'label'] = 'tread_flat_run'
        sub_df_new.loc[tmp['tread_flat_run_end']: tmp['tread_slope_walk_start'], 'label'] = 'rest'
        sub_df_new.loc[tmp['tread_slope_walk_start']: tmp['tread_slope_walk_end'], 'label'] = 'tread_slope_walk'
        sub_df_new.loc[tmp['tread_slope_walk_end']: tmp['indoor_flat_walk_start'], 'label'] = 'rest'
        sub_df_new.loc[tmp['indoor_flat_walk_start']: tmp['indoor_flat_walk_end'], 'label'] = 'indoor_flat_walk'
        sub_df_new.loc[tmp['indoor_flat_walk_end']: tmp['indoor_flat_run_end'], 'label'] = 'indoor_flat_run'

    print(sub_df_new)
    sub_df_new.to_csv(sub + '_processed.csv')

   accX_LF  accY_LF  accZ_LF  accX_RF  accY_RF  accZ_RF  accX_Waist  \
0   -2.196  -11.765   -1.569    4.078   -6.902    2.196      -0.157   
1   -2.510  -10.353   -2.039    4.392   -8.000    1.255      -0.157   
2   -2.353  -10.824   -2.510    4.863   -8.314    0.941      -0.157   
3   -1.882  -10.667   -2.667    4.549   -9.412   -0.157      -0.157   
4   -1.412  -10.353   -2.824    3.451  -10.353   -0.784      -0.627   

   accY_Waist  accZ_Waist  accX_Wrist  accY_Wrist  accZ_Wrist  
0      -8.157       0.784       0.941     -11.451      -2.510  
1      -8.000       1.255       0.627     -10.510      -2.353  
2      -7.843       1.255       0.784     -10.196      -2.039  
3      -8.000       1.412       1.255     -10.039      -1.412  
4      -7.529       1.882       1.569     -10.196      -1.255  
(271681,)


ValueError: Length of values does not match length of index

In [58]:
import os

import pandas as pd

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

DATA_FOLDER = r'D:/NUS_TERM2_CA3/MAREA_dataset'
PROCESSED_FOLDER = os.path.join(DATA_FOLDER, 'Processed_data')

sub_list = ["Sub" + str(i) for i in range(2, 21)]
sub_list.remove('Sub4')


full_df = pd.read_csv(os.path.join(PROCESSED_FOLDER, 'Sub1_processed.csv'))
full_df = full_df.drop(full_df.columns[[0]], axis=1)

print(full_df.shape)

for sub in sub_list:
    tmp_df = pd.read_csv(os.path.join(PROCESSED_FOLDER, sub + '_processed.csv'))
    tmp_df = tmp_df.drop(tmp_df.columns[[0]], axis=1)

    print('Loading ' + sub + '_processed.csv')
    print(tmp_df.shape)
    full_df = full_df.append(tmp_df, ignore_index = True)

print(full_df.shape)


(271681, 13)
LoadingSub2_processed.csv
(336391, 13)
LoadingSub3_processed.csv
(257861, 13)
LoadingSub5_processed.csv
(257021, 13)
LoadingSub6_processed.csv
(235981, 13)
LoadingSub7_processed.csv
(255761, 13)
LoadingSub8_processed.csv
(270311, 13)
LoadingSub9_processed.csv
(246761, 13)
LoadingSub10_processed.csv
(240741, 13)
LoadingSub11_processed.csv
(276041, 13)
LoadingSub12_processed.csv
(49651, 13)
LoadingSub13_processed.csv
(71011, 13)
LoadingSub14_processed.csv
(43581, 13)
LoadingSub15_processed.csv
(33140, 13)
LoadingSub16_processed.csv
(46651, 13)
LoadingSub17_processed.csv
(47261, 13)
LoadingSub18_processed.csv
(48801, 13)
LoadingSub19_processed.csv
(47141, 13)
LoadingSub20_processed.csv
(32201, 13)
(3067988, 13)


In [69]:
df_tread_flat_walk = full_df[full_df['label'] == 'tread_flat_walk']
print(df_tread_flat_walk.shape)

df_tread_flat_run = full_df[full_df['label'] == 'tread_flat_run']
print(df_tread_flat_run.shape)

df_tread_slope_walk = full_df[full_df['label'] == 'tread_slope_walk']
print(df_tread_slope_walk.shape)

df_indoor_flat_walk = full_df[full_df['label'] == 'indoor_flat_walk']
print(df_indoor_flat_walk.shape)

df_indoor_flat_run = full_df[full_df['label'] == 'indoor_flat_run']
print(df_indoor_flat_run.shape)

df_rest = full_df[full_df['label'] == 'rest']
print(df_rest.shape)

df_outdoor_walk = full_df[full_df['label'] == 'outdoor_walk']
print(df_outdoor_walk.shape)

df_outdoor_run = full_df[full_df['label'] == 'outdoor_run']
print(df_outdoor_run.shape)


(609400, 13)
(238050, 13)
(819700, 13)
(229200, 13)
(236600, 13)
(515600, 13)
(235839, 13)
(183599, 13)


In [64]:
window_size = 256
number_columns = 13

activity_to_num_mapping = {
    "rest":0,
    "tread_flat_walk":1,
    "tread_flat_run":2,
    "tread_slope_walk":3,
    "indoor_flat_walk":4,
    "indoor_flat_run":5,
    
    "outdoor_walk":6,
    "outdoor_run":7
}


def reshape_df(df, window_size, number_columns):
    n_drop = df.shape[0] % window_size
    n_samples = df.shape[0] // window_size
    df = df[:-n_drop]
    
    label = activity_to_num_mapping.get(df.iloc[0][12])
    label_series = pd.Series([label for _ in range(n_samples)])
    
    return df.values.reshape(n_samples, window_size, number_columns), label_series


In [72]:
df_tread_flat_walk_3d, ds_tread_flat_walk_label = reshape_df(df_tread_flat_walk, window_size, number_columns)
print(df_tread_flat_walk_3d.shape)
print(ds_tread_flat_walk_label.shape)

df_tread_flat_run_3d, ds_tread_flat_run_label = reshape_df(df_tread_flat_run, window_size, number_columns)
print(df_tread_flat_run_3d.shape)
print(ds_tread_flat_run_label.shape)

df_tread_slope_walk_3d, ds_tread_slope_walk_label = reshape_df(df_tread_slope_walk, window_size, number_columns)
print(df_tread_slope_walk_3d.shape)
df_indoor_flat_walk_3d, ds_indoor_flat_walk_label = reshape_df(df_indoor_flat_walk, window_size, number_columns)
print(df_indoor_flat_walk_3d.shape)
df_indoor_flat_run_3d, ds_indoor_flat_run_label = reshape_df(df_indoor_flat_run, window_size, number_columns)
print(df_indoor_flat_run_3d.shape)
df_outdoor_walk_3d, ds_outdoor_walk_label = reshape_df(df_outdoor_walk, window_size, number_columns)
print(df_outdoor_walk_3d.shape)
df_outdoor_run_3d, ds_outdoor_run_label = reshape_df(df_outdoor_run, window_size, number_columns)
print(df_outdoor_run_3d.shape)
df_rest_3d, ds_rest_label = reshape_df(df_rest, window_size, number_columns)
print(df_rest_3d.shape)

full_df_3d = np.vstack((df_tread_flat_walk_3d, df_tread_flat_run_3d,
                        df_tread_slope_walk_3d, df_indoor_flat_walk_3d,
                        df_indoor_flat_run_3d, df_outdoor_walk_3d,
                        df_outdoor_run_3d, df_rest_3d
                       ))

print(full_df_3d.shape)

full_ds_label = ds_tread_flat_walk_label.append(ds_tread_flat_run_label)
full_ds_label= full_ds_label.append(ds_tread_slope_walk_label)
full_ds_label= full_ds_label.append(ds_indoor_flat_walk_label)
full_ds_label= full_ds_label.append(ds_indoor_flat_run_label)
full_ds_label= full_ds_label.append(ds_outdoor_walk_label)
full_ds_label= full_ds_label.append(ds_outdoor_run_label)
full_ds_label= full_ds_label.append(ds_rest_label)

print(full_ds_label.shape)


(2380, 256, 13)
(2380,)
(929, 256, 13)
(929,)
(3201, 256, 13)
(895, 256, 13)
(924, 256, 13)
(921, 256, 13)
(717, 256, 13)
(2014, 256, 13)
(11981, 256, 13)
(11981,)


In [74]:
df_array = full_df_3d[:,:,:-1]
print(df_array.shape)

from keras.utils.np_utils import to_categorical

y_cat = to_categorical(full_ds_label, num_classes=8)
print(y_cat)
print(y_cat.shape)

(11981, 256, 12)
[[0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
(11981, 8)


In [95]:
y = y_cat
X = df_array

padding_number = 19 #12000 - 11981 

df_padding_X = df_array[0:19,:,:]
df_padding_y = y_cat[0:19,:]

print(df_padding_X.shape)
print(df_padding_y.shape)

X = np.vstack((X, df_padding_X))
y = np.vstack((y, df_padding_y))

print(X.shape)
print(y.shape)

(19, 256, 12)
(19, 8)
(12000, 256, 12)
(12000, 8)


In [32]:
# y = full_df['label']
# print(y.shape)
# print(type(y))
# X = full_df.drop('label', axis=1)

In [11]:
!pip install sklearn

Collecting sklearn
  Using cached https://files.pythonhosted.org/packages/1e/7a/dbb3be0ce9bd5c8b7e3d87328e79063f8b263b2b1bfa4774cb1147bfcd3f/sklearn-0.0.tar.gz
Collecting scikit-learn
  Using cached https://files.pythonhosted.org/packages/d6/9e/6a42486ffa64711fb868e5d4a9167153417e7414c3d8d3e0d627cf391e1e/scikit_learn-0.21.3-cp37-cp37m-win_amd64.whl
Collecting joblib>=0.11
  Downloading https://files.pythonhosted.org/packages/8f/42/155696f85f344c066e17af287359c9786b436b1bf86029bb3411283274f3/joblib-0.14.0-py2.py3-none-any.whl (294kB)
Installing collected packages: joblib, scikit-learn, sklearn
    Running setup.py install for sklearn: started
    Running setup.py install for sklearn: finished with status 'done'
Successfully installed joblib-0.14.0 scikit-learn-0.21.3 sklearn-0.0


In [96]:
from sklearn.model_selection import train_test_split, cross_val_score


#https://machinelearningmastery.com/index-slice-reshape-numpy-arrays-machine-learning-python/

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 / 4, random_state=5)

print(X_train.shape)
print(y_train.shape)
# print(X_train.describe())
# print(y_train.describe())

(9000, 256, 12)
(9000, 8)


In [16]:
!pip install keras

Collecting keras
  Downloading https://files.pythonhosted.org/packages/ad/fd/6bfe87920d7f4fd475acd28500a42482b6b84479832bdc0fe9e589a60ceb/Keras-2.3.1-py2.py3-none-any.whl (377kB)
Collecting h5py
  Downloading https://files.pythonhosted.org/packages/a1/6b/7f62017e3f0b32438dd90bdc1ff0b7b1448b6cb04a1ed84f37b6de95cd7b/h5py-2.10.0-cp37-cp37m-win_amd64.whl (2.5MB)
Collecting pyyaml
  Using cached https://files.pythonhosted.org/packages/bc/3f/4f733cd0b1b675f34beb290d465a65e0f06b492c00b111d1b75125062de1/PyYAML-5.1.2-cp37-cp37m-win_amd64.whl
Collecting keras-preprocessing>=1.0.5
  Using cached https://files.pythonhosted.org/packages/28/6a/8c1f62c37212d9fc441a7e26736df51ce6f0e38455816445471f10da4f0a/Keras_Preprocessing-1.1.0-py2.py3-none-any.whl
Collecting keras-applications>=1.0.6
  Using cached https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl
Installing collected packages: h5py, pyyaml, keras-pr

In [18]:
!pip install tensorflow-gpu

Collecting tensorflow-gpu
  Downloading https://files.pythonhosted.org/packages/63/13/ea9ff554aa0043540a2387c28dd7926575eb25cf89e598caecea836d189d/tensorflow_gpu-2.0.0-cp37-cp37m-win_amd64.whl (285.3MB)
Collecting astor>=0.6.0
  Using cached https://files.pythonhosted.org/packages/d1/4f/950dfae467b384fc96bc6469de25d832534f6b4441033c39f914efd13418/astor-0.8.0-py2.py3-none-any.whl
Collecting gast==0.2.2
  Using cached https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Collecting tensorboard<2.1.0,>=2.0.0
  Downloading https://files.pythonhosted.org/packages/9b/a6/e8ffa4e2ddb216449d34cfcb825ebb38206bee5c4553d69e7bc8bc2c5d64/tensorboard-2.0.0-py3-none-any.whl (3.8MB)
Collecting grpcio>=1.8.6
  Downloading https://files.pythonhosted.org/packages/0d/bc/60eeb61f97837475dae356afa797c54ea6db986afaf6c6d6320a572ff8aa/grpcio-1.24.3-cp37-cp37m-win_amd64.whl (1.6MB)
Processing c:\users\guofe\appdata\local\pip\cache\wheels\7c\0

In [None]:
# ------------------------------
# https://towardsdatascience.com/recurrent-neural-networks-by-example-in-python-ffd204f99470
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Masking, Embedding

model = Sequential()

batch_size = 100

# Recurrent layer
model.add(LSTM(64, batch_input_shape=(batch_size, 256, 12), return_sequences=False, dropout=0.1, recurrent_dropout=0.1))

# Fully connected layer
model.add(Dense(64, activation='relu'))

# Dropout for regularization
model.add(Dropout(0.5))

# Output layer
model.add(Dense(8, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

STEPS = X_train.shape[0] // 20
# VALID_STEPS = validation_generator.n // 20

history = model.fit(X_train, y_train, epochs=20, batch_size=batch_size, validation_split=0.2, verbose=1)


Train on 7200 samples, validate on 1800 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20

In [None]:
score = model.evaluate(X_test, y_test, verbose=0)
print('Test Loss:', score[0])
print('Test accuracy:', score[1])