In [2]:
import os
import scipy.io
import numpy as np
import pandas as pd

# Set data folder path
DATA_FOLDER = r'D:/NUS_TERM2_CA3/MAREA_dataset'

ACTIVITY_FOLDER = os.path.join(DATA_FOLDER, 'Activity Timings')
SUBJECT_FOLDER = os.path.join(DATA_FOLDER, 'Subject_Data_txt_format')


###########################################################################
# treadWalk       = indoor_time['indoorTime'][:,1:2]
# treadIncline    = indoor_time['indoorTime'][:,4:5]
# treadWalknRun   = indoor_time['indoorTime'][:,1:3]
# indoorWalk      = indoor_time['indoorTime'][:,6:7]
# indoorRun       = indoor_time['indoorTime'][:,6:8]
# outdoorWalk     = outdoor_time['outdoorTime'][:,1:2]
# outdoorWalknRun = outdoor_time['outdoorTime'][:,1:3]
###########################################################################

###########################################################################
# Subject numbers 1 to 11 are involved in Indoor Experiments
# Subject numbers 12 to 20 are involved in Outdoor Experiments
###########################################################################

###########################################################################
# There are five Indoor Activity labels:
# actIndex = 1 -> treadWalk
# actIndex = 2 -> treadIncline
# actIndex = 3 -> treadWalknRun
# actIndex = 4 -> indoorWalk
# actIndex = 5 -> indoorWalknRun

# There are two Outdoor Activity labels:
# actIndex = 1 -> outdoorWalk
# actIndex = 2 -> outdoorWalknRun
###########################################################################

###########################################################################
# There are four accelerometer positions (accPos) to choose from:
# accPos = 1 -> Left Foot
# accPos = 2 -> Right Foot
# accPos = 3 -> Waist
# accPos = 4 -> Wrist
###########################################################################

In [3]:
# Define Activity Labels
indoor_label = ['tread_flat_walk_start', 
                'tread_flat_walk_end',
                'tread_flat_run_end',
                'tread_slope_walk_start',
                'tread_slope_walk_end',
                'indoor_flat_walk_start',
                'indoor_flat_walk_end',
                'indoor_flat_run_end'
               ]

outdoor_label = ['outdoor_walk_start',
                 'outdoor_walk_end',
                 'outdoor_run_end']

indoor_time_df = pd.read_csv(os.path.join(ACTIVITY_FOLDER, 'Indoor Experiment Timings.txt')
                            , names= indoor_label)

outdoor_time_df = pd.read_csv(os.path.join(ACTIVITY_FOLDER, 'Outdoor Experiment Timings.txt')
                            , names=outdoor_label)

indoor_time_df["subject"] = ["Sub" + str(i) for i in range(1, 12)]
outdoor_time_df["subject"] = ["Sub" + str(j) for j in range(12, 21)]

print(indoor_time_df)
print(outdoor_time_df)

    tread_flat_walk_start  tread_flat_walk_end  tread_flat_run_end  \
0                       1                55931               85681   
1                       1                40911               84991   
2                       1                62261               83961   
3                       1                45781               84551   
4                       1                63971               85121   
5                       1                69381               84781   
6                       1                46331               83761   
7                       1                53801               84811   
8                       1                69131               84661   
9                       1                70041               84941   
10                      1                77641               84741   

    tread_slope_walk_start  tread_slope_walk_end  indoor_flat_walk_start  \
0                   102181                180281                  223681   
1      

In [4]:
pos_list = ['LF','RF','Waist','Wrist']
sub_list = ["Sub" + str(i) for i in range(1, 21)]
sub_list.remove('Sub4')


In [7]:
from scipy import sparse
from scipy.sparse.linalg import spsolve
import numpy as np
import pywt

def alsbase(y, lam, p, niter=10):
    L = len(y)
    D = sparse.diags([1,-2,1],[0,-1,-2], shape=(L,L-2))
    w = np.ones(L)
    for i in range(niter):
        W = sparse.spdiags(w, 0, L, L)
        Z = W + lam * D.dot(D.transpose())
        z = spsolve(Z, w*y)
        w = p * (y > z) + (1-p) * (y < z)
    return z

def denoise(signal_orig):
    coeffs_orig = pywt.wavedec(signal_orig, 'db4', level=2)
    coeffs_filter = coeffs_orig.copy()

    threshold = 0.8

    for i in range(1, len(coeffs_orig)):
        coeffs_filter[i] = pywt.threshold(coeffs_orig[i], threshold*max(coeffs_orig[i]))

    signal_denoised = pywt.waverec(coeffs_filter, 'db4')
    
    return signal_denoised

In [12]:

new_names = ['accX_LF', 'accY_LF', 'accZ_LF', 
            'accX_RF', 'accY_RF', 'accZ_RF', 
             'accX_Waist', 'accY_Waist', 'accZ_Waist', 
             'accX_Wrist', 'accY_Wrist', 'accZ_Wrist'            
            ]

sub_df = None

for sub in sub_list:
    lf_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'LF.txt'))
    rf_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'RF.txt'))
    waist_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'Waist.txt'))
    wrist_df = pd.read_csv(os.path.join(SUBJECT_FOLDER, sub + '_' + 'Wrist.txt'))
    sub_df = pd.concat([lf_df, rf_df, waist_df, wrist_df], axis=1)
    sub_df.columns = new_names
    print(sub_df.head())

    sub_df_new = sub_df.copy()
    sub_df_new = denoise(sub_df_new.values)
    sub_df_new.columns = new_names
    print(sub_df_new.head())

    for column in new_names:
        sub_df_new[column] = sub_df_new[column] - alsbase(sub_df_new[column], 10 ^ 5, 0.000005, niter=10)

    n = int(sub[3:])
    if n > 11:
        sub_row = outdoor_time_df[outdoor_time_df['subject'] == sub]
        tmp = sub_row.iloc[0]
        sub_df_new.loc[0:tmp['outdoor_walk_end'], 'label'] = 'outdoor_walk'
        sub_df_new.loc[tmp['outdoor_walk_end']: tmp['outdoor_run_end'], 'label'] = 'outdoor_run'
    else:
        sub_row = indoor_time_df[indoor_time_df['subject'] == sub]
        tmp = sub_row.iloc[0]
        sub_df_new.loc[0:tmp['tread_flat_walk_end'], 'label'] = 'tread_flat_walk'
        sub_df_new.loc[tmp['tread_flat_walk_end']: tmp['tread_flat_run_end'], 'label'] = 'tread_flat_run'
        sub_df_new.loc[tmp['tread_flat_run_end']: tmp['tread_slope_walk_start'], 'label'] = 'rest'
        sub_df_new.loc[tmp['tread_slope_walk_start']: tmp['tread_slope_walk_end'], 'label'] = 'tread_slope_walk'
        sub_df_new.loc[tmp['tread_slope_walk_end']: tmp['indoor_flat_walk_start'], 'label'] = 'rest'
        sub_df_new.loc[tmp['indoor_flat_walk_start']: tmp['indoor_flat_walk_end'], 'label'] = 'indoor_flat_walk'
        sub_df_new.loc[tmp['indoor_flat_walk_end']: tmp['indoor_flat_run_end'], 'label'] = 'indoor_flat_run'

    print(sub_df_new)
    sub_df_new.to_csv(sub + '_processed.csv')

   accX_LF  accY_LF  accZ_LF  accX_RF  accY_RF  accZ_RF  accX_Waist  \
0   -2.196  -11.765   -1.569    4.078   -6.902    2.196      -0.157   
1   -2.510  -10.353   -2.039    4.392   -8.000    1.255      -0.157   
2   -2.353  -10.824   -2.510    4.863   -8.314    0.941      -0.157   
3   -1.882  -10.667   -2.667    4.549   -9.412   -0.157      -0.157   
4   -1.412  -10.353   -2.824    3.451  -10.353   -0.784      -0.627   

   accY_Waist  accZ_Waist  accX_Wrist  accY_Wrist  accZ_Wrist  
0      -8.157       0.784       0.941     -11.451      -2.510  
1      -8.000       1.255       0.627     -10.510      -2.353  
2      -7.843       1.255       0.784     -10.196      -2.039  
3      -8.000       1.412       1.255     -10.039      -1.412  
4      -7.529       1.882       1.569     -10.196      -1.255  
(271681,)


ValueError: Length of values does not match length of index

In [3]:
import os

import pandas as pd

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

DATA_FOLDER = r'D:/NUS_TERM2_CA3/MAREA_dataset'
PROCESSED_FOLDER = os.path.join(DATA_FOLDER, 'Processed_data')

sub_list = ["Sub" + str(i) for i in range(2, 21)]
sub_list.remove('Sub4')


full_df = pd.read_csv(os.path.join(PROCESSED_FOLDER, 'Sub1_processed.csv'))

full_df = full_df.drop(full_df.columns[[0]], axis=1)

# for sub in sub_list:
#     tmp_df = pd.read_csv(sub + '_' + 'processed.csv')
#     tmp_df = tmp_df.drop(tmp_df.columns[[0]], axis=1)
#
#     full_df.append(tmp_df)

print(full_df.head())




# print(X.corr())

    accX_LF   accY_LF   accZ_LF    accX_RF    accY_RF    accZ_RF  accX_Waist  accY_Waist  accZ_Waist  accX_Wrist  accY_Wrist  accZ_Wrist            label
0  0.332763  4.665276  1.203748  29.928555  39.261268  29.893765    4.085569   12.973653   16.509466    4.599891    0.317562   -0.000466  tread_flat_walk
1 -0.119334  6.199085  0.927106  30.093561  37.685433  28.674644    4.088255   13.046742   16.670780    4.285543    1.170696    0.114250  tread_flat_walk
2 -0.100431  5.849893  0.649464  30.415558  36.893585  28.082513    4.090938   13.119826   16.361089    4.442194    1.396830    0.385996  tread_flat_walk
3  0.240428  6.128695  0.685820  29.952524  35.317699  26.706353    4.093618   12.878897   16.208382    4.912840    1.465963    0.970805  tread_flat_walk
4  0.602893  6.564487  0.722174  28.705430  33.898736  25.801135    3.626290   13.265941   16.368641    5.226477    1.221095    1.085706  tread_flat_walk


In [7]:
df_tread_flat_walk = full_df[full_df['label'] == 'tread_flat_walk']
print(df_tread_flat_walk.shape)

df_tread_flat_run = full_df[full_df['label'] == 'tread_flat_run']
print(df_tread_flat_run.shape)

df_tread_slope_walk = full_df[full_df['label'] == 'tread_slope_walk']
print(df_tread_slope_walk.shape)

df_indoor_flat_walk = full_df[full_df['label'] == 'indoor_flat_walk']
print(df_indoor_flat_walk.shape)

df_rest = full_df[full_df['label'] == 'rest']
print(df_rest.shape)


(55931, 13)
(29750, 13)
(78100, 13)
(22700, 13)
(59900, 13)


In [23]:
window_size = 256
number_columns = 13

def reshape_df(df, window_size, number_columns):
    n_drop = df.shape[0] % window_size
    n_samples = df.shape[0] // window_size
    df = df[:-n_drop]
    return df.values.reshape(n_samples, window_size, number_columns), n_samples, df.iloc[0][12]



In [24]:
df_tread_flat_walk_3d, n_samples, label = reshape_df(df_tread_flat_walk, window_size, number_columns)

print(df_tread_flat_walk_3d.shape)
# print(df_tread_flat_walk_3d)

df_tread_flat_walk_array = df_tread_flat_walk_3d[:,:,:-1]
print(df_tread_flat_walk_array.shape)

df_tread_flat_walk_label = pd.Series([label for _ in range(n_samples)])
print(df_tread_flat_walk_label.shape)
print(df_tread_flat_walk_label.head())

(218, 256, 13)
(218, 256, 12)
(218,)
0    tread_flat_walk
1    tread_flat_walk
2    tread_flat_walk
3    tread_flat_walk
4    tread_flat_walk
dtype: object


In [17]:
y = full_df['label']
print(y.shape)
print(type(y))
X = full_df.drop('label', axis=1)

(271681,)
<class 'pandas.core.series.Series'>


In [3]:
from sklearn.model_selection import train_test_split, cross_val_score


#https://machinelearningmastery.com/index-slice-reshape-numpy-arrays-machine-learning-python/

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 / 4, random_state=5)

print(X_train.shape)
print(y_train.shape)
# print(X_train.describe())
# print(y_train.describe())

(203760, 12)
(203760,)


In [3]:
from sklearn.model_selection import train_test_split, cross_val_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 / 4, random_state=5)

print(X_train.shape)
print(y_train.shape)
print(X_train.describe())
print(y_train.describe())
# ------------------------------
# https://towardsdatascience.com/recurrent-neural-networks-by-example-in-python-ffd204f99470

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Masking, Embedding

model = Sequential()
# Recurrent layer
model.add(LSTM(64, batch_input_shape=(8, 256,10), return_sequences=False, dropout=0.1, recurrent_dropout=0.1))

# Fully connected layer
model.add(Dense(64, activation='relu'))

# Dropout for regularization
model.add(Dropout(0.5))

# Output layer
model.add(Dense(6, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=20, batch_size=128, validation_split=0.2, verbose=1)

score = model.evaluate(X_test, y_test, verbose=0)
print('Test Loss:', score[0])
print('Test accuracy:', score[1])


Using TensorFlow backend.


ValueError: invalid literal for int() with base 10: 'tread_slope_walk'