In [1]:
import os
import numpy as np
import pandas as pd


In [2]:
columns = ['user','activity','timestamp', 'x-axis', 'y-axis', 'z-axis']

wisdm=pd.read_csv("WISDM.txt",header=None,names=columns)

In [3]:
wisdm.count()

user         1098204
activity     1098204
timestamp    1098204
x-axis       1098204
y-axis       1098204
z-axis       1098203
dtype: int64

In [4]:
wisdm=wisdm.dropna()

In [5]:
wisdm.dtypes

user           int64
activity      object
timestamp      int64
x-axis       float64
y-axis       float64
z-axis        object
dtype: object

In [6]:
wisdm.user=wisdm.user.astype(int)

In [7]:
wisdm.activity=wisdm.activity.astype(str)

In [8]:
wisdm["z-axis"]=wisdm["z-axis"].astype(str)

In [9]:
wisdm["z-axis"]=wisdm["z-axis"].str.replace(";",'')

In [10]:
wisdm["z-axis"]=wisdm["z-axis"].astype(float)

In [11]:
wisdm.dtypes

user           int64
activity      object
timestamp      int64
x-axis       float64
y-axis       float64
z-axis       float64
dtype: object

In [12]:
wisdm.head()

Unnamed: 0,user,activity,timestamp,x-axis,y-axis,z-axis
0,33,Jogging,49105962326000,-0.694638,12.680544,0.503953
1,33,Jogging,49106062271000,5.012288,11.264028,0.953424
2,33,Jogging,49106112167000,4.903325,10.882658,-0.081722
3,33,Jogging,49106222305000,-0.612916,18.496431,3.023717
4,33,Jogging,49106332290000,-1.18497,12.108489,7.205164


In [13]:
wisdm.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1098203 entries, 0 to 1098203
Data columns (total 6 columns):
user         1098203 non-null int64
activity     1098203 non-null object
timestamp    1098203 non-null int64
x-axis       1098203 non-null float64
y-axis       1098203 non-null float64
z-axis       1098203 non-null float64
dtypes: float64(3), int64(2), object(1)
memory usage: 58.7+ MB


In [14]:
wisdm.describe()

Unnamed: 0,user,timestamp,x-axis,y-axis,z-axis
count,1098203.0,1098203.0,1098203.0,1098203.0,1098203.0
mean,18.86067,33409100000000.0,0.6628645,7.255642,0.4110616
std,10.21423,49449680000000.0,6.84906,6.746207,4.754109
min,1.0,0.0,-19.61,-19.61,-19.8
25%,10.0,2019128000000.0,-2.87,3.17,-2.22
50%,19.0,9722802000000.0,0.27,7.93,0.0
75%,28.0,49965720000000.0,4.44,11.56,2.72
max,36.0,209397400000000.0,19.95,20.04,19.61


In [15]:
wisdm.activity.unique()

array(['Jogging', 'Walking', 'Upstairs', 'Downstairs', 'Sitting',
       'Standing'], dtype=object)

In [16]:
wisdm.activity.value_counts()

Walking       424397
Jogging       342176
Upstairs      122869
Downstairs    100427
Sitting        59939
Standing       48395
Name: activity, dtype: int64

In [17]:
(wisdm.user.unique())

array([33, 17, 20, 29, 13, 15,  6, 27, 36, 18, 32, 35, 11, 16,  5, 10, 28,
       26, 14, 24, 12, 23,  4, 30, 34,  8, 31, 21,  3, 22,  1,  9, 25,  2,
        7, 19])

In [18]:
wisdm[wisdm["user"]==6].activity.value_counts()

Walking       12399
Jogging       11818
Sitting        1679
Upstairs       1666
Downstairs     1433
Standing        709
Name: activity, dtype: int64

In [21]:
from scipy import stats

In [24]:
N_TIME_STEPS = 81
N_FEATURES = 3
step = 20
segments = []
labels = []
for i in range(0, len(wisdm) - N_TIME_STEPS, step):
    xs = wisdm['x-axis'].values[i: i + N_TIME_STEPS]
    ys = wisdm['y-axis'].values[i: i + N_TIME_STEPS]
    zs = wisdm['z-axis'].values[i: i + N_TIME_STEPS]
    label = stats.mode(wisdm['activity'][i: i + N_TIME_STEPS])[0][0]
    segments.append([xs, ys, zs])
    labels.append(label)


In [25]:
np.unique(np.array(labels))

array(['Downstairs', 'Jogging', 'Sitting', 'Standing', 'Upstairs',
       'Walking'], dtype='|S10')

In [26]:
np.array(segments).shape


(54907, 3, 81)

In [27]:
reshaped_segments = np.asarray(segments, dtype= np.float32).reshape(-1, N_TIME_STEPS, N_FEATURES)
labels = np.asarray(pd.get_dummies(labels), dtype = np.float32)


In [28]:
reshaped_segments.shape


(54907, 81, 3)

In [29]:
labels[0]


array([0., 1., 0., 0., 0., 0.], dtype=float32)

In [30]:
from sklearn.model_selection import train_test_split

RANDOM_SEED=42

X_train, X_test, y_train, y_test = train_test_split(
        reshaped_segments, labels, test_size=0.2, random_state=RANDOM_SEED)


In [31]:
len(X_train)


43925

In [32]:
X_train.shape

(43925, 81, 3)

In [33]:
len(X_test)


10982

In [37]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Reshape
from keras.layers import Conv2D, MaxPooling2D,LSTM,Bidirectional
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.regularizers import l2


In [38]:
from keras.layers import TimeDistributed
from keras.layers import Bidirectional


In [39]:
n_timesteps,n_features=81,3
epochs,batch_size=50,64
n_outputs=6

model = Sequential()
model.add(Bidirectional(LSTM(100, return_sequences=True,input_shape=(n_timesteps,n_features))))
model.add(Bidirectional(LSTM(100)))
#model.add(Dropout(0.25))
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] )
# fit network
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size ,validation_split=0.2)
# evaluate model
_, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size)


Train on 35140 samples, validate on 8785 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [40]:
# Test accuracy (Here we can observe validation accuracy also reaching 0.97 which
# is not the case with DNN model.)

accuracy

0.9736842105263158