In [15]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, Reshape, Dropout, Activation
from keras.utils import to_categorical
from sklearn.utils import shuffle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split

np.random.seed(66) # for reproductibility results


In [7]:
#Loading and Preparing the data
data = pickle.load(open("clean_data.p" , "rb"))

data_frame = pd.DataFrame()

#loading the file data and store them in data_frame
for k in range(1,16):
    index = 'n° '+str(k)
    data_frame = data_frame.append(pd.DataFrame(data[index]), ignore_index=True)

#getting the target     
y = data_frame['class'].values

print(y)

[1 1 1 ... 7 7 7]


In [8]:
#delete the class column from the data frame
del data_frame['class']

print(data_frame)

            x     y     z
0        1502  2215  2153
1        1667  2072  2047
2        1611  1957  1906
3        1601  1939  1831
4        1643  1965  1879
5        1604  1959  1921
6        1640  1829  1940
7        1607  1910  1910
8        1546  2045  1910
9        1529  2049  1972
10       1637  1978  1945
11       1596  2046  1866
12       1590  2006  1978
13       1601  1966  1957
14       1542  2003  1959
15       1598  2027  1941
16       1511  2258  1983
17       1555  1980  2023
18       1508  2468  1934
19       1580  1697  2005
20       1627  2073  1992
21       1592  2130  2063
22       1634  2088  1991
23       1638  2102  1916
24       1593  2123  1948
25       1542  2133  2034
26       1601  2015  2042
27       1613  1938  1936
28       1644  1974  2000
29       1642  1933  2046
...       ...   ...   ...
1923147  2039  2522  1999
1923148  2042  2537  2001
1923149  2057  2534  1987
1923150  2036  2557  2005
1923151  2048  2539  2003
1923152  2042  2519  2005
1923153  204

# Feature Scaling with scikit-learn

The **<span style="color:red;">RobustScaler</span>** uses a similar method to the **Min-Max scaler** but it instead uses the **interquartile range**,
rathar than the **min-max**, so that it is robust to **outliers**. Therefore it follows the formula:

<span style="color:blue;">RobustScaler</span> = $\frac{x_{i}-Q_{1}(x)}{Q_{3}(x)-Q_{1}(x)}$

In [9]:
from sklearn import preprocessing
import seaborn as sns


scaler = preprocessing.StandardScaler()
robust_scaled_df = scaler.fit_transform(data_frame)
robust_scaled_df = pd.DataFrame(robust_scaled_df, columns=['x', 'y', 'z'])

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [19]:
X = robust_scaled_df.values
print(X[:10])
print(y[:10])

X2, y2 = shuffle(X, y, random_state=66)
print(X2[:10])
print(y2[:10])


[[-4.36029703 -1.66947853  1.93152004]
 [-2.87836023 -3.09647188  0.80972225]
 [-3.3813206  -4.24405394 -0.68248047]
 [-3.47113495 -4.42367548 -1.47620532]
 [-3.09391468 -4.16422214 -0.96822141]
 [-3.44419065 -4.22409599 -0.5237355 ]
 [-3.12085898 -5.52136267 -0.32265854]
 [-3.41724634 -4.71306574 -0.64014848]
 [-3.96511388 -3.36590419 -0.64014848]
 [-4.11779828 -3.32598829  0.0159974 ]]
[1 1 1 1 1 1 1 1 1 1]
[[-1.00124029 -0.17263237  1.6351961 ]
 [ 1.02856404 -0.64164416 -0.12158157]
 [-1.62095932 -0.45204365  0.36523634]
 [-1.15392469 -0.48198057  0.21707436]
 [ 1.28004422  0.84522303 -1.26454536]
 [ 0.67828807 -0.41212775  0.70389227]
 [ 1.11837839 -0.85120263  0.89438624]
 [-0.02226387  0.00698917  0.36523634]
 [ 1.29800709 -0.31233801 -0.12158157]
 [-1.03716603 -0.02294775  0.97905022]]
[1 3 2 4 6 7 3 3 4 1]


In [20]:
X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2,  stratify=y, shuffle=True, random_state=66)

#Resizing the inputs to 3D dimension inputs (format needed by ) 
x_train = X_train.reshape(-1,1,3)
x_test  = X_test.reshape(-1,1,3)

#-1 : To start from 0 , cause one hot encoding of keras start from 0
y_train = y_train-1
y_test = y_test-1

In [21]:
data_dim = 3
timesteps = 1
num_classes = 7

#RNN Setting
NB_EPOCH = 15

#The batch size
BATCH_SIZE = 128

#In order to display the results
VERBOSE = 1

# the validation split used during the validation process represent 20% of the training data set
VALIDATION_SPLIT = 0.2


# expected input data shape: (batch_size, timesteps, data_dim) , return_sequences=True
model = Sequential()

#Using an LSTML RNN hidden layer which is very recommended in HAR deeplearning processing 
#the LSTM model to overcome the vanishing gradient problem that occurs with most Recurrent Neural Network models
model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(timesteps, data_dim)))  

model.add(LSTM(128, return_sequences=True, activation='relu'))

#This layer is used to resize the output from 3D to 2D 
model.add(Flatten())

#We use a dense layer in order to interpret the results
model.add(Dense(num_classes, activation='softmax'))

#RMSprop This optimizer is usually a good choice for recurrent neural networks.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

#we display the summary of our model
model.summary()

#One hot encoding the categorical classes:
#One hot encoding is a process by which categorical variables are converted into a form that could be provided 
#to ML algorithms to do a better job in prediction.
yy_train = to_categorical(y_train,num_classes=num_classes)

#We train our training model with the specefied setting
model.fit(x_train, yy_train,
          batch_size=BATCH_SIZE, epochs= NB_EPOCH,
          verbose=VERBOSE, validation_split=VALIDATION_SPLIT, shuffle= True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 1, 128)            67584     
_________________________________________________________________
lstm_6 (LSTM)                (None, 1, 128)            131584    
_________________________________________________________________
flatten_3 (Flatten)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 7)                 903       
Total params: 200,071
Trainable params: 200,071
Non-trainable params: 0
_________________________________________________________________
Train on 1230832 samples, validate on 307709 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15

KeyboardInterrupt: 

In [11]:
#One hot encoding the categorical classes
yy_test = to_categorical(y_test,num_classes=num_classes)

score = model.evaluate(x_test, yy_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

Test score: 0.7996113908314542
Test accuracy: 0.7314265955357191


In [12]:
#display the loss and the accurac
print(model.metrics_names)

['loss', 'acc']
