In [1]:
# description of this dataset http://groupware.les.inf.puc-rio.br/har#ixzz2PyRdbAfA
from sklearn import datasets
from sklearn import preprocessing as pp
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
import numpy
import csv
import pandas as pd
import time

le = pp.LabelEncoder() 
le.fit(['sitting', 'walking', 'sittingdown', 'standing', 'standingup'])

initial = time.time()
### Retrieving all data
overall = pd.read_csv("./dataset-har-PUC-Rio-ugulino.csv", delimiter=';', header='infer') 
data = overall.loc[:, "x1":"z4"].as_matrix() # has to be converted to ndarray in order to be processed by segment_signal()
targets = overall.loc[:,"class,,"].as_matrix() # double commas: looks like the researchers are naughty

load = time.time()
print "--- time to load and select datasets: %s seconds ---" % (load - initial)


### Data segmentation: shall use a sudden change of sensor readings
### like if (x_pre - x_curr <= 1.0, do nothing)
### Range of Accelerometer sensor readings is +3g/-3g

# reading 14 sets of data in every 2 seconds. 
# For segmenting the data from online only. 
# each set of data is taken 150ms apart from another.
# so choosing a window size of 14 will be 2.1 seconds.


def segment_signal(data, window_size=14): 

    N = data.shape[0]
    dim = data.shape[1]
    K = N/window_size
    segments = numpy.empty((K, window_size, dim))
    for i in range(K):
        segment = data[i*window_size:i*window_size+window_size,:]
        segments[i] = numpy.vstack(segment)
    return segments



##!!!! questions: for normalization, should it be done right after loading csv or after segmenation? 
##!!!! Normalize() can't process nadarray with dimension > 2.
X = pp.normalize(data)
y = targets[::14] 
y = y[:-1]# -1 because it will have a extra set of data than X.

normalizing = time.time()
print "--- time to normalize: %s seconds ---" % (normalizing - load)

segs = segment_signal(X)

segmenting = time.time()
print "--- time to segment: %s seconds ---" % (segmenting - normalizing)

### feautre extraction // take the difference between sensors

### this method is to extract the difference between consecutive sensor readings.
## parameter raw is a 2D ndarray
## return a 2D ndarray
def extract_diff(raw):

    N = raw.shape[0] # number of sets of sensor readings
    dim = raw.shape[1] # number of values in each readings
    features = numpy.empty((N - 1, dim))
    for i in range(1, N):
        for j in range(dim):
            features[i-1][j] = raw[i][j] - raw[i-1][j]

    return features

def extract_diff_2(raw):

    N = raw.shape[0] # number of segments of sensor readings ()
    I = raw.shape[1] # number of sets of readings (14)
    J = raw.shape[2] # number of values in each set of readings (12)
    feature_num = (I - 1) * J
    feature = numpy.empty((feature_num))
    features = numpy.empty((N, feature_num))
    for n in range(N):
        idx = 0;
        for i in range(1, I):
            for j in range(J):
                feature[idx] = raw[n][i][j] - raw[n][i-1][j]
                idx += 1
        features[n] = feature
        

    return features

features = extract_diff_2(segs)

extracting_feature = time.time()
print "--- time to extract features: %s seconds ---" % (extracting_feature - segmenting)

#having 15 neurons
kfold = KFold(n_splits=10, shuffle=True)

fold_index = 0
for train, test in kfold.split(features):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(15,), random_state=1).fit(features[train], y[train])
    predictions = clf.predict(features[test])
    accuracy = clf.score(features[test], y[test])
    cm = confusion_matrix(y[test], predictions)

    print('In the %i fold, the classification accuracy is %f' %(fold_index, accuracy))
    print('And the confusion matrix is: ')
    print(cm)
    fold_index += 1


evaluate_model = time.time()
print "--- time to extract features: %s seconds ---" % (evaluate_model - extracting_feature)





--- time to load and select datasets: 0.370768070221 seconds ---
--- time to normalize: 0.0338640213013 seconds ---
--- time to segment: 0.433979988098 seconds ---
--- time to extract features: 2.50458693504 seconds ---
In the 0 fold, the classification accuracy is 0.751479
And the confusion matrix is: 
[[301   1  49   2   3]
 [  4  45  15   9  17]
 [ 98   5 233   4   0]
 [  5  24   7  30  23]
 [  0  11   4  13 280]]
In the 1 fold, the classification accuracy is 0.730347
And the confusion matrix is: 
[[286   4  59   5   2]
 [  7  44  16  14   7]
 [121  10 210   2   2]
 [  5  26  10  43  13]
 [  1   5   4   6 281]]
In the 2 fold, the classification accuracy is 0.752325
And the confusion matrix is: 
[[284   1  47   3   2]
 [  4  34  14  22  10]
 [117   7 223   2   1]
 [  1  12   9  47  18]
 [  0   4   3  16 302]]
In the 3 fold, the classification accuracy is 0.732037
And the confusion matrix is: 
[[304   5  64   3   2]
 [  5  30  13  20  13]
 [102   2 195   4   1]
 [ 11  13  12  32  19]


In [4]:
features

array([[ -2.87208136e-05,   7.65424768e-03,  -3.98987512e-03, ...,
          2.08770027e-03,   1.45718677e-03,  -1.35025743e-03],
       [ -3.38788457e-03,  -3.72409690e-03,  -3.17991818e-03, ...,
         -2.96073633e-02,   9.56516229e-03,   5.84492524e-03],
       [ -3.28204196e-03,  -2.38530300e-02,   3.86786618e-03, ...,
         -1.64834330e-02,   2.71025473e-02,   7.01186619e-03],
       ..., 
       [ -5.04473344e-02,   2.80414516e-02,  -7.28476400e-03, ...,
          1.57633924e-01,   7.81363053e-02,   1.19943033e-01],
       [  6.37063483e-02,  -3.55810374e-02,  -1.70506174e-02, ...,
          1.71703230e-02,   6.26086851e-02,   2.82505322e-02],
       [ -7.94067404e-02,   7.39349977e-03,  -2.37049611e-02, ...,
         -4.67965236e-03,   2.24650548e-03,  -4.62998817e-04]])

In [5]:
features.shape

(11830, 156)

In [1]:
# description of this dataset http://groupware.les.inf.puc-rio.br/har#ixzz2PyRdbAfA
from sklearn import datasets
from sklearn import preprocessing as pp
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
import numpy
import csv
import pandas as pd
import time

le = pp.LabelEncoder() 
le.fit(['sitting', 'walking', 'sittingdown', 'standing', 'standingup'])

initial = time.time()
### Retrieving all data
overall = pd.read_csv("./dataset-har-PUC-Rio-ugulino.csv", delimiter=';', header='infer') 
data = overall.loc[:, "x1":"z4"].as_matrix() # has to be converted to ndarray in order to be processed by segment_signal()
targets = overall.loc[:,"class,,"].as_matrix() # double commas: looks like the researchers are naughty

load = time.time()
print "--- time to load and select datasets: %s seconds ---" % (load - initial)


### Data segmentation: shall use a sudden change of sensor readings
### like if (x_pre - x_curr <= 1.0, do nothing)
### Range of Accelerometer sensor readings is +3g/-3g

# reading 14 sets of data in every 2 seconds. 
# For segmenting the data from online only. 
# each set of data is taken 150ms apart from another.
# so choosing a window size of 14 will be 2.1 seconds.


def segment_signal(data, window_size=14): 

    N = data.shape[0]
    dim = data.shape[1]
    K = N/window_size
    segments = numpy.empty((K, window_size, dim))
    for i in range(K):
        segment = data[i*window_size:i*window_size+window_size,:]
        segments[i] = numpy.vstack(segment)
    return segments



##!!!! questions: for normalization, should it be done right after loading csv or after segmenation? 
##!!!! Normalize() can't process nadarray with dimension > 2.
X = pp.normalize(data)
y = targets[::14] 
y = y[:-1]# -1 because it will have a extra set of data than X.

normalizing = time.time()
print "--- time to normalize: %s seconds ---" % (normalizing - load)

segs = segment_signal(X)

segmenting = time.time()
print "--- time to segment: %s seconds ---" % (segmenting - normalizing)

### feautre extraction // take the difference between sensors

### this method is to extract the difference between consecutive sensor readings.
## parameter raw is a 2D ndarray
## return a 2D ndarray
def extract_diff(raw):

    N = raw.shape[0] # number of sets of sensor readings
    dim = raw.shape[1] # number of values in each readings
    features = numpy.empty((N - 1, dim))
    for i in range(1, N):
        for j in range(dim):
            features[i-1][j] = raw[i][j] - raw[i-1][j]

    return features

def extract_diff_2(raw):

    N = raw.shape[0] # number of segments of sensor readings ()
    I = raw.shape[1] # number of sets of readings (14)
    J = raw.shape[2] # number of values in each set of readings (12)
    feature_num = (I - 1) * J
    feature = numpy.empty((feature_num))
    features = numpy.empty((N, feature_num))
    for n in range(N):
        idx = 0;
        for i in range(1, I):
            for j in range(J):
                feature[idx] = raw[n][i][j] - raw[n][i-1][j]
                idx += 1
        features[n] = feature
        

    return features

features = extract_diff_2(segs)

extracting_feature = time.time()
print "--- time to extract features: %s seconds ---" % (extracting_feature - segmenting)

#having 15 neurons
kfold = KFold(n_splits=10, shuffle=True)

fold_index = 0
for train, test in kfold.split(features):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(15,), random_state=1).fit(features[train], y[train])
    predictions = clf.predict(features[test])
    accuracy = clf.score(features[test], y[test])
    cm = confusion_matrix(y[test], predictions)

    print('In the %i fold, the classification accuracy is %f' %(fold_index, accuracy))
    print('And the confusion matrix is: ')
    print(cm)
    fold_index += 1


evaluate_model = time.time()
print "--- time to extract features: %s seconds ---" % (evaluate_model - extracting_feature)





--- time to load and select datasets: 0.372848987579 seconds ---
--- time to normalize: 0.0270099639893 seconds ---
--- time to segment: 0.41557097435 seconds ---
--- time to extract features: 2.24505400658 seconds ---
In the 0 fold, the classification accuracy is 0.723584
And the confusion matrix is: 
[[314   6  52   2   0]
 [  2  35  25  15  11]
 [118   5 216   3   2]
 [  2  22  18  30  19]
 [  0  14   2   9 261]]
In the 1 fold, the classification accuracy is 0.736264
And the confusion matrix is: 
[[296   3  63   1   3]
 [  1  31  25  11   9]
 [107   5 217   5   0]
 [  1  18  12  31  18]
 [  2  10   5  13 296]]
In the 2 fold, the classification accuracy is 0.757396
And the confusion matrix is: 
[[281   3  66   4   0]
 [  5  44  10  18   6]
 [ 93  10 243   4   4]
 [  7  20   6  50  14]
 [  1  10   2   4 278]]
In the 3 fold, the classification accuracy is 0.771767
And the confusion matrix is: 
[[318   2  49   1   4]
 [  0  44  17  20   5]
 [105   7 222   1   2]
 [  1  19   9  42  13]
 

In [2]:
# description of this dataset http://groupware.les.inf.puc-rio.br/har#ixzz2PyRdbAfA
from sklearn import datasets
from sklearn import preprocessing as pp
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
import numpy
import csv
import pandas as pd
import time

le = pp.LabelEncoder() 
le.fit(['sitting', 'walking', 'sittingdown', 'standing', 'standingup'])

initial = time.time()
### Retrieving all data
overall = pd.read_csv("./dataset-har-PUC-Rio-ugulino.csv", delimiter=';', header='infer') 
data = overall.loc[:, "x1":"z4"].as_matrix() # has to be converted to ndarray in order to be processed by segment_signal()
targets = overall.loc[:,"class,,"].as_matrix() # double commas: looks like the researchers are naughty

load = time.time()
print "--- time to load and select datasets: %s seconds ---" % (load - initial)


### Data segmentation: shall use a sudden change of sensor readings
### like if (x_pre - x_curr <= 1.0, do nothing)
### Range of Accelerometer sensor readings is +3g/-3g

# reading 14 sets of data in every 2 seconds. 
# For segmenting the data from online only. 
# each set of data is taken 150ms apart from another.
# so choosing a window size of 14 will be 2.1 seconds.


def segment_signal(data, window_size=14): 

    N = data.shape[0]
    dim = data.shape[1]
    K = N/window_size
    segments = numpy.empty((K, window_size, dim))
    for i in range(K):
        segment = data[i*window_size:i*window_size+window_size,:]
        segments[i] = numpy.vstack(segment)
    return segments



##!!!! questions: for normalization, should it be done right after loading csv or after segmenation? 
##!!!! Normalize() can't process nadarray with dimension > 2.
X = pp.normalize(data)
y = targets[::14] 
y = y[:-1]# -1 because it will have a extra set of data than X.

normalizing = time.time()
print "--- time to normalize: %s seconds ---" % (normalizing - load)

segs = segment_signal(X)

segmenting = time.time()
print "--- time to segment: %s seconds ---" % (segmenting - normalizing)

### feautre extraction // take the difference between sensors

### this method is to extract the difference between consecutive sensor readings.
## parameter raw is a 2D ndarray
## return a 2D ndarray
def extract_diff(raw):

    N = raw.shape[0] # number of sets of sensor readings
    dim = raw.shape[1] # number of values in each readings
    features = numpy.empty((N - 1, dim))
    for i in range(1, N):
        for j in range(dim):
            features[i-1][j] = raw[i][j] - raw[i-1][j]

    return features

def extract_diff_2(raw):

    N = raw.shape[0] # number of segments of sensor readings ()
    I = raw.shape[1] # number of sets of readings (14)
    J = raw.shape[2] # number of values in each set of readings (12)
    feature_num = (I - 1) * J
    feature = numpy.empty((feature_num))
    features = numpy.empty((N, feature_num))
    for n in range(N):
        idx = 0;
        for i in range(1, I):
            for j in range(J):
                feature[idx] = raw[n][i][j] - raw[n][i-1][j]
                idx += 1
        features[n] = feature
        

    return features

features = extract_diff_2(segs)

extracting_feature = time.time()
print "--- time to extract features: %s seconds ---" % (extracting_feature - segmenting)

#having 15 neurons
kfold = KFold(n_splits=10, shuffle=True)

fold_index = 0
for train, test in kfold.split(features):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(15, 10), random_state=1).fit(features[train], y[train])
    predictions = clf.predict(features[test])
    accuracy = clf.score(features[test], y[test])
    cm = confusion_matrix(y[test], predictions)

    print('In the %i fold, the classification accuracy is %f' %(fold_index, accuracy))
    print('And the confusion matrix is: ')
    print(cm)
    fold_index += 1


evaluate_model = time.time()
print "--- time to extract features: %s seconds ---" % (evaluate_model - extracting_feature)





--- time to load and select datasets: 0.384927988052 seconds ---
--- time to normalize: 0.027195930481 seconds ---
--- time to segment: 0.471884012222 seconds ---
--- time to extract features: 2.31225395203 seconds ---
In the 0 fold, the classification accuracy is 0.715131
And the confusion matrix is: 
[[331   5  41   1   1]
 [  0  40   8  13   4]
 [134  13 180   4   3]
 [  1  32  11  18  38]
 [  1  14   3  10 277]]
In the 1 fold, the classification accuracy is 0.710059
And the confusion matrix is: 
[[291   8  60   0   4]
 [  2  43  16  23  10]
 [107   8 206   4   5]
 [  0  29   9  21  35]
 [  0   7   0  16 279]]
In the 2 fold, the classification accuracy is 0.746407
And the confusion matrix is: 
[[305   6  53   1   3]
 [  0  32  11  31   6]
 [ 96   9 193   4   5]
 [  0  24   8  39  23]
 [  0   3   0  17 314]]
In the 3 fold, the classification accuracy is 0.736264
And the confusion matrix is: 
[[313   7  50   3   3]
 [  1  54   7  30   5]
 [108  21 200   4   1]
 [  0  21   7  28  21]
 

In [5]:
# description of this dataset http://groupware.les.inf.puc-rio.br/har#ixzz2PyRdbAfA
from sklearn import datasets
from sklearn import preprocessing as pp
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
import numpy
import csv
import pandas as pd
import time

le = pp.LabelEncoder() 
le.fit(['sitting', 'walking', 'sittingdown', 'standing', 'standingup'])

initial = time.time()
### Retrieving all data
overall = pd.read_csv("./dataset-har-PUC-Rio-ugulino.csv", delimiter=';', header='infer') 
data = overall.loc[:, "x1":"z4"].as_matrix() # has to be converted to ndarray in order to be processed by segment_signal()
targets = overall.loc[:,"class,,"].as_matrix() # double commas: looks like the researchers are naughty

load = time.time()
print "--- time to load and select datasets: %s seconds ---" % (load - initial)


### Data segmentation: shall use a sudden change of sensor readings
### like if (x_pre - x_curr <= 1.0, do nothing)
### Range of Accelerometer sensor readings is +3g/-3g

# reading 14 sets of data in every 2 seconds. 
# For segmenting the data from online only. 
# each set of data is taken 150ms apart from another.
# so choosing a window size of 14 will be 2.1 seconds.


def segment_signal(data, window_size=14): 

    N = data.shape[0]
    dim = data.shape[1]
    K = N/window_size
    segments = numpy.empty((K, window_size, dim))
    for i in range(K):
        segment = data[i*window_size:i*window_size+window_size,:]
        segments[i] = numpy.vstack(segment)
    return segments



##!!!! questions: for normalization, should it be done right after loading csv or after segmenation? 
##!!!! Normalize() can't process nadarray with dimension > 2.
X = pp.normalize(data)
y = targets[::14] 
y = y[:-1]# -1 because it will have a extra set of data than X.

normalizing = time.time()
print "--- time to normalize: %s seconds ---" % (normalizing - load)

segs = segment_signal(X)

segmenting = time.time()
print "--- time to segment: %s seconds ---" % (segmenting - normalizing)

### feautre extraction // take the difference between sensors

### this method is to extract the difference between consecutive sensor readings.
## parameter raw is a 2D ndarray
## return a 2D ndarray
def extract_diff(raw):

    N = raw.shape[0] # number of sets of sensor readings
    dim = raw.shape[1] # number of values in each readings
    features = numpy.empty((N - 1, dim))
    for i in range(1, N):
        for j in range(dim):
            features[i-1][j] = raw[i][j] - raw[i-1][j]

    return features

def extract_diff_2(raw):

    N = raw.shape[0] # number of segments of sensor readings ()
    I = raw.shape[1] # number of sets of readings (14)
    J = raw.shape[2] # number of values in each set of readings (12)
    feature_num = (I - 1) * J
    feature = numpy.empty((feature_num))
    features = numpy.empty((N, feature_num))
    for n in range(N):
        idx = 0;
        for i in range(1, I):
            for j in range(J):
                feature[idx] = raw[n][i][j] - raw[n][i-1][j]
                idx += 1
        features[n] = feature
        

    return features

features = extract_diff_2(segs)

extracting_feature = time.time()
print "--- time to extract features: %s seconds ---" % (extracting_feature - segmenting)

#having 15 neurons
kfold = KFold(n_splits=10, shuffle=True)

fold_index = 0
for train, test in kfold.split(features):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(60, 30), random_state=1).fit(features[train], y[train])
    predictions = clf.predict(features[test])
    accuracy = clf.score(features[test], y[test])
    cm = confusion_matrix(y[test], predictions)

    print('In the %i fold, the classification accuracy is %f' %(fold_index, accuracy))
    print('And the confusion matrix is: ')
    print(cm)
    fold_index += 1


evaluate_model = time.time()
print "--- time to extract features: %s seconds ---" % (evaluate_model - extracting_feature)




--- time to load and select datasets: 0.406511068344 seconds ---
--- time to normalize: 0.0289750099182 seconds ---
--- time to segment: 0.430980920792 seconds ---
--- time to extract features: 2.33887004852 seconds ---
In the 0 fold, the classification accuracy is 0.783601
And the confusion matrix is: 
[[295   3  72   0   2]
 [  0  44  14  14   0]
 [ 92   7 239   3   0]
 [  0  16   5  50  12]
 [  0   4   3   9 299]]
In the 1 fold, the classification accuracy is 0.770921
And the confusion matrix is: 
[[297   4  59   1   1]
 [  0  39  14  17  11]
 [ 72   8 241   4   0]
 [  1  26  10  46  24]
 [  0   7   2  10 289]]
In the 2 fold, the classification accuracy is 0.777684
And the confusion matrix is: 
[[289   4  69   1   2]
 [  0  50  18  19   6]
 [ 86   3 241   2   1]
 [  0  12  10  43  20]
 [  0   1   2   7 297]]
In the 3 fold, the classification accuracy is 0.777684
And the confusion matrix is: 
[[282   6  49   1   2]
 [  0  42  18  18   8]
 [ 92   8 238   4   1]
 [  2  14  10  54  15]


In [6]:
clf.get_params

<bound method MLPClassifier.get_params of MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(60, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)>

In [6]:
features.shape

(11830, 156)

In [None]:
# description of this dataset http://groupware.les.inf.puc-rio.br/har#ixzz2PyRdbAfA
from sklearn import datasets
from sklearn import preprocessing as pp
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
import numpy
import csv
import pandas as pd
import time

le = pp.LabelEncoder() 
le.fit(['sitting', 'walking', 'sittingdown', 'standing', 'standingup'])

initial = time.time()
### Retrieving all data
overall = pd.read_csv("./dataset-har-PUC-Rio-ugulino.csv", delimiter=';', header='infer') 
data = overall.loc[:, "x1":"z4"].as_matrix() # has to be converted to ndarray in order to be processed by segment_signal()
targets = overall.loc[:,"class,,"].as_matrix() # double commas: looks like the researchers are naughty

load = time.time()
print "--- time to load and select datasets: %s seconds ---" % (load - initial)


### Data segmentation: shall use a sudden change of sensor readings
### like if (x_pre - x_curr <= 1.0, do nothing)
### Range of Accelerometer sensor readings is +3g/-3g

# reading 14 sets of data in every 2 seconds. 
# For segmenting the data from online only. 
# each set of data is taken 150ms apart from another.
# so choosing a window size of 14 will be 2.1 seconds.


def segment_signal(data, window_size=14): 

    N = data.shape[0]
    dim = data.shape[1]
    K = N/window_size
    segments = numpy.empty((K, window_size, dim))
    for i in range(K):
        segment = data[i*window_size:i*window_size+window_size,:]
        segments[i] = numpy.vstack(segment)
    return segments



##!!!! questions: for normalization, should it be done right after loading csv or after segmenation? 
##!!!! Normalize() can't process nadarray with dimension > 2.
X = pp.normalize(data)
y = targets[::14] 
y = y[:-1]# -1 because it will have a extra set of data than X.

normalizing = time.time()
print "--- time to normalize: %s seconds ---" % (normalizing - load)

segs = segment_signal(X)

segmenting = time.time()
print "--- time to segment: %s seconds ---" % (segmenting - normalizing)

### feautre extraction // take the difference between sensors

### this method is to extract the difference between consecutive sensor readings.
## parameter raw is a 2D ndarray
## return a 2D ndarray
def extract_diff(raw):

    N = raw.shape[0] # number of sets of sensor readings
    dim = raw.shape[1] # number of values in each readings
    features = numpy.empty((N - 1, dim))
    for i in range(1, N):
        for j in range(dim):
            features[i-1][j] = raw[i][j] - raw[i-1][j]

    return features

def extract_diff_2(raw):

    N = raw.shape[0] # number of segments of sensor readings ()
    I = raw.shape[1] # number of sets of readings (14)
    J = raw.shape[2] # number of values in each set of readings (12)
    feature_num = (I - 1) * J
    feature = numpy.empty((feature_num))
    features = numpy.empty((N, feature_num))
    for n in range(N):
        idx = 0;
        for i in range(1, I):
            for j in range(J):
                feature[idx] = raw[n][i][j] - raw[n][i-1][j]
                idx += 1
        features[n] = feature
        

    return features

features = extract_diff_2(segs)

extracting_feature = time.time()
print "--- time to extract features: %s seconds ---" % (extracting_feature - segmenting)

#having 15 neurons
kfold = KFold(n_splits=10, shuffle=True)

fold_index = 0
for train, test in kfold.split(features):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(15,), random_state=1).fit(features[train], y[train])
    predictions = clf.predict(features[test])
    accuracy = clf.score(features[test], y[test])
    cm = confusion_matrix(y[test], predictions)

    print('In the %i fold, the classification accuracy is %f' %(fold_index, accuracy))
    print('And the confusion matrix is: ')
    print(cm)
    fold_index += 1


evaluate_model = time.time()
print "--- time to train model: %s seconds ---" % (evaluate_model - extracting_feature)



