# In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 27 14:34:20 2017
This is a small project for CNN in KERAS.
This file creates, trains and saves a convolutional neural network for
Human Activity Recognition. The data used by this file is released and provided by the
Wireless Sensor Data Mining (WISDM) lab and can be found at the following link:
http://www.cis.fordham.edu/wisdm/dataset.php  
Feel free to use this code, and please cite this repository if you use it for your reports or projects.
@author: Muhammad Shahnawaz
"""
# importing libraries and dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
#from keras import backend as K
from keras import optimizers

# Fix the seed of NumPy's global random generator so that everything drawn
# from np.random (e.g. the random train/test mask built further down) is
# repeatable from run to run.
# NOTE(review): only NumPy is seeded here; whether Keras weight initialisation
# and batch shuffling are also deterministic depends on the backend -- confirm.
random_seed = 611
np.random.seed(random_seed)

# Apply matplotlib's 'ggplot' style sheet to every figure drawn by this script.
# (The "matplotlib inline" remark is presumably left over from the notebook magic.)
plt.style.use('ggplot')

# defining function for loading the dataset
def readData(filePath):
    """Load the raw accelerometer log into a pandas DataFrame.

    The input is parsed as header-less, comma-separated text.  Its six
    columns are named user_id, activity, timestamp, x-axis, y-axis and
    z-axis, and a field consisting solely of ';' is read as a missing value.

    Parameters
    ----------
    filePath : path or file-like object accepted by ``pandas.read_csv``

    Returns
    -------
    pandas.DataFrame with the six named columns.
    """
    return pd.read_csv(
        filePath,
        names=['user_id', 'activity', 'timestamp', 'x-axis', 'y-axis', 'z-axis'],
        header=None,
        na_values=';',
    )

# defining a function for feature normalization
# (feature - mean) / std
def featureNormalize(dataset):
    """Standardise every feature to zero mean and unit standard deviation.

    Computes ``(dataset - mean) / std`` with both statistics taken along
    axis 0, i.e. per column for 2-D input.

    A constant feature has a standard deviation of 0, and dividing by it
    would fill that feature with NaN/inf.  Such features are divided by 1
    instead, so they come out as all zeros.

    Parameters
    ----------
    dataset : array-like
        Values to normalise (NumPy array or pandas object).

    Returns
    -------
    The normalised values, same shape as ``dataset``.
    """
    mu = np.mean(dataset, axis=0)    # per-feature mean
    sigma = np.std(dataset, axis=0)  # per-feature standard deviation
    # Guard against division by zero for constant features.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    return (dataset - mu) / safe_sigma

# defining the function to plot a single axis data
def plotAxis(axis,x,y,title):
    """Draw one signal (y against x) on the given matplotlib axes.

    The panel gets ``title`` as its heading, its x axis is hidden, the y
    range is padded by one standard deviation of ``y`` on each side, the x
    range is set to [min(x), max(x)] and the grid is switched on.
    """
    axis.plot(x, y)
    axis.set_title(title)
    axis.xaxis.set_visible(False)

    # pad the vertical range by one standard deviation of the signal
    margin = np.std(y)
    lower, upper = min(y) - margin, max(y) + margin
    axis.set_ylim([lower, upper])

    left, right = min(x), max(x)
    axis.set_xlim([left, right])

    axis.grid(True)

# defining a function to plot the data for a given activity
def plotActivity(activity,data):
    """Show the x, y and z signals of one activity as three stacked panels.

    ``data`` must provide the columns 'timestamp', 'x-axis', 'y-axis' and
    'z-axis'.  Each axis column is plotted against the timestamps in its own
    panel (the panels share the x axis), ``activity`` is used as the figure
    title, and the figure is displayed with ``plt.show()``.
    """
    fig, panels = plt.subplots(nrows=3, figsize=(15,10), sharex=True)

    timestamps = data['timestamp']
    # one panel per accelerometer axis; the column name doubles as the title
    for panel, column in zip(panels, ('x-axis', 'y-axis', 'z-axis')):
        plotAxis(panel, timestamps, data[column], column)

    plt.subplots_adjust(hspace=0.2)
    fig.suptitle(activity)
    # leave room at the top of the figure for the overall title
    plt.subplots_adjust(top=0.9)
    plt.show()

# defining a window function for segmentation purposes
# NOTE: data.count() is very large (roughly 1,000,000 rows or more); for testing, limit it to about 200,000
def windows(data, size):
    """Yield (start, end) positions of half-overlapping windows over ``data``.

    Consecutive windows start ``size / 2`` positions apart and each spans
    ``size`` positions.  Windows are produced while the start position is
    below ``data.count()``, so the last windows may extend past the end of
    the data; the caller has to discard incomplete ones.

    Parameters
    ----------
    data : object with a ``count()`` method (e.g. a pandas Series, for which
        ``count()`` is the number of non-missing entries)
    size : int
        Window length; must be positive.

    Yields
    ------
    tuple of (int, int)
        Start (inclusive) and end (exclusive) position of each window.

    Raises
    ------
    ValueError
        On the first iteration if ``size`` is not positive (the start
        position would never advance and the loop would not terminate).
    """
    if size <= 0:
        raise ValueError("size must be positive, got %r" % (size,))

    # count() is loop-invariant; evaluate it once instead of on every pass
    total = data.count()
    step = size / 2
    start = 0
    while start < total:
        yield int(start), int(start + size)
        start += step

# segmenting the time series
def segment_signal(data, window_size = 90):
    """Cut the recording into fixed-length, half-overlapping segments.

    Window positions come from ``windows(data['timestamp'], window_size)``;
    only windows that contain exactly ``window_size`` samples are kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'timestamp', 'x-axis', 'y-axis', 'z-axis'
        and 'activity'.
    window_size : int, optional
        Number of consecutive samples per segment (default 90).

    Returns
    -------
    segments : numpy.ndarray, shape (n_segments, window_size, 3)
        The x, y and z readings of every complete window.
    labels : numpy.ndarray, shape (n_segments,)
        The most frequent activity inside each window; on a tie the value
        that sorts first is chosen.
    """
    timestamps = data['timestamp']
    xs, ys, zs = data['x-axis'], data['y-axis'], data['z-axis']
    activities = data['activity']

    # Collect the pieces in lists and stack once at the end: growing the
    # arrays with vstack/append inside the loop copies everything gathered
    # so far on every iteration.
    chunks = []
    window_labels = []
    for (start, end) in windows(timestamps, window_size):
        # windows near the end of the data are shorter than window_size
        if len(timestamps[start:end]) != window_size:
            continue
        chunks.append(np.dstack([xs[start:end], ys[start:end], zs[start:end]]))
        # Majority label via np.unique counts.  scipy.stats.mode(...)[0][0]
        # is avoided because recent SciPy releases reject non-numeric input
        # and return a scalar for 1-D data.  argmax picks the first maximum
        # of the sorted unique values, matching the former tie-breaking.
        values, counts = np.unique(activities[start:end], return_counts=True)
        window_labels.append(values[np.argmax(counts)])

    # The empty seed arrays keep shape and dtype promotion identical to the
    # incremental construction, including the "no complete window" case.
    segments = np.vstack([np.empty((0, window_size, 3))] + chunks)
    labels = np.append(np.empty((0)), window_labels)
    return segments, labels

# main program #
# columnNames = ['user_id','activity','timestamp','x-axis','y-axis','z-axis']

# Load the raw recording and print activity, timestamp and x reading of the
# first five rows as a quick sanity check.
dataset = readData('/content/drive/MyDrive/python/data/actitracker_raw.txt')
for i in range(5):
    print (dataset['activity'][i], dataset['timestamp'][i], dataset['x-axis'][i])

# plotting a subset of the data to visualize:
# for every distinct activity, show its first 180 rows
for activity in np.unique(dataset['activity']):
    subset = dataset[dataset['activity']==activity][:180]
    plotActivity(activity,subset)

# segmenting the signal in overlapping windows of 90 samples with 50% overlap
# (90 is the default window_size of segment_signal)
segments, labels = segment_signal(dataset) 

# one-hot encoding the activity labels: one int8 column per distinct activity
labels = np.asarray(pd.get_dummies(labels), dtype=np.int8)

# defining parameters for the input and network layers
# we are treating each segment or chunk as a 2D image (90 X 3)

print (segments.shape)

h = segments.shape[1]  # samples per segment (window length)
w = segments.shape[2]  # values per sample (x, y and z)
# NOTE(review): numChannels is not referenced again in this file; the reshape
# below and the Conv2D input_shape use the literal 1 instead.
numChannels = 1
numFilters = 128 # number of filters in Conv2D layer
numNueronsFCL1 = 128 # number of units in the first fully connected (Dense) layer
numNueronsFCL2 = 128 # number of units in the second fully connected (Dense) layer

trainRatio = 0.8  # probability that a segment goes to the training set; also used to derive validation_split
# NOTE(review): Epochs and batchSize are not referenced by the model.fit call
# further down, which hard-codes epochs=10 and batch_size=32 -- confirm intent.
Epochs = 10 # number of epochs
batchSize = 10 # batch size
numClasses = labels.shape[1] # total number of classes (one-hot columns)
dropOutRatio = 0.2  # dropout ratio for dropout layer

# reshaping the data for network input: (n_segments, h, w, 1), i.e. a single channel
reshapedSegments = segments.reshape(segments.shape[0], h, w, 1)

# splitting in training and testing data with a random boolean mask;
# each segment lands in the training set with probability trainRatio,
# so the resulting split sizes are only approximately 80/20
trainSplit = np.random.rand(len(reshapedSegments)) < trainRatio
trainX = reshapedSegments[trainSplit]
testX = reshapedSegments[~trainSplit]
# replace NaN readings with 0 so they do not propagate through the network
trainX = np.nan_to_num(trainX)
testX = np.nan_to_num(testX)
trainY = labels[trainSplit]
testY = labels[~trainSplit]

def cnnModel():
    """Build and compile the CNN used for activity classification.

    Layers, in order: Conv2D (numFilters kernels of size 2x2, ReLU) ->
    2x2 max pooling -> dropout (dropOutRatio) -> flatten ->
    Dense(numNueronsFCL1, ReLU) -> Dense(numNueronsFCL2, ReLU) ->
    Dense(numClasses, softmax).

    The layer sizes and the input shape (h, w, 1) are read from the
    module-level variables of the same names.

    Returns
    -------
    The Sequential model, compiled with categorical cross-entropy loss, the
    Adam optimizer and accuracy as the reported metric.
    """
    model = Sequential()

    # convolution block: each segment is treated as an (h, w) single-channel image
    model.add(Conv2D(numFilters, (2,2), input_shape=(h, w,1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2),padding='valid'))
    model.add(Dropout(dropOutRatio))

    # classifier head
    model.add(Flatten())
    model.add(Dense(numNueronsFCL1, activation='relu'))
    model.add(Dense(numNueronsFCL2, activation='relu'))
    model.add(Dense(numClasses, activation='softmax'))

    # `learning_rate` is the supported argument name; `lr` is a deprecated
    # alias that newer Keras releases no longer accept.
    # NOTE(review): `decay` is also a legacy argument in recent Keras
    # optimizers -- confirm it is accepted by the installed version.
    adam = optimizers.Adam(learning_rate=0.001, decay=1e-6)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model


# Build the network and print its layer-by-layer summary.
model = cnnModel()
model.summary()

# Train on the training segments; validation_split=1-trainRatio (about 0.2)
# asks Keras to hold back that fraction of trainX/trainY for validation.
# NOTE(review): epochs and batch_size are hard-coded here (10 and 32) instead
# of using the Epochs / batchSize variables defined earlier (10 and 10) --
# confirm which batch size is intended.
model.fit(trainX, trainY, validation_split=1-trainRatio, epochs=10, batch_size=32, verbose=2)

# Evaluate on the held-out test segments; score[1] is the 'accuracy' metric
# requested in model.compile and is printed as a percentage.
score = model.evaluate(testX,testY,verbose=2)
print('Correct Classification : %.2f%%' %(score[1]*100))

# Save the trained model together with the test inputs and their one-hot
# ground-truth labels, written to the current working directory.
model.save('model.h5')
np.save('groundTruth.npy',testY)
np.save('testData.npy',testX)