### Summary
- This model identifies which dancer performed a move with an average accuracy of 98% (5-fold cross-validation), provided that the dancer's data was available for training.
- This has interesting privacy implications, which are discussed in Section 6.4 (Ethical Impact) of the Final Report.

In [1]:
import numpy as np 
import pandas as pd 
import os
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import pickle
from glob import glob
from timeit import default_timer as timer
import sys
import scipy.stats as stats

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, KFold, StratifiedKFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from tune_sklearn import TuneGridSearchCV

from keras.utils import np_utils,to_categorical
from keras.optimizers import RMSprop, Adam
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout, BatchNormalization, Conv1D, Conv2D, MaxPooling2D, MaxPooling1D
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from keras import optimizers

from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

pd.set_option("display.max_rows", 100, "display.max_columns", 100)
np.set_printoptions(threshold=100) # 'sys.maxsize' for max

In [2]:
# modified from Sean's extractDataset.py

import sys
import os
import csv
import numpy as np
import pandas as pd
import random

import matplotlib.pyplot as plt
from matplotlib.widgets import TextBox, Button, RadioButtons
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import gridspec

from sklearn import preprocessing

# Definitions
# Base directory that listFiles / readRawDataset / writeThisFile join their
# relative paths onto (the working directory at the time this cell runs).
BASEPATH = os.getcwd()
# SAVEFOLDER = ''
# TESTFOLDER = ''
# Move names used by processData to initialise its per-move counters.
DANCEMOVENAMES = ("dab","elbowkick","gun","hair","listen","pointhigh","sidepump","wipetable")
# When True, processData returns a pandas DataFrame; otherwise the list of dancemove objects.
IS_RETURN_DATAFRAME = True

# Changeable parameters
NUMBER_OF_AFTER_SAMPLES = 3 # Number of samples to include in dance move after end detected (only used when USE_MAX_SAMPLES is False)
NUMBER_OF_BEFORE_SAMPLES = 5 # Number of samples to include in dance move before start detected
MINIMUM_MOVE_TIME = 25 # Minimum number of samples to be considered a move. Set this too low and you will get garbage dance samples

# When IS_PAD is True, readRawDataset repeats the first and the last sample
# PAD_NUM times at the start and end of every channel.
IS_PAD = True
PAD_NUM =20

# When USE_MAX_SAMPLES is True, isolateSequences cuts windows spanning
# MAX_SAMPLES indices instead of using the detected end of the move.
USE_MAX_SAMPLES = True
MAX_SAMPLES = 100
# Passed by processData to isolateSequences as useTemporalAugmentation /
# useAccelBaseValueAugmentation respectively.
USETEMP = True
USEACCEL= True
# Temporal augmentation shifts the window from -2*N to +2*N samples in steps of 2
# (2*N + 1 windows per move).
TEMPORALDATAAUGNUM = 5
# Number of randomly offset accelerometer copies produced by the accel augmentation loop.
NUMRANDOMSHIFTSACCEL = 5
# Upper bound given to random.uniform when drawing the z-axis accelerometer offset.
Z_RAND_MAX = 0.05


class rawDataset():
    """One raw recording: its file-name metadata plus the per-channel sample lists.

    ``dataset`` is a dict holding the keys ``a_xList``, ``a_yList``, ``a_zList``,
    ``g_xList``, ``g_yList``, ``g_zList`` and ``activation_List`` (the keys read
    by :meth:`plot`).
    """

    def __init__(self, device, movename, timestamp, dataset):
        self.device, self.movename = device, movename
        self.timestamp = timestamp
        self.dataset = dataset

    def plot(self):
        """Show the activation, accelerometer and gyroscope traces in three stacked panels."""
        samples = self.dataset
        displaylen = len(samples['activation_List'])
        index = list(range(displaylen))

        fig = plt.figure()
        gs = gridspec.GridSpec(3, 1, width_ratios=[1], height_ratios=[0.2,1,1])
        # Panels are created top to bottom: activation, accel, gyro.
        activation, accel, gyro = (fig.add_subplot(gs[row]) for row in range(3))

        activation.set_title("Device " + str(self.device) + "\nActivation" )
        accel.set_title("Accel" )
        gyro.set_title("Gyro" )

        for axisLabel, key in zip("XYZ", ('a_xList', 'a_yList', 'a_zList')):
            accel.plot(index, samples[key], label = axisLabel)
        for axisLabel, key in zip("XYZ", ('g_xList', 'g_yList', 'g_zList')):
            gyro.plot(index, samples[key], label = axisLabel)
        activation.plot(index, samples['activation_List'], label = "R")

        # Fixed y ranges per panel; every panel spans the whole recording on x.
        panelLimits = (
            (activation, 0.2, 2.2),
            (accel, -2, 2),
            (gyro, -250, 250),
        )
        for panel, low, high in panelLimits:
            panel.set_xlim(xmin = 0 , xmax = displaylen )
            panel.set_ylim(ymin = low , ymax = high )

        plt.show()
        plt.clf()


class dancemove():
    """A single isolated dance-move window.

    Holds the recording metadata (``device``, ``movename``, ``timestamp``) and
    seven parallel sample lists: three accelerometer channels, three gyroscope
    channels and the activation channel.
    """

    def __init__(self, device, movename, timestamp,a_xList,a_yList,a_zList,g_xList,g_yList,g_zList,activation_List ):
        self.device = device
        self.movename = movename
        self.timestamp = timestamp

        self.a_xList = a_xList
        self.a_yList = a_yList
        self.a_zList = a_zList

        self.g_xList = g_xList
        self.g_yList = g_yList
        self.g_zList = g_zList

        self.activation_List = activation_List

    def toDict(self):
        """Return the move name and the seven channel lists as a dict (one DataFrame row)."""
        return {
            'movename': self.movename,
            'a_xList': self.a_xList,
            'a_yList': self.a_yList,
            'a_zList': self.a_zList,
            'g_xList': self.g_xList,
            'g_yList': self.g_yList,
            'g_zList': self.g_zList,
            'activation_List': self.activation_List,
        }

    def writeThisFile(self,moveid):
        """Write the move as CSV to ``BASEPATH/PROCESSEDFOLDER/<device>_<move>_<timestamp>_<moveid>``.

        The file has a header row followed by one row per sample.
        """
        fname = "{}_{}_{}_{}".format(self.device, self.movename, self.timestamp, str(moveid))
        # NOTE(review): PROCESSEDFOLDER is not defined in the visible part of the
        # notebook; it must exist as a module-level name before this is called.
        f = os.path.join(BASEPATH,PROCESSEDFOLDER,fname )
        # exist_ok=True tolerates a concurrently created directory without
        # needing the errno module.
        os.makedirs(os.path.dirname(f), exist_ok=True)
        with open(f, 'w', newline='') as csvfile:
            csvWriter = csv.writer(csvfile, delimiter=',')
            csvWriter.writerow([
                'a_xList',
                'a_yList',
                'a_zList',
                'g_xList',
                'g_yList',
                'g_zList',
                'activation_List'
            ])
            channels = self.get_data()
            for idx in range(len(self.a_xList)):
                csvWriter.writerow([channel[idx] for channel in channels])

    def _plotChannels(self, accelData, gyroData, activationYlim, accelYlim, gyroYlim, show):
        """Draw the three stacked panels shared by plot() and plotNorm().

        ``accelData`` / ``gyroData`` are sequences of three series (X, Y, Z);
        the ``*Ylim`` arguments are ``(ymin, ymax)`` pairs for each panel.
        """
        fig = plt.figure()
        gs = gridspec.GridSpec(3, 1, width_ratios=[1], height_ratios=[0.2,1,1])
        sampleCount = len(self.activation_List)
        index = list(range(sampleCount))

        activation = fig.add_subplot(gs[0])
        accel = fig.add_subplot(gs[1])
        gyro = fig.add_subplot(gs[2])

        activation.set_title("{} {} {}".format(self.device, self.movename, self.timestamp) )
        accel.set_title("Accel" )
        gyro.set_title("Gyro" )

        for axisLabel, series in zip("XYZ", accelData):
            accel.plot(index, series, label = axisLabel)
        for axisLabel, series in zip("XYZ", gyroData):
            gyro.plot(index, series, label = axisLabel)
        activation.plot(index, self.activation_List, label = "R")

        for panel, (low, high) in ((activation, activationYlim), (accel, accelYlim), (gyro, gyroYlim)):
            panel.set_xlim(xmin = 0 , xmax = sampleCount )
            panel.set_ylim(ymin = low , ymax = high )

        if show:
            plt.show(block = True)
            plt.clf()

    def plotNorm(self, show =True):
        """Plot the normalised (0..1) accelerometer and gyroscope channels plus the activation."""
        d = self.getDataAsNumpyArray( norm = True)
        self._plotChannels(d[0:3], d[3:6], (0, 2.2), (0, 1.1), (0, 1.1), show)

    def plot(self, show =True):
        """Plot the stored accelerometer, gyroscope and activation channels unmodified."""
        self._plotChannels(
            (self.a_xList, self.a_yList, self.a_zList),
            (self.g_xList, self.g_yList, self.g_zList),
            (0.2, 2.2), (-2, 2), (-250, 250), show)

    def print_Data(self):
        """Print the activation channel."""
        print(self.activation_List)

    def get_label(self):
        """Return the move name."""
        return self.movename

    def get_data(self):
        """Return the seven channel lists: accel x/y/z, gyro x/y/z, activation."""
        return [
            self.a_xList,
            self.a_yList,
            self.a_zList,
            self.g_xList,
            self.g_yList,
            self.g_zList,
            self.activation_List
        ]

    def get_data_len(self):
        """Return the number of samples, asserting that all channels have equal length."""
        mydata = self.get_data()
        assert all(len(x)==len(mydata[0]) for x in mydata)
        return len(mydata[0])

    def getDataAsNumpyArray(self, norm = True):
        """Return the six motion channels stacked as a numpy array (accel x/y/z, gyro x/y/z).

        With ``norm=True`` accelerometer values are mapped from [-2, 2] and
        gyroscope values from [-250, 250] onto [0, 1], then clipped to [0, 1].
        The activation channel is not included.
        """
        accel = [np.array(channel) for channel in (self.a_xList, self.a_yList, self.a_zList)]
        gyro = [np.array(channel) for channel in (self.g_xList, self.g_yList, self.g_zList)]

        if norm:
            NormLimit = 1
            # Same limits that plot() uses as y-axis bounds for each sensor.
            accel = [np.clip((channel + 2) / 4 * NormLimit, 0, NormLimit) for channel in accel]
            gyro = [np.clip((channel + 250) / (250 * 2) * NormLimit, 0, NormLimit) for channel in gyro]

        return np.array(accel + gyro)


def listFiles(savepath):
    """Return the entry names of the directory ``savepath``, resolved against BASEPATH."""
    return os.listdir(os.path.join(BASEPATH, savepath))

def extractFileMetadata(fname):
    """Split a ``device_movename_timestamp`` file name into its three parts.

    Raises ValueError when the name does not consist of exactly three
    underscore-separated fields.
    """
    fields = fname.split("_")
    device, movename, timestamp = fields
    return device, movename, timestamp

def readRawDataset(fname, savepath):
    """Read one raw recording CSV into a rawDataset.

    The file ``BASEPATH/savepath/fname`` is expected to have a header row and
    then rows whose first six columns parse as floats (accel x/y/z, gyro x/y/z)
    and whose seventh column parses as an int (activation).

    When IS_PAD is set and the file contains at least one sample, every channel
    is extended with PAD_NUM copies of its first value at the front and PAD_NUM
    copies of its last value at the back. A file without data rows yields empty
    channels instead of raising IndexError.

    Returns a rawDataset carrying the metadata parsed from ``fname``.
    """
    filepath = os.path.join(BASEPATH, savepath, fname)

    floatKeys = ('a_xList', 'a_yList', 'a_zList', 'g_xList', 'g_yList', 'g_zList')
    dataset = {key: [] for key in floatKeys}
    dataset['activation_List'] = []

    sampleCount = 0
    with open(filepath, 'r', newline='') as csvfile:
        csvReader = csv.reader(csvfile, delimiter=',')
        next(csvReader, None)  # skip the header row
        for row in csvReader:
            for column, key in enumerate(floatKeys):
                dataset[key].append(float(row[column]))
            dataset['activation_List'].append(int(row[6]))
            sampleCount += 1

    if IS_PAD and sampleCount > 0:
        # One concatenation per channel instead of PAD_NUM inserts at index 0.
        for key in list(dataset):
            values = dataset[key]
            dataset[key] = [values[0]] * PAD_NUM + values + [values[-1]] * PAD_NUM

    device, movename, timestamp = extractFileMetadata(fname)
    print("Recording from {} with move {} at {} opened with {} samples".format(device, movename,timestamp ,sampleCount))

    return rawDataset(device, movename, timestamp, dataset)

def _sliceChannels(d, lo, hi):
    """Return the seven channel lists of ``d`` cut to ``[lo:hi]``.

    A negative ``lo`` is clamped to 0 so the slice can never wrap around to the
    end of the recording.
    """
    keys = ('a_xList', 'a_yList', 'a_zList', 'g_xList', 'g_yList', 'g_zList', 'activation_List')
    lo = max(0, lo)
    hi = max(lo, hi)
    return [d[key][lo:hi] for key in keys]


def isolateSequences(rawdata, useAccelBaseValueAugmentation =True , useTemporalAugmentation =True):
    """Cut a raw recording into dancemove windows, optionally augmented.

    A move is a run of samples whose activation value equals 2; runs shorter
    than MINIMUM_MOVE_TIME are discarded, and a run still open at the end of
    the recording is never closed and therefore not emitted.

    With USE_MAX_SAMPLES set, each move produces:
      * temporal augmentation on: one window per shift in
        ``range(-2*TEMPORALDATAAUGNUM, 2*(TEMPORALDATAAUGNUM+1), 2)``;
        otherwise the single unshifted window;
      * accel augmentation on: NUMRANDOMSHIFTSACCEL further copies whose
        accelerometer channels carry random constant offsets.
    Windows start NUMBER_OF_BEFORE_SAMPLES before the detected start (clamped to
    the beginning of the recording) and span MAX_SAMPLES indices; they are
    shorter when they run past either end of the recording.

    Without USE_MAX_SAMPLES a single window from NUMBER_OF_BEFORE_SAMPLES before
    the start to NUMBER_OF_AFTER_SAMPLES after the end is produced.

    Returns the list of dancemove objects in generation order.
    """
    device = rawdata.device
    movename = rawdata.movename
    timestamp = rawdata.timestamp
    d = rawdata.dataset

    # Pass 1: locate (start, end) index pairs of activation == 2 runs.
    moveIdxs = []
    isInMove = False
    startIdx = None
    for idx in range(len(d['a_xList'])):
        currentActivation = d['activation_List'][idx]
        if currentActivation == 2 and not isInMove:
            isInMove = True
            startIdx = idx
        elif isInMove and currentActivation != 2:
            isInMove = False
            moveIdxs.append( (startIdx, idx) )

    # Pass 2: cut (and augment) a window for every sufficiently long run.
    movesData = []
    for start,end in moveIdxs:

        if (end - start) < MINIMUM_MOVE_TIME:
            continue

        if not USE_MAX_SAMPLES:
            channels = _sliceChannels(d, start - NUMBER_OF_BEFORE_SAMPLES, end + NUMBER_OF_AFTER_SAMPLES)
            movesData.append(dancemove(device, movename, timestamp, *channels))
            continue

        localStart = max(0, start - NUMBER_OF_BEFORE_SAMPLES)
        localEnd = localStart + MAX_SAMPLES

        if useTemporalAugmentation:
            for i in range(-TEMPORALDATAAUGNUM*2,(TEMPORALDATAAUGNUM+1)*2,2 ):
                # A negative shifted start is clamped, so moves near the
                # beginning of a recording give shorter windows instead of
                # wrapped/empty slices.
                channels = _sliceChannels(d, localStart + i, localEnd + i)
                if len(channels[0])==0:
                    print("DATAERROR")
                    print(*(len(channel) for channel in channels[:6]))
                    print(len(d['a_xList']),len( d['a_yList']),len(d['a_zList']),len(d['g_xList']),len(d['g_yList']),len(d['g_zList']) )
                    print(localStart+i,localEnd+i)
                    # Explicit raise: unlike `assert False` it survives python -O.
                    raise AssertionError("DATAERROR: empty window extracted")
                movesData.append(dancemove(device, movename, timestamp, *channels))
        else:
            # Unshifted window; also the base for accel-only augmentation,
            # which would otherwise emit nothing for the move.
            channels = _sliceChannels(d, localStart, localEnd)
            movesData.append(dancemove(device, movename, timestamp, *channels))

        if useAccelBaseValueAugmentation:
            # NOTE(review): with temporal augmentation on, the offsets start from
            # the LAST temporal window and accumulate from one copy to the next.
            # This matches the data shown in the notebook outputs and is kept
            # unchanged; confirm whether independent offsets on the unshifted
            # window were intended.
            a_xList, a_yList, a_zList, g_xList, g_yList, g_zList, activation_List = channels
            for _ in range(NUMRANDOMSHIFTSACCEL):
                z_rand = random.uniform(0, Z_RAND_MAX)
                y_rand = random.uniform(0, z_rand)
                x_rand = z_rand - y_rand
                a_xList = [value + x_rand for value in a_xList]
                a_yList = [value + y_rand for value in a_yList]
                a_zList = [value - z_rand for value in a_zList]
                dm = dancemove(device, movename, timestamp,a_xList,a_yList,a_zList,g_xList,g_yList,g_zList,activation_List)
                movesData.append(dm)

    return movesData



def processData(dancer, testset=False):
    """Load every raw recording of ``dancer`` and extract its dance moves.

    Recordings are read from ``dataset/raw/train/<dancer>`` or, when
    ``testset`` is True, from ``dataset/raw/test/<dancer>``. A per-move summary
    is printed.

    Returns a DataFrame with one row per extracted move when
    IS_RETURN_DATAFRAME is set, otherwise the list of dancemove objects. The
    DataFrame columns are pinned to the alphabetical order shown in the
    notebook outputs, because extract_features selects them by position.
    """
    folder = ('dataset/raw/test/' if testset else 'dataset/raw/train/') + dancer
    raws = [readRawDataset(fname, folder) for fname in listFiles(folder)]

    combinedList = []
    for item in raws:
        combinedList.extend(
            isolateSequences(item, useTemporalAugmentation = USETEMP, useAccelBaseValueAugmentation= USEACCEL ))
    print("Done")
    print("Extracted {} moves from {} raw data Sequences.".format(len(combinedList), len(raws)))

    numberOfEachMoves = dict.fromkeys(DANCEMOVENAMES ,0)
    numberOfEachMoves["defaultMove"] = 0
    for item in combinedList:
        # .get keeps an unexpected move name from raising KeyError in the tally.
        numberOfEachMoves[item.movename] = numberOfEachMoves.get(item.movename, 0) + 1

    for k,v in numberOfEachMoves.items():
        print("{}  {}".format(v, k))

    if IS_RETURN_DATAFRAME:
        columns = ['a_xList', 'a_yList', 'a_zList', 'activation_List',
                   'g_xList', 'g_yList', 'g_zList', 'movename']
        # Build the frame in one go: DataFrame.append in a loop is quadratic
        # and was removed in pandas 2.0.
        return pd.DataFrame([move.toDict() for move in combinedList], columns=columns)

    return combinedList

### Generate training and test dataset

In [3]:
dancers = ['Alex', 'Abi', 'CJ', 'Ryan', 'XY']

In [4]:
def _collect_dancer_data(dancers, testset, label):
    """Run processData for every dancer and stack the results into one DataFrame.

    Each per-dancer frame gets a ``Dancer`` column holding the dancer's name.
    ``label`` is the wording used in the progress message. Frames are
    concatenated once at the end rather than inside the loop.
    """
    frames = []
    for dancer in dancers:
        print("\nProcessing {}'s dance moves as {} set:" .format(dancer, label))
        frames.append(processData(dancer, testset=testset).assign(Dancer=dancer))
    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames, axis=0, ignore_index=True)


def dancer_data_train(dancers):
    """Return the moves extracted from the training recordings of all ``dancers``."""
    return _collect_dancer_data(dancers, testset=False, label="training")


def dancer_data_test(dancers):
    """Return the moves extracted from the test recordings of all ``dancers``."""
    return _collect_dancer_data(dancers, testset=True, label="test")

In [5]:
dance_move_training = dancer_data_train(dancers)
dance_move_training


Processing Alex's dance moves as training set:
Recording from dev1 with move sidepump at 16164027138856 opened with 2108 samples
Recording from dev1 with move gun at 16164021898786 opened with 1945 samples
Recording from dev1 with move hair at 16164023666271 opened with 1836 samples
Done
Extracted 672 moves from 3 raw data Sequences.
0  dab
0  elbowkick
272  gun
192  hair
0  listen
0  pointhigh
208  sidepump
0  wipetable
0  defaultMove

Processing Abi's dance moves as training set:
Recording from dev3 with move gun at 16164008859156 opened with 3107 samples
Recording from dev3 with move sidepump at 16164005943414 opened with 3423 samples
Recording from dev3 with move hair at 16164010930352 opened with 2308 samples
Done
Extracted 848 moves from 3 raw data Sequences.
0  dab
0  elbowkick
368  gun
192  hair
0  listen
0  pointhigh
288  sidepump
0  wipetable
0  defaultMove

Processing CJ's dance moves as training set:
Recording from dev2 with move gun at 16164001747106 opened with 5092 samp

Unnamed: 0,a_xList,a_yList,a_zList,activation_List,g_xList,g_yList,g_zList,movename,Dancer
0,"[0.3469256626080526, 0.33315539756483153, 0.32...","[-0.27917871527082117, -0.2800072291624927, -0...","[-0.9677316655029782, -0.9556389993017869, -0....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.953125, -7.8125, -9.765625, -7.8125, -1.953...","[7.8125, 3.90625, 1.953125, 0.0, 0.0, -1.95312...","[1.953125, -3.90625, -7.8125, -11.71875, -7.81...",sidepump,Alex
1,"[0.32489323853889895, 0.3199359431233394, 0.31...","[-0.29300433749749566, -0.3070526024984974, -0...","[-0.9608833995810722, -0.9640300397486432, -0....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, ...","[-9.765625, -7.8125, -1.953125, 1.953125, 3.90...","[1.953125, 0.0, 0.0, -1.953125, -3.90625, -1.9...","[-7.8125, -11.71875, -7.8125, -3.90625, -3.906...",sidepump,Alex
2,"[0.3169615658740036, 0.3151769395244022, 0.326...","[-0.31548156149909845, -0.32053893689945906, -...","[-0.972168023849186, -0.9770508143095116, -0.9...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, ...","[-1.953125, 1.953125, 3.90625, 7.8125, 5.85937...","[0.0, -1.953125, -3.90625, -1.953125, 0.0, 1.9...","[-7.8125, -3.90625, -3.90625, -3.90625, 1.9531...",sidepump,Alex
3,"[0.32660616371464135, 0.33971369822878483, 0.3...","[-0.31732336213967544, -0.3028940172838053, -0...","[-0.979980488585707, -0.9754882931514242, -0.9...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, ...","[3.90625, 7.8125, 5.859375, 7.8125, 15.625, 27...","[-3.90625, -1.953125, 0.0, 1.953125, 1.953125,...","[-3.90625, -3.90625, 1.953125, -1.953125, -5.8...",sidepump,Alex
4,"[0.3475782189372709, 0.3585469313623626, 0.371...","[-0.28798641037028316, -0.3040418462221699, -0...","[-0.9727929758908544, -0.9774257855345126, -0....","[1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, ...","[5.859375, 7.8125, 15.625, 27.34375, 46.875, 6...","[0.0, 1.953125, 1.953125, -1.953125, 1.953125,...","[1.953125, -1.953125, -5.859375, -21.484375, -...",sidepump,Alex
...,...,...,...,...,...,...,...,...,...
3851,"[-0.019417480963217687, 0.08843478962169114, 0...","[-0.39973934394069494, -0.08941669696540583, 0...","[-1.1962986818199635, -0.8495413966906109, -0....","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[-39.0625, -119.140625, -203.125, -248.046875,...","[-39.0625, 48.828125, 85.9375, 78.125, 52.7343...","[-191.40625, -144.53125, -74.21875, -11.71875,...",sidepump,XY
3852,"[-0.009999931039994511, 0.09785233954491432, 0...","[-0.37492624336007535, -0.06460359638478627, 0...","[-1.2305293323238062, -0.8837720471944537, -0....","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[-39.0625, -119.140625, -203.125, -248.046875,...","[-39.0625, 48.828125, 85.9375, 78.125, 52.7343...","[-191.40625, -144.53125, -74.21875, -11.71875,...",sidepump,XY
3853,"[0.005438519676126741, 0.11329079026103557, 0....","[-0.36571012818345294, -0.05538748120816388, 0...","[-1.25518389821655, -0.9084266130871973, -0.66...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[-39.0625, -119.140625, -203.125, -248.046875,...","[-39.0625, 48.828125, 85.9375, 78.125, 52.7343...","[-191.40625, -144.53125, -74.21875, -11.71875,...",sidepump,XY
3854,"[0.006045487739939175, 0.113897758324848, 0.05...","[-0.3414566283449842, -0.03113398136969515, 0....","[-1.2800443661188312, -0.9332870809894784, -0....","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[-39.0625, -119.140625, -203.125, -248.046875,...","[-39.0625, 48.828125, 85.9375, 78.125, 52.7343...","[-191.40625, -144.53125, -74.21875, -11.71875,...",sidepump,XY


In [6]:
dance_move_testing = dancer_data_test(dancers)
dance_move_testing


Processing Alex's dance moves as test set:
Recording from dev1 with move gun at 16164022461072 opened with 641 samples
Recording from dev1 with move hair at 16164024790779 opened with 822 samples
Recording from dev1 with move sidepump at 16164027729986 opened with 811 samples
Done
Extracted 240 moves from 3 raw data Sequences.
0  dab
0  elbowkick
80  gun
80  hair
0  listen
0  pointhigh
80  sidepump
0  wipetable
0  defaultMove

Processing Abi's dance moves as test set:
Recording from dev3 with move sidepump at 16164007065778 opened with 1453 samples
Recording from dev3 with move hair at 16164011636886 opened with 1120 samples
Recording from dev3 with move gun at 16164009335203 opened with 644 samples
Done
Extracted 304 moves from 3 raw data Sequences.
0  dab
0  elbowkick
80  gun
80  hair
0  listen
0  pointhigh
144  sidepump
0  wipetable
0  defaultMove

Processing CJ's dance moves as test set:
Recording from dev2 with move sidepump at 16163993461918 opened with 2586 samples
Recording fr

Unnamed: 0,a_xList,a_yList,a_zList,activation_List,g_xList,g_yList,g_zList,movename,Dancer
0,"[0.27363811114079917, 0.2829328666844795, 0.28...","[-0.2602651178071856, -0.2874090706843113, -0....","[-1.0212616738656028, -1.0190070043193615, -1....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.953125, 7.8125, 11.71875, 17.578125, 21.484...","[5.859375, 11.71875, 13.671875, 13.671875, 13....","[-11.71875, -9.765625, -3.90625, 0.0, 1.953125...",gun,Alex
1,"[0.28225972001068766, 0.27560583200641264, 0.2...","[-0.2974454424105868, -0.30346726544635205, -0...","[-1.0176542025916169, -1.0168425215549701, -1....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, ...","[11.71875, 17.578125, 21.484375, 21.484375, 19...","[13.671875, 13.671875, 13.671875, 9.765625, 9....","[-3.90625, 0.0, 1.953125, 0.0, 3.90625, 5.8593...",gun,Alex
2,"[0.2716134992038476, 0.2692180995223086, 0.267...","[-0.3008303592678112, -0.2929982155606867, -0....","[-1.0101055129329821, -0.9998133077597893, -0....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, ...","[21.484375, 21.484375, 19.53125, 15.625, 9.765...","[13.671875, 9.765625, 9.765625, 5.859375, 1.95...","[1.953125, 0.0, 3.90625, 5.859375, 0.0, -1.953...",gun,Alex
3,"[0.26778085971338517, 0.2606685158280311, 0.26...","[-0.282048929336412, -0.2692293576018472, -0.2...","[-0.9936379846558736, -0.9836827907935242, -0....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, ...","[19.53125, 15.625, 9.765625, 3.90625, 0.0, -1....","[9.765625, 5.859375, 1.953125, -1.953125, -3.9...","[3.90625, 5.859375, 0.0, -1.953125, -7.8125, -...",gun,Alex
4,"[0.2626511094968187, 0.2700906656980912, 0.280...","[-0.2615376145611083, -0.25692256873666497, -0...","[-0.9777096744761145, -0.9678758046856686, -0....","[1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, ...","[9.765625, 3.90625, 0.0, -1.953125, 3.90625, 1...","[1.953125, -1.953125, -3.90625, -5.859375, -7....","[0.0, -1.953125, -7.8125, -7.8125, -9.765625, ...",gun,Alex
...,...,...,...,...,...,...,...,...,...
1595,"[0.47465965251260134, 0.25430474382914503, 0.0...","[-0.2639539299997518, -0.03548347086168461, 0....","[-1.1385765559316234, -0.9442937730187249, -0....","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[103.515625, 35.15625, -56.640625, -132.8125, ...","[-39.0625, -27.34375, -52.734375, -42.96875, -...","[-35.15625, -23.4375, -25.390625, 5.859375, 19...",sidepump,XY
1596,"[0.4807982927837836, 0.2604433841003273, 0.053...","[-0.25774910431699366, -0.029278645178926435, ...","[-1.150920021885564, -0.9566372389726653, -0.7...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[103.515625, 35.15625, -56.640625, -132.8125, ...","[-39.0625, -27.34375, -52.734375, -42.96875, -...","[-35.15625, -23.4375, -25.390625, 5.859375, 19...",sidepump,XY
1597,"[0.48602144521903096, 0.26566653653557465, 0.0...","[-0.24920840900185467, -0.020737949863787436, ...","[-1.1646838696359503, -0.9704010867230516, -0....","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[103.515625, 35.15625, -56.640625, -132.8125, ...","[-39.0625, -27.34375, -52.734375, -42.96875, -...","[-35.15625, -23.4375, -25.390625, 5.859375, 19...",sidepump,XY
1598,"[0.5054682092046969, 0.2851133005212406, 0.077...","[-0.2282220207654117, 0.00024843837265553503, ...","[-1.2051170218580591, -1.0108342389451606, -0....","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[103.515625, 35.15625, -56.640625, -132.8125, ...","[-39.0625, -27.34375, -52.734375, -42.96875, -...","[-35.15625, -23.4375, -25.390625, 5.859375, 19...",sidepump,XY


In [7]:
import statistics
from spectrum import aryule
from scipy.stats import pearsonr
from scipy.stats import entropy
from math import sqrt
from scipy.fft import fft, fftfreq
from scipy.stats import kurtosis
from scipy.stats import skew

In [8]:
features_train = pd.DataFrame()
features_test = pd.DataFrame()

In [9]:
def get_mean(x):
    """Return the per-cell mean of a DataFrame whose cells hold sample sequences.

    The result is a float array of shape ``(n_rows, n_cols)`` where entry
    ``[r, c]`` is ``np.mean`` of cell ``(r, c)``. An empty frame yields an
    array of shape ``(0, n_cols)`` so it can still be wrapped in a DataFrame
    with the expected column names.
    """
    n_rows, n_cols = x.shape
    means = [[np.mean(x.iloc[row, col]) for col in range(n_cols)]
             for row in range(n_rows)]
    return np.array(means, dtype=float).reshape(n_rows, n_cols)


def get_std(x):
    """Return the per-cell standard deviation of a DataFrame of sample sequences.

    The result is a float array of shape ``(n_rows, n_cols)`` where entry
    ``[r, c]`` is ``np.std`` of cell ``(r, c)``. An empty frame yields an array
    of shape ``(0, n_cols)``.
    """
    n_rows, n_cols = x.shape
    stds = [[np.std(x.iloc[row, col]) for col in range(n_cols)]
            for row in range(n_rows)]
    return np.array(stds, dtype=float).reshape(n_rows, n_cols)


def fourier_transform(x):
    """Apply ``np.fft.fft`` to every cell of ``x`` and return the stacked result.

    The cells are read from the ``x`` argument; previously they were read from
    a global named ``xyz_acc_mean``, so the argument's data was ignored.

    The spectra are collected column by column and the stack is transposed
    with ``.T``, as before. For cells of equal length ``L`` this gives an array
    of shape ``(L, n_rows, n_cols)``, because ``.T`` reverses all axes.
    NOTE(review): confirm that this axis order is the intended one.
    """
    n_rows, n_cols = x.shape
    spectra = [[np.fft.fft(x.iloc[row, col]) for row in range(n_rows)]
               for col in range(n_cols)]
    return np.array(spectra).T


def get_mag(val):
    """Return the Euclidean norm of each row of a three-column DataFrame as a 1-D array."""
    magnitudes = [
        sqrt(pow(x, 2) + pow(y, 2) + pow(z, 2))
        for x, y, z in val.values
    ]
    return np.array(magnitudes).T


def get_kurtosis(x):
    """Return the per-cell kurtosis of a DataFrame whose cells hold sample sequences.

    The result is a float array of shape ``(n_rows, n_cols)`` where entry
    ``[r, c]`` is ``scipy.stats.kurtosis`` of cell ``(r, c)`` with its default
    arguments. An empty frame yields an array of shape ``(0, n_cols)``.
    """
    n_rows, n_cols = x.shape
    values = [[kurtosis(x.iloc[row, col]) for col in range(n_cols)]
              for row in range(n_rows)]
    return np.array(values, dtype=float).reshape(n_rows, n_cols)


def get_skew(x):
    """Return the per-cell skewness of a DataFrame whose cells hold sample sequences.

    The result is a float array of shape ``(n_rows, n_cols)`` where entry
    ``[r, c]`` is ``scipy.stats.skew`` of cell ``(r, c)`` with its default
    arguments. An empty frame yields an array of shape ``(0, n_cols)``.
    """
    n_rows, n_cols = x.shape
    values = [[skew(x.iloc[row, col]) for col in range(n_cols)]
              for row in range(n_rows)]
    return np.array(values, dtype=float).reshape(n_rows, n_cols)

In [10]:
def extract_features(dataset):
    """Build the statistical feature table for one windowed dataset.

    Columns of ``dataset`` are selected by position: 0-2 feed the ``acc_*``
    features, 4-6 feed the ``gyro_*`` features, 7 is the move label and 8 is
    the dancer label (column 3 is not used here).

    For each of the two sensor groups the following are computed per row:
    mean, standard deviation, magnitude of the mean vector, kurtosis and
    skewness.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame with at least nine columns laid out as described above.

    Returns
    -------
    pandas.DataFrame
        26 feature columns followed by the dancer column and the move column,
        indexed like ``dataset``.
    """
    # Every feature frame is built on the dataset's own index. pd.concat with
    # axis=1 aligns on index labels, so a fresh 0..n-1 index on the features
    # would misalign them against the label Series whenever ``dataset`` does
    # not carry a default RangeIndex.
    row_index = dataset.index

    xyz_acc = dataset.iloc[:, 0:3]
    xyz_gyro = dataset.iloc[:, 4:7]
    move = dataset.iloc[:, 7]
    dancer = dataset.iloc[:, 8]

    def sensor_features(windows, prefix):
        """Return [mean, std, mag, kurtosis, skew] frames for one sensor group."""
        def axis_columns(stat):
            return ['{}_{}_{}'.format(prefix, axis, stat) for axis in ('X', 'Y', 'Z')]

        mean_df = pd.DataFrame(get_mean(windows), columns=axis_columns('mean'), index=row_index)
        std_df = pd.DataFrame(get_std(windows), columns=axis_columns('std'), index=row_index)
        # Magnitude is taken over the three per-axis means, not the raw windows.
        mag_df = pd.DataFrame(get_mag(mean_df), columns=[prefix + '_mag'], index=row_index)
        kurtosis_df = pd.DataFrame(get_kurtosis(windows), columns=axis_columns('kurtosis'), index=row_index)
        skew_df = pd.DataFrame(get_skew(windows), columns=axis_columns('skew'), index=row_index)
        return [mean_df, std_df, mag_df, kurtosis_df, skew_df]

    extracted_features = pd.concat(
        sensor_features(xyz_acc, 'acc') + sensor_features(xyz_gyro, 'gyro') + [dancer, move],
        axis=1,
    )
    return extracted_features

In [11]:
# Build the training feature table. `dance_move_training` is not assigned in
# this part of the notebook and must already exist in the kernel.
extracted_features_training = extract_features(dance_move_training)

In [12]:
# Display the extracted training features.
extracted_features_training

Unnamed: 0,acc_X_mean,acc_Y_mean,acc_Z_mean,acc_X_std,acc_Y_std,acc_Z_std,acc_mag,acc_X_kurtosis,acc_Y_kurtosis,acc_Z_kurtosis,acc_X_skew,acc_Y_skew,acc_Z_skew,gyro_X_mean,gyro_Y_mean,gyro_Z_mean,gyro_X_std,gyro_Y_std,gyro_Z_std,gyro_mag,gyro_X_kurtosis,gyro_Y_kurtosis,gyro_Z_kurtosis,gyro_X_skew,gyro_Y_skew,gyro_Z_skew,Dancer,movename
0,0.325903,-0.519305,-0.412784,0.549820,0.447848,0.518816,0.739108,-0.354886,-0.378593,-0.715202,-0.746062,-0.279321,0.221182,-17.324219,-10.117188,0.781250,72.215278,75.421676,75.601329,20.077261,-0.274461,1.350727,4.334908,-0.768383,-0.108224,-1.893026,Alex,sidepump
1,0.325575,-0.519750,-0.413090,0.549821,0.447622,0.519143,0.739447,-0.356695,-0.374915,-0.717553,-0.744268,-0.277540,0.220508,-17.070312,-9.941406,1.132812,72.286536,75.468555,75.633201,19.786622,-0.273977,1.341151,4.357877,-0.775745,-0.114498,-1.904374,Alex,sidepump
2,0.324332,-0.519810,-0.413312,0.549899,0.447592,0.519381,0.739067,-0.364883,-0.374436,-0.719263,-0.737206,-0.277289,0.220024,-16.914062,-9.843750,1.464844,72.312706,75.487672,75.623716,19.624747,-0.271151,1.337400,4.395134,-0.781193,-0.118137,-1.918170,Alex,sidepump
3,0.323197,-0.519732,-0.413130,0.549975,0.447628,0.519185,0.738413,-0.372393,-0.375015,-0.717869,-0.730745,-0.277638,0.220432,-17.031250,-9.980469,1.250000,72.289945,75.478058,75.660100,19.779680,-0.272820,1.338770,4.359282,-0.777257,-0.112780,-1.907157,Alex,sidepump
4,0.322363,-0.519991,-0.412989,0.549991,0.447509,0.519033,0.738152,-0.377106,-0.373090,-0.716802,-0.726140,-0.276487,0.220761,-17.285156,-10.000000,1.152344,72.230975,75.476736,75.676018,20.002613,-0.274995,1.338957,4.343288,-0.769209,-0.112013,-1.902188,Alex,sidepump
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3851,0.499546,-0.035776,-0.629366,0.415969,0.490331,0.523213,0.804318,0.662682,4.044871,-1.060848,-0.962199,-2.126935,-0.109458,-26.582031,-10.625000,8.945312,84.802472,51.425689,94.940030,29.991893,1.980038,2.393430,1.751305,-1.196690,-1.528542,-0.984029,XY,sidepump
3852,0.508963,-0.010963,-0.663597,0.415969,0.490331,0.523213,0.836376,0.662682,4.044871,-1.060848,-0.962199,-2.126935,-0.109458,-26.582031,-10.625000,8.945312,84.802472,51.425689,94.940030,29.991893,1.980038,2.393430,1.751305,-1.196690,-1.528542,-0.984029,XY,sidepump
3853,0.524402,-0.001747,-0.688251,0.415969,0.490331,0.523213,0.865269,0.662682,4.044871,-1.060848,-0.962199,-2.126935,-0.109458,-26.582031,-10.625000,8.945312,84.802472,51.425689,94.940030,29.991893,1.980038,2.393430,1.751305,-1.196690,-1.528542,-0.984029,XY,sidepump
3854,0.525009,0.022507,-0.713112,0.415969,0.490331,0.523213,0.885815,0.662682,4.044871,-1.060848,-0.962199,-2.126935,-0.109458,-26.582031,-10.625000,8.945312,84.802472,51.425689,94.940030,29.991893,1.980038,2.393430,1.751305,-1.196690,-1.528542,-0.984029,XY,sidepump


In [13]:
# Build the testing feature table. `dance_move_testing` is not assigned in
# this part of the notebook and must already exist in the kernel.
extracted_features_testing = extract_features(dance_move_testing)

In [14]:
# Display the extracted testing features.
extracted_features_testing

Unnamed: 0,acc_X_mean,acc_Y_mean,acc_Z_mean,acc_X_std,acc_Y_std,acc_Z_std,acc_mag,acc_X_kurtosis,acc_Y_kurtosis,acc_Z_kurtosis,acc_X_skew,acc_Y_skew,acc_Z_skew,gyro_X_mean,gyro_Y_mean,gyro_Z_mean,gyro_X_std,gyro_Y_std,gyro_Z_std,gyro_mag,gyro_X_kurtosis,gyro_Y_kurtosis,gyro_Z_kurtosis,gyro_X_skew,gyro_Y_skew,gyro_Z_skew,Dancer,movename
0,0.256790,-0.462554,-0.784829,0.130087,0.591135,0.371541,0.946495,-0.224492,2.002373,-0.555826,-0.382008,0.578464,0.735185,0.429688,-5.937500,-2.285156,136.164216,27.500832,49.382057,6.376557,-0.071116,3.116190,2.278384,-0.150386,-1.422626,0.155111,Alex,gun
1,0.257769,-0.462569,-0.784113,0.130428,0.591126,0.371121,0.946176,-0.240044,2.002696,-0.551996,-0.398504,0.578550,0.733888,0.390625,-6.152344,-2.109375,136.162653,27.423676,49.367088,6.515626,-0.071154,3.138564,2.282634,-0.149532,-1.414567,0.144675,Alex,gun
2,0.258747,-0.461937,-0.783456,0.130765,0.591316,0.370737,0.945589,-0.254675,1.993933,-0.548460,-0.414879,0.575078,0.732676,0.097656,-6.464844,-2.128906,136.146788,27.284667,49.366636,6.807054,-0.071051,3.193697,2.283057,-0.143155,-1.409535,0.145865,Alex,gun
3,0.259899,-0.461397,-0.783044,0.131116,0.591479,0.370501,0.945301,-0.267562,1.986450,-0.546314,-0.434982,0.572115,0.731832,-0.371094,-6.738281,-2.226562,136.112690,27.167927,49.365032,7.106316,-0.070031,3.239446,2.284921,-0.133009,-1.403701,0.151807,Alex,gun
4,0.261176,-0.461428,-0.782958,0.131461,0.591469,0.370453,0.945596,-0.277500,1.986906,-0.545886,-0.457919,0.572282,0.731636,-0.761719,-6.914062,-2.363281,136.088361,27.100509,49.354425,7.346399,-0.069421,3.263453,2.291185,-0.124516,-1.397798,0.160152,Alex,gun
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,0.357483,-0.292847,-0.593815,0.521528,0.612409,0.574101,0.752442,-0.621114,0.774047,-1.484621,-0.392547,-1.443782,-0.121935,-17.128906,-18.906250,-0.136719,99.986592,52.921312,89.290183,25.512044,0.551662,0.457469,1.608749,-0.589082,-0.734662,-0.796596,XY,sidepump
1596,0.363621,-0.286642,-0.606159,0.521528,0.612409,0.574101,0.762766,-0.621114,0.774047,-1.484621,-0.392547,-1.443782,-0.121935,-17.128906,-18.906250,-0.136719,99.986592,52.921312,89.290183,25.512044,0.551662,0.457469,1.608749,-0.589082,-0.734662,-0.796596,XY,sidepump
1597,0.368845,-0.278101,-0.619922,0.521528,0.612409,0.574101,0.773104,-0.621114,0.774047,-1.484621,-0.392547,-1.443782,-0.121935,-17.128906,-18.906250,-0.136719,99.986592,52.921312,89.290183,25.512044,0.551662,0.457469,1.608749,-0.589082,-0.734662,-0.796596,XY,sidepump
1598,0.388291,-0.257115,-0.660356,0.521528,0.612409,0.574101,0.808052,-0.621114,0.774047,-1.484621,-0.392547,-1.443782,-0.121935,-17.128906,-18.906250,-0.136719,99.986592,52.921312,89.290183,25.512044,0.551662,0.457469,1.608749,-0.589082,-0.734662,-0.796596,XY,sidepump


In [15]:
# Persist both feature tables as CSV without the index column.
# The target folder is created first so the cell also runs on a fresh
# checkout where 'dataset/extracted_features' does not exist yet.
os.makedirs('dataset/extracted_features', exist_ok=True)
extracted_features_training.to_csv('dataset/extracted_features/train.csv', index=False)
extracted_features_testing.to_csv('dataset/extracted_features/test.csv', index=False)

In [16]:
# Feature matrices: every column except the two trailing label columns.
X, X_test = (
    frame.iloc[:, :-2]
    for frame in (extracted_features_training, extracted_features_testing)
)

In [396]:
# Dancer labels of the training split: the second-to-last column of the
# feature table. The bare name on the last line displays the Series.
y_dancer = extracted_features_training.iloc[:, -2]
y_dancer

0       Alex
1       Alex
2       Alex
3       Alex
4       Alex
        ... 
3851      XY
3852      XY
3853      XY
3854      XY
3855      XY
Name: Dancer, Length: 3856, dtype: object

In [398]:
# Dancer labels of the testing split: the second-to-last column of the
# feature table. The bare name on the last line displays the Series.
y_dancer_test = extracted_features_testing.iloc[:, -2]
y_dancer_test

0       Alex
1       Alex
2       Alex
3       Alex
4       Alex
        ... 
1595      XY
1596      XY
1597      XY
1598      XY
1599      XY
Name: Dancer, Length: 1600, dtype: object

In [399]:
# Encode dancer names as integer class ids.
# The encoder is fitted on the training labels only; the test labels reuse
# that mapping through transform(), so a given id denotes the same dancer in
# both splits. transform() raises ValueError if the test split contains a
# dancer that never occurs in training, instead of silently renumbering.
# NOTE: y_dancer / y_dancer_test are rebound in place. Re-running this cell
# without re-running the two cells that select them encodes the integer ids
# again and leaves integers in encoder_dancer.classes_.
encoder_dancer = preprocessing.LabelEncoder()
y_dancer = encoder_dancer.fit_transform(y_dancer)
y_dancer_test = encoder_dancer.transform(y_dancer_test)

In [400]:
# Show the classes held by the dancer encoder after its most recent fit;
# the position of each label is its integer id.
encoder_dancer.classes_

array(['Abi', 'Alex', 'CJ', 'Ryan', 'XY'], dtype=object)

In [18]:
# Encode the target labels as integer class ids.
# NOTE(review): `y` and `y_test` are not assigned in the cells visible around
# here; they must already exist in the kernel before this cell runs.
# The encoder is fitted on the training labels only; the test labels reuse
# that mapping through transform(), so a given id denotes the same class in
# both splits. transform() raises ValueError on a label unseen in training.
# NOTE: y / y_test are rebound in place. Re-running this cell on already
# encoded values leaves integers in encoder.classes_.
encoder = preprocessing.LabelEncoder()
y = encoder.fit_transform(y)
y_test = encoder.transform(y_test)

In [19]:
# Show the classes held by the encoder after its most recent fit;
# the position of each label is its integer id.
encoder.classes_

array(['gun', 'hair', 'sidepump'], dtype=object)

In [20]:
# Module-level result buffers that perform_mlp() clears and refills on each
# call: per-fold Keras histories, confusion matrices and text reports.
train_histories, cm_hist, classification_report_hist = [], [], []

In [21]:
def _build_mlp(input_shape, number_of_classes):
    """Create and compile the fully-connected classifier trained in each fold.

    Layers: Dense(32, relu) -> Dropout(0.1) -> Dense(16, relu) ->
    Dense(number_of_classes, softmax); compiled with Adam (learning rate
    0.001), categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    model.add(Dense(units=32, kernel_initializer='uniform', activation='relu', input_shape=input_shape))
    model.add(Dropout(0.1))
    model.add(Dense(units=16, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=number_of_classes, kernel_initializer='uniform', activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def perform_mlp(X_test, y_test, fold, pca, random_state=None):
    """Train one MLP per cross-validation fold and score each on the test set.

    The training data is NOT passed in: the function reads the notebook-level
    names ``X`` (feature DataFrame), ``y`` (integer-encoded labels indexable
    by position arrays) and ``encoder`` (fitted LabelEncoder). It also clears
    and refills the notebook-level lists ``train_histories``, ``cm_hist`` and
    ``classification_report_hist`` with one entry per fold.

    Parameters
    ----------
    X_test : array-like
        Test feature matrix; scaled (and, if requested, PCA-projected) with
        the transformers fitted on each fold's training part.
    y_test : array-like
        Integer-encoded test labels.
    fold : int
        Number of KFold splits.
    pca : bool
        If truthy, project the scaled features onto 12 principal components
        before training.
    random_state : int or None, optional
        Seed for the shuffled KFold split. ``None`` (the default) keeps the
        previous unseeded behaviour. Keras weight initialisation is not
        seeded by this argument.

    Returns
    -------
    tuple
        ``(mlp, acc_scores, time_taken)``: the model trained in the LAST
        fold, the list of per-fold accuracies measured on ``X_test`` /
        ``y_test`` (not on the validation fold), and the elapsed seconds.
    """
    start_time = timer()
    perform_pca = bool(pca)
    # Tie the output width to the encoder whose classes_ are also used as
    # target_names in the classification report below.
    number_of_classes = len(encoder.classes_)
    kf = KFold(n_splits=fold, shuffle=True, random_state=random_state)

    acc_scores = []
    train_histories.clear()
    cm_hist.clear()
    classification_report_hist.clear()

    for fold_number, (train_index, val_index) in enumerate(kf.split(X), start=1):
        if perform_pca:
            print('\nTraining model with PCA and cross validate using fold #{}...\n ' .format(fold_number))
        else:
            print('\nTraining model and cross validate using fold #{}...\n ' .format(fold_number))

        X_train, X_val = X.iloc[train_index, :], X.iloc[val_index, :]
        y_train, y_val = y[train_index], y[val_index]

        # Standardise with statistics from this fold's training part only.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test_scaled = scaler.transform(X_test)

        y_train = to_categorical(y_train, number_of_classes)
        y_val = to_categorical(y_val, number_of_classes)
        y_test_categorical = to_categorical(y_test, number_of_classes)

        if perform_pca:
            reducer = PCA(n_components=12)
            X_train = reducer.fit_transform(X_train)
            X_val = reducer.transform(X_val)
            # The test matrix must go through the same projection; otherwise
            # predict/evaluate receive the un-reduced feature count.
            X_test_scaled = reducer.transform(X_test_scaled)

        mlp = _build_mlp(X_train[0].shape, number_of_classes)
        # summary() prints by itself and returns None, so it is not wrapped
        # in print().
        mlp.summary()

        my_callbacks = [
            EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20),
            ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, min_delta=0.00001, patience=20, verbose=1),
        ]

        history = mlp.fit(X_train, y_train, batch_size=64, epochs=200, validation_data=(X_val, y_val),
                          callbacks=my_callbacks, shuffle=True)

        mlp_pred = np.argmax(mlp.predict(X_test_scaled), axis=-1)
        scores = mlp.evaluate(X_test_scaled, y_test_categorical, batch_size=64, verbose=0)
        acc_scores.append(scores[1])  # scores == [loss, accuracy]
        train_histories.append(history.history)

        print('y_test\n', y_test)
        print('')
        print('mlp_pred\n', mlp_pred)

        cm_hist.append(confusion_matrix(y_test, mlp_pred))
        classification_report_hist.append(classification_report(y_test, mlp_pred, target_names=encoder.classes_))

    end_time = timer()
    time_taken = end_time - start_time

    return mlp, acc_scores, time_taken

In [22]:
# Run 5-fold training without PCA. `mlp_model` receives the model trained in
# the final fold; `acc_scores` holds one test-set accuracy per fold.
mlp_model, acc_scores, time_taken = perform_mlp(X_test, y_test, fold=5, pca=False)


Training model and cross validate using fold #1...
 
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                864       
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 51        
Total params: 1,443
Trainable params: 1,443
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 1

Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200

Epoch 00034: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200

Epoch 00054: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200


Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200

Epoch 00074: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200

Epoch 00094: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200

Epoch 00114: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 115/200


Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 00128: early stopping
y_test
 [0 0 0 ... 2 2 2]

mlp_pred
 [0 0 0 ... 2 2 2]

Training model and cross validate using fold #3...
 
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 32)                864       
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_8 (Dense)              (None, 3)                 51        
Total params: 1,443
Trainable params: 1,443
Non-trainable params:

Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 00048: early stopping
y_test
 [0 0 0 ... 2 2 2]

mlp_pred
 [0 0 0 ... 2 2 2]

Training model and cross validate using fold #4...
 
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 32)                864       
_________________________________________________________________
dropout_3 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_11 (Dense)             (None, 3)                 51        
Total params: 1,443
Trainable params: 1,443
Non-trainable params: 0
__________

Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200

Epoch 00051: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200

Epoch 00071: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200

Epoch 00091: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 92/200
Epoch 93/200
Epoch 94/200


Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200

Epoch 00038: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200

Epoch 00058: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 59/200


Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 00073: early stopping
y_test
 [0 0 0 ... 2 2 2]

mlp_pred
 [0 0 0 ... 2 2 2]


In [23]:
# Report the per-fold accuracies, their mean and spread, and the wall-clock
# duration, separated by blank lines.
print(
    'MLP - prediction accuracy of each fold:\n {}'.format(acc_scores),
    '',
    'MLP - average accuracy in 5-fold = {} with std. deviation {}' .format(np.mean(acc_scores), np.std(acc_scores)),
    '',
    'Time taken: {:.2f} seconds' .format(time_taken),
    sep='\n',
)

MLP - prediction accuracy of each fold:
 [0.9831249713897705, 0.9831249713897705, 0.981249988079071, 0.9825000166893005, 0.9825000166893005]

MLP - average accuracy in 5-fold = 0.9824999928474426 with std. deviation 0.000684647103087295

Time taken: 40.88 seconds
