In [5]:
import glob
import os
# import librosa
# from librosa import display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
%matplotlib inline
plt.style.use('ggplot')
# import pydot
# import graphviz
# Typography overrides layered on top of the ggplot style: font faces first,
# then the sizes/weights of the individual figure elements.
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})

import dill

# Load Leapmotion dataset

In [6]:
# Restore a previously saved interpreter session from disk with dill.
# Later cells read `train`, `test`, `train_label` and `test_label`, which are not
# defined anywhere else in this notebook -- presumably they come from this
# session file (TODO confirm).
# NOTE(review): dill session files are pickle-based, and loading one can execute
# arbitrary code; only load 'data_ready.db' if it comes from a trusted source.
dill.load_session('data_ready.db')

In [10]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

def one_hot(integer_encoded, categories=None):
    """Return a dense one-hot encoding of an array of class labels.

    Implemented with NumPy instead of ``OneHotEncoder(sparse=False)``: the
    ``sparse`` keyword was renamed to ``sparse_output`` in scikit-learn 1.2 and
    later removed, so the previous implementation fails on current releases.
    The output layout is unchanged: one column per distinct label, columns in
    ascending label order, dtype float64.

    Args:
        integer_encoded: Array-like of labels, either 1-D or a single column
            of shape (n, 1).
        categories: Optional array-like listing every class that should get a
            column. Pass the same value for the train and the test labels to
            guarantee identical column layouts even if one split is missing a
            class. When None (the default), the distinct values found in
            ``integer_encoded`` are used, as before.

    Returns:
        A float64 array of shape (n_samples, n_classes) with exactly one 1.0
        per row.

    Raises:
        ValueError: If ``categories`` is given and a label is not part of it.
    """
    labels = np.asarray(integer_encoded).reshape(-1)
    source = labels if categories is None else np.asarray(categories).reshape(-1)
    classes = np.unique(source)  # sorted and de-duplicated

    onehot_encoded = np.zeros((labels.size, classes.size), dtype=np.float64)
    if labels.size:
        if classes.size == 0:
            raise ValueError("labels were given but `categories` is empty")
        # searchsorted returns len(classes) for values above the largest class;
        # clamp so the membership check below reports it instead of an IndexError.
        codes = np.minimum(np.searchsorted(classes, labels), classes.size - 1)
        unknown = classes[codes] != labels
        if unknown.any():
            raise ValueError(
                "labels not present in categories: %r"
                % (np.unique(labels[unknown]).tolist(),))
        onehot_encoded[np.arange(labels.size), codes] = 1.0
    return onehot_encoded

# Leap Motion targets become one-hot rows; the feature arrays are used as loaded.
# Each one_hot() call derives its class set from the labels it is given, so the
# two targets share a column layout only if both label arrays contain the same
# classes.
y_leap_train, y_leap_test = one_hot(train_label), one_hot(test_label)
X_leap_train, X_leap_test = train, test

In [46]:
# Sanity check: shapes of the Leap Motion features and targets, one per line.
print(*(split.shape for split in (X_leap_train, X_leap_test, y_leap_train, y_leap_test)),
      sep='\n')

(884, 100, 87)
(299, 100, 87)
(884, 6)
(299, 6)


# Load Voice dataset

In [12]:
# Voice features and their labels, read from comma-separated text files.
features = np.loadtxt('nn_simple_features.csv', delimiter=',')
# `np.int` was only an alias of the builtin `int`; it was deprecated in NumPy 1.20
# and removed in 1.24, so use the builtin directly.
labels = np.loadtxt('nn_simple_labels.csv', delimiter=',').astype(int)

In [19]:
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import time
X_all = features

from sklearn.preprocessing import OneHotEncoder

# Dense one-hot targets for the voice labels.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and later
# removed the old name, so try the current spelling first and fall back for
# older installations.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)
y_all = enc.fit_transform(labels.reshape((-1, 1)))

In [28]:
# Stratified 75/25 split of the voice data.
# A fixed seed replaces the previous time-based one so that the split -- and
# every number computed from it further down -- is the same on each run.
SPLIT_SEED = 42
X_voice_train, X_voice_test, y_voice_train, y_voice_test = train_test_split(
        X_all, y_all, stratify=y_all, train_size=.75, random_state=SPLIT_SEED)



In [29]:
# Sanity check: shapes of the voice features and targets, one per line.
print(*(split.shape for split in (X_voice_train, X_voice_test, y_voice_train, y_voice_test)),
      sep='\n')

(10633, 1280)
(3545, 1280)
(10633, 6)
(3545, 6)


# Define Leapmotion Model

In [102]:
#LSTM
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense

import numpy as np

# One Leap Motion sample is a sequence of `timesteps` frames with `data_dim`
# values per frame (matches the (N, 100, 87) arrays printed above).
timesteps, data_dim = 100, 87
# Number of target classes (width of the one-hot label arrays).
num_classes = 6
# Batch size intended for the Leap Motion branch.
leap_batch_size = 55

# Expected input batch shape: (batch_size, timesteps, data_dim).
# None of the LSTM layers below is stateful, so only the per-sample input shape
# is declared and the batch size is left open.
def build_leapmotion_model(input_shape=None, units=55):
    """Build the Leap Motion branch: four stacked LSTM layers plus a Dense layer.

    The model is returned uncompiled and without a classification layer; a
    later cell combines it with the voice branch.

    Args:
        input_shape: ``(timesteps, features)`` of a single input sequence. When
            None (the default), the module-level ``(timesteps, data_dim)`` is
            used, looked up at call time as before.
        units: Width of every LSTM layer and of the final Dense layer.
            Defaults to 55, the value that used to be hard-coded.

    Returns:
        A ``Sequential`` model producing ``units`` values per input sequence.
    """
    if input_shape is None:
        input_shape = (timesteps, data_dim)

    model = Sequential()
    # The first three LSTM layers emit the whole sequence (one vector of size
    # `units` per timestep) so the next LSTM layer can consume it.
    model.add(LSTM(units, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(units, return_sequences=True))
    model.add(LSTM(units, return_sequences=True))
    # The last LSTM layer collapses the sequence into a single vector of size `units`.
    model.add(LSTM(units))
    # Dense layer with no activation argument (linear output).
    model.add(Dense(units))

    return model

# Define Voice Model

In [103]:
## Imports
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.utils import print_summary, plot_model
from keras import regularizers
from keras.layers import BatchNormalization

In [104]:
# Each voice sample is a flat row of 1280 values (see the shapes printed above);
# view it as a single-channel img_rows x img_cols image for the Conv2D layers.
img_rows, img_cols = 40, 32

X_voice_train = np.reshape(X_voice_train, (len(X_voice_train), img_rows, img_cols, 1))
X_voice_test = np.reshape(X_voice_test, (len(X_voice_test), img_rows, img_cols, 1))

In [111]:
def build_voice_model(input_shape=(img_rows, img_cols, 1)):
    """Build the voice branch: two convolution stages followed by two dense stages.

    Every stage is followed by batch normalisation and 20% dropout; the
    convolution stages additionally apply 2x2 max pooling. The model is returned
    uncompiled and without a classification layer.

    Args:
        input_shape: Shape of one input image as (rows, cols, channels).

    Returns:
        A ``Sequential`` model producing 64 values per input image.
    """
    return Sequential([
        # Convolution stage 1: 4 filters, 5x5 kernels.
        Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),

        # Convolution stage 2: 8 filters, 3x3 kernels.
        Conv2D(8, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),

        # Dense stage 1: flatten the feature maps, then 128 units.
        Flatten(),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),

        # Dense stage 2: 64 units.
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
    ])

In [190]:
# Instantiate one branch per modality (Leap Motion first, then voice).
leapmotion_model, voice_model = build_leapmotion_model(), build_voice_model()

In [60]:
from keras.layers import concatenate
from keras.models import Model

# The legacy `Merge` layer was deprecated in Keras 2.0 and later removed, so the
# two branches are joined with the functional API instead: concatenate their
# outputs along the feature axis, then add the shared classification head.
merged_features = concatenate([leapmotion_model.output, voice_model.output], axis=-1)
fusion_hidden = Dense(64)(merged_features)
class_probabilities = Dense(num_classes, activation='softmax')(fusion_hidden)

# Two-input model; inputs are passed as [leap_batch, voice_batch], in that order.
model = Model(inputs=[leapmotion_model.input, voice_model.input],
              outputs=class_probabilities)

  after removing the cwd from sys.path.


In [61]:
# Multi-class setup: categorical cross-entropy loss, RMSprop optimiser,
# accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [62]:
# Train on the paired (Leap Motion, voice) samples, using the voice labels as
# the target.
# NOTE(review): shuffle=False feeds the samples in their stored order in every
# epoch; none of the LSTM layers is stateful, so confirm this ordering is intended.
model.fit(
    x=[X_leap_sub_train, X_voice_sub_train],
    y=y_voice_sub_train,
    epochs=20,
    batch_size=55,
    shuffle=False,
    verbose=1,
)

Epoch 1/20

KeyboardInterrupt: 

# Prepare the merged dataset

In [170]:
# `shuffle` is otherwise only imported in a later cell of this notebook; import
# it here so this cell also runs on a fresh kernel.
from random import shuffle

# Pair every Leap Motion training sample with a randomly drawn voice training
# sample of the same class, and repeat the whole draw 5 times (the Leap Motion
# training set is likewise repeated 5 times when the two are fed to the model).
#
# The picks are written to the positions of the matching Leap Motion samples, so
# row j of the result always has the class of Leap Motion sample j. The previous
# version appended the picks class by class, which lines up with the Leap Motion
# data only if that data happens to be sorted by class; for sorted data both
# layouts are identical.
leap_train_classes = np.argmax(y_leap_train, axis=1)
voice_train_classes = np.argmax(y_voice_train, axis=1)

voice_train_picks = []
for _ in range(5):
    picks = np.empty(len(leap_train_classes), dtype=int)
    for i in range(y_leap_train.shape[1]):
        leap_slots = np.flatnonzero(leap_train_classes == i)
        candidates = np.flatnonzero(voice_train_classes == i).tolist()
        if len(candidates) < len(leap_slots):
            # Previously this case silently produced fewer voice rows than
            # Leap Motion rows, breaking the pairing without any error.
            raise ValueError(
                "class %d: %d voice samples available, %d needed"
                % (i, len(candidates), len(leap_slots)))
        shuffle(candidates)
        picks[leap_slots] = np.asarray(candidates[:len(leap_slots)], dtype=int)
    voice_train_picks.append(picks)

voice_train_picks = np.concatenate(voice_train_picks)
X_voice_sub_train = X_voice_train[voice_train_picks]
y_voice_sub_train = y_voice_train[voice_train_picks]

In [171]:
# Sanity check: shapes of the sub-sampled voice features and targets.
print(*(subset.shape for subset in (X_voice_sub_train, y_voice_sub_train)), sep='\n')

(4420, 40, 32, 1)
(4420, 6)


In [186]:

# Five back-to-back copies of the Leap Motion training set, built with a single
# concatenation. The empty (0, 100, 87) seed array is kept as the first operand
# so the shape check and resulting dtype match the incremental construction.
X_leap_sub_train = np.concatenate(
    [np.empty((0, 100, 87))] + [X_leap_train] * 5, axis=0)

In [187]:
# Display the shape of the repeated Leap Motion training array; its first
# dimension should equal the number of rows in X_voice_sub_train.
X_leap_sub_train.shape

(4420, 100, 87)

In [151]:
from random import shuffle

In [197]:
# Pair every Leap Motion test sample with a voice test sample of the same class.
# For each class the first matching voice samples (in stored order) are used.
#
# The picks are written to the positions of the matching Leap Motion samples, so
# row j of the result always has the class of Leap Motion test sample j. The
# previous version appended the picks class by class, which lines up with
# X_leap_test / y_leap_test only if that data happens to be sorted by class; for
# sorted data both layouts are identical.
leap_test_classes = np.argmax(y_leap_test, axis=1)
voice_test_classes = np.argmax(y_voice_test, axis=1)

voice_test_picks = np.empty(len(leap_test_classes), dtype=int)
for i in range(y_leap_test.shape[1]):
    leap_slots = np.flatnonzero(leap_test_classes == i)
    candidates = np.flatnonzero(voice_test_classes == i)
    if len(candidates) < len(leap_slots):
        # Previously this case silently produced fewer voice rows than
        # Leap Motion rows, breaking the pairing without any error.
        raise ValueError(
            "class %d: %d voice samples available, %d needed"
            % (i, len(candidates), len(leap_slots)))
    voice_test_picks[leap_slots] = candidates[:len(leap_slots)]

X_voice_sub_test = X_voice_test[voice_test_picks]

In [198]:
# Display the shape of the sub-sampled voice test array; its first dimension
# should equal the number of rows in X_leap_test.
X_voice_sub_test.shape

(299, 40, 32, 1)

In [199]:
# Evaluate the fused model on the paired test data; `score` holds the loss
# first and the accuracy metric second.
score = model.evaluate([X_leap_test, X_voice_sub_test], y_leap_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print()
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)


Test loss: 0.33863449306616616
Test accuracy: 0.9331103678929766
