In [None]:
from keras import layers
from keras import optimizers
from keras.models import Sequential
import keras.backend as K
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import sys
import os

os.chdir('E:\ZFS_TEST\Analysis')
sys.path.append('E:\ZFS_TEST\Analysis')

from utils.logger import DualLogger
from utils.tools import date_time
from utils.voice_preprocess.mfcc_data_loader import DataPack

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def f1(y_true, y_pred):
	'''
	compute f1 score
	'''
	y_pred = K.round(y_pred)
	tp = K.sum(K.cast(y_true * y_pred, 'float'), axis=0)
	# tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
	fp = K.sum(K.cast((1 - y_true) * y_pred, 'float'), axis=0)
	fn = K.sum(K.cast(y_true * (1 - y_pred), 'float'), axis=0)

	p = tp / (tp + fp + K.epsilon())
	r = tp / (tp + fn + K.epsilon())

	f1 = 2 * p * r / (p + r + K.epsilon())
	f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
	return K.mean(f1)

In [3]:
def build_MLP_model():
	print('building MLP model...')
	hidden_layer_sizes = (300, 200, 100, 10)
	model = Sequential([
		# layers.Flatten(input_shape=(9, 40)),
		layers.Dense(units=hidden_layer_sizes[0], activation='relu', input_dim=360),
		layers.Dense(units=hidden_layer_sizes[1], activation='relu'),
		layers.Dense(units=hidden_layer_sizes[2], activation='relu'),
		layers.Dense(units=hidden_layer_sizes[3], activation='relu'),
		layers.Dense(units=1, activation='sigmoid'),
	])

	opt = optimizers.Adam(lr=1e-5, decay=0.)
	model.compile(optimizer=opt, loss='mse', metrics=['acc', f1])
	print('built.')
	return model


def build_CNN_model():
	print('building CNN model...')
	model = Sequential([
		layers.Conv1D(input_shape=(9, 40), filters=3, kernel_size=(3,)),
		layers.Activation('relu'),
		layers.MaxPool1D(pool_size=2),
		layers.Flatten(),
		layers.Dense(units=10),
		layers.Activation('relu'),
		layers.Dense(units=1),
		layers.Activation('sigmoid')
	])

	opt = optimizers.Adam(lr=0.0003, decay=1e-3)
	model.compile(optimizer=opt, loss='mse', metrics=['acc', f1])
	print('built.')
	return model


def build_RNN_model():
	print('building RNN model...')
	layer_units = (40, 200, 20, 1)
	model = Sequential()
	model.add(layers.GRU(units=layer_units[1], activation='tanh',
						 return_sequences=False, input_shape=(None, layer_units[0])))
	model.add(layers.Dropout(0.2))
	model.add(layers.Dense(units=layer_units[2], activation='relu'))
	model.add(layers.Dropout(0.2))
	model.add(layers.Dense(units=layer_units[3], activation='sigmoid'))

	opt = optimizers.RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.001)
	model.compile(optimizer=opt, loss='mse', metrics=['acc', f1])
	print('built.')
	return model


def load_train_test(wkdir, chunks=False, test_size=None):
	dataset = DataPack()
	if chunks == True:
		dataset.from_chunks_dir(wkdir)
	else:
		dataset.from_wav_dir(wkdir)
	dataset.apply_subsampling()
	dataset.roll_f_as_last()
	print('shape like:')
	dataset.show_shape()
	print('data loaded.\n')
	return dataset.train_test_split(test_size=test_size)


def plot_history(which='acc'):
	global history, DATETIME
	plt.plot(history.history[which])
	plt.plot(history.history['val_%s' % which])
	plt.title('Model %s' % which)
	plt.ylabel(('%s' % which).upper())
	plt.xlabel('Epoch')
	plt.legend(['Train', 'Val'], loc='upper left')
	plt.savefig('outputs/%s%s.png' % (DATETIME, which))
	plt.show()


def scikit_clf(train, test):
	from sklearn.neural_network import MLPClassifier
	clf = MLPClassifier(hidden_layer_sizes=(300, 200, 100, 10),
						activation='relu', solver='adam',
						learning_rate_init=1e-5, verbose=True, shuffle=True)
	print('clf ready.\n', clf)
	print()
	clf.fit(train.data, train.labels)
	print('\ntrain over.\n')
	train_acc = clf.score(train.data, train.labels)
	print('train acc =', train_acc)
	val_acc = clf.score(test.data, test.labels)
	print('val acc =', val_acc)


In [4]:
build_which_model = {
	'RNN': build_RNN_model,
	'CNN': build_CNN_model,
	'MLP': build_MLP_model
}

wkdirs = [
	'Data/Study3/subjects/yzc/trimmed',
	'Data/Study3/subjects/0305_1/trimmed',
	'Data/Study3/subjects/0305_2/trimmed',
	'Data/Study3/subjects/cjr/trimmed',
	'Data/Study3/subjects/gfz/trimmed',
	'Data/Study3/subjects/wty/trimmed',
	'Data/Study3/subjects/wwn/trimmed',
	'Data/Study3/subjects/xy/trimmed',
	# 'Data/Study3/subjects/wj/trimmed',
]

valdir = 'Data/Study3/subjects/zfs/trimmed'

In [6]:
train = DataPack()
train.from_wav_dir(wkdirs)
train.apply_subsampling()
train.to_flatten()
# train.roll_f_as_last()
print('train shape like:')
train.data = train.data[:10]
train.labels = train.labels[:10]
train.show_shape()

test = DataPack()
test.from_wav_dir(valdir)
test.apply_subsampling()
test.to_flatten()
# test.roll_f_as_last()
print('test shape like:')
test.show_shape()

In [None]:
model_type = 'MLP'

os.path.exists('model_state')
DATETIME = date_time()
DualLogger('logs/%s%s.txt' % (DATETIME, model_type))

In [8]:
model = build_which_model[model_type]()
model.summary()

In [None]:
# scikit_clf(train, test)




history = model.fit(train.data, train.labels, batch_size=1, epochs=10, verbose=1)
train_loss, train_acc, train_f1 = model.evaluate(train.data, train.labels, batch_size=10)
test_loss, test_acc, test_f1 = model.evaluate(test.data, test.labels, batch_size=10)

print('acc, f1 on train:', train_acc, train_f1)
print('acc, f1 on test :', test_acc, test_f1)

model.save('voice/model_state/%s%s %d-%d.h5' % (DATETIME, model_type, train_acc * 100, test_acc * 100))

# Plot training & validation accuracy values
plot_history('acc')

# Plot training & validation loss values
plot_history('loss')

# Plot training & validation f1 values
plot_history('f1')

train shape like:
data: (10, 360)
labels: (10,)
names: (76366,)



test shape like:
data: (7803, 360)
labels: (7803,)
names: (7803,)

building MLP model...
built.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 300)               108300    
_________________________________________________________________
dense_2 (Dense)              (None, 200)               60200     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               20100     
_________________________________________________________________
dense_4 (Dense)              (None, 10)                1010      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 11        
Total params: 189,621
Trainable params: 189,621
Non-trainable params: 0
_________________________________________________________________
None


ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 10 arrays: [array([[-6.00206565e+02],
       [-6.03254324e+02],
       [-6.08159516e+02],
       [-6.12868989e+02],
       [-6.18544661e+02],
       [-6.23393385e+02],
       [-6.23467645e+02],
       [-6.264758...