In [1]:
import pickle
import numpy as np
from dataset import Dataset, load_data, attention_init
from sklearn.model_selection import train_test_split
from keras.layers import Input, Dense, Masking, Dropout, LSTM, Bidirectional, Activation, Conv2D, Conv1D, MaxPool1D, AveragePooling1D, BatchNormalization
from keras.layers.merge import dot
from keras.models import Model, load_model
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras import optimizers
from keras import backend as k

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
max_len = 516
features_number = 34
hidden_unit = 512
dropout_rate = 0.65
lstm_cells = 128
classes = 5
batch = 64
epochs = 5000

In [3]:
def pltfunction(acc,loss,name):
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    epoch = range(len(acc))
    plt.xlabel('epoch')
    plt.plot(epoch,acc,"x-",label=name+'acc')
    plt.plot(epoch,loss, "+-", label=name+'loss')
    plt.grid(True)
    plt.legend(loc=1)
    plt.savefig(name)
    plt.show()
    

In [4]:
def SER_mix_model(tra_data, tra_label, val_data, val_label, max_len,features_num, hidden_unit, dp_rate, lstm_cells, classes, epochs, batch_size):

    u_train, u_val = attention_init(tra_data.shape[0], val_data.shape[0], 256, 1.0/256)
    
    with k.name_scope('globalCNN_BLSTMLayer'):
        ipt_features = Input(shape=(max_len, features_num))
        x = Conv1D(64,3,activation='relu',kernel_initializer='uniform',bias_initializer='zeros')(ipt_features)
        x = Conv1D(128,1,activation='relu',kernel_initializer='uniform',bias_initializer='zeros')(x)
        x = Conv1D(256,3,activation='relu',kernel_initializer='uniform',bias_initializer='zeros')(x)
        x = MaxPool1D(2,2)(x)
        x = Conv1D(512,3,activation='relu',kernel_initializer='uniform',bias_initializer='zeros')(ipt_features)
        x = Conv1D(1024,1,activation='relu',kernel_initializer='uniform',bias_initializer='zeros')(x)
        x = Conv1D(2048,3,activation='relu',kernel_initializer='uniform',bias_initializer='zeros')(x)
        x = MaxPool1D(2,2)(x)
        x = BatchNormalization(axis=-1, momentum=0.9)(x)
#         x = Masking(mask_value=-100.0)(ipt_features)
        x = Dense(hidden_unit, activation='relu',kernel_initializer='uniform',bias_initializer='zeros')(x)
        x = Dropout(dp_rate)(x)
#         x = Dense(hidden_unit, activation='relu')(ipt_features)
#         x = Dropout(dp_rate)(x)
        x = Bidirectional(LSTM(lstm_cells, return_sequences=True, dropout=dp_rate,kernel_initializer='uniform',bias_initializer='zeros'))(x)
        y = Bidirectional(LSTM(lstm_cells, return_sequences=True, dropout=dp_rate,kernel_initializer='uniform',bias_initializer='zeros'))(x)
    with k.name_scope('AttentionLayer'):
        ipt_attention = Input(shape=(lstm_cells*2,))
        u = Dense(lstm_cells*2, activation='softmax',kernel_initializer='uniform',bias_initializer='zeros')(ipt_attention)
        alp = dot([u,y], axes=-1)
        alp = Activation('softmax')(alp)
    with k.name_scope('WeightPooling'):
        z = dot([alp, y], axes=1) #utterance-level
    
    opt = Dense(classes, activation='softmax',kernel_initializer='uniform',bias_initializer='zeros')(z)
    model = Model(inputs=[ipt_attention, ipt_features],outputs=opt)
    model.summary()
    optimizer = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)

    file_path = 'weights_blstm_mix_5cla_val_lossbest2' + '.h5'
    callback_list = [
                    EarlyStopping(
                        monitor='loss',
                        patience=150,
                        verbose=1,
                        mode='auto'
                    ),
                    ModelCheckpoint(
                        filepath=file_path,
                        monitor='val_loss',
                        save_best_only='True',
                        verbose=1,
                        mode='auto',
                        period=1
                    )
                    ]

    training = model.fit([u_train, tra_data], tra_label, batch_size=batch_size, epochs=epochs, verbose=1,
                             callbacks=callback_list, 
                             validation_data=([u_val,val_data], val_label))
    history = training.history
    acc = np.asarray(history['acc'])
    loss = np.asarray(history['loss'])
    pltfunction(acc,loss,"training_mix")
    val_loss = np.asarray(history['val_loss'])
    val_acc = np.asarray(history['val_acc'])
    pltfunction(val_loss,val_acc,"testing_mix")

    acc_and_loss = np.column_stack((acc, loss, val_acc, val_loss))
    save_file_blstm = 'blstm_training_mix_5cla_val_lossbest2' + '.csv'
    with open(save_file_blstm, 'wb'):
        np.savetxt(save_file_blstm, acc_and_loss)
    
#     u_test, _ = attention_init(x_test.shape[0], x_test.shape[0], 256, 1.0/256)
    score_test, accuracy_test = model.evaluate([u_val,val_data], val_label, batch_size=128, verbose=1)
#     score_2, accuracy_2 = model.evaluate([u_val2,x_test2], y_test2, batch_size=128, verbose=1)
    print('*******************************************************')
    print("Final test validation accuracy: %s" % accuracy_test)
#     print("Final test2 validation accuracy: %s" % accuracy_2)
    print('*******************************************************')

In [8]:
mix_tra_label = np.asarray(pickle.load(open('tra_label_5cla_mix_labels_ActorDep.p', 'rb')))
mix_tra_feature = np.asarray(pickle.load(open('tra_data_5cla_mix_features_ActorDep.p', 'rb')))
mix_val_data=np.asarray(pickle.load(open('val_data_5cla_mix_features_ActorDep.p','rb')))
mix_val_label=np.asarray(pickle.load(open('val_label_5cla_mix_labels_ActorDep.p','rb')))

m = SER_mix_model(tra_data=mix_tra_feature, tra_label=mix_tra_label, val_data=mix_val_data, val_label=mix_val_label,
                  max_len=max_len, features_num=features_number, hidden_unit=hidden_unit, dp_rate=dropout_rate,
                  lstm_cells=lstm_cells, classes=classes, epochs=epochs, batch_size=batch)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 516, 34)      0                                            
__________________________________________________________________________________________________
conv1d_4 (Conv1D)               (None, 514, 512)     52736       input_1[0][0]                    
__________________________________________________________________________________________________
conv1d_5 (Conv1D)               (None, 514, 1024)    525312      conv1d_4[0][0]                   
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, 512, 2048)    6293504     conv1d_5[0][0]                   
__________________________________________________________________________________________________
max_poolin

ValueError: Error when checking input: expected input_1 to have 3 dimensions, but got array with shape (8360, 2)

In [9]:
print(mix_tra_feature)

[[array([-2.1289892e-05, -1.4010740e-06,  2.3305158e-06, ...,
       -3.2377432e-06,  1.1344974e-06, -3.5677920e-07], dtype=float32)
  16000]
 [array([ 2.2111117e-05,  3.8449856e-05,  3.2233750e-06, ...,
       -5.4563716e-06, -2.9611267e-05, -1.5393754e-05], dtype=float32)
  16000]
 [array([-1.3905451e-05,  1.8560990e-05, -2.4276007e-06, ...,
        2.6607271e-05, -3.3781089e-05, -1.2083523e-05], dtype=float32)
  16000]
 ...
 [array([ 0.0000000e+00, -4.2724609e-04, -6.4086914e-04, ...,
       -9.1552734e-05, -9.1552734e-05, -9.1552734e-05], dtype=float32)
  16000]
 [array([ 0.00076294,  0.00094604,  0.00112915, ..., -0.00054932,
        0.00018311, -0.00018311], dtype=float32)
  16000]
 [array([ 0.00222778,  0.00073242,  0.00048828, ..., -0.00595093,
       -0.00570679, -0.00671387], dtype=float32)
  16000]]
