In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Conv1D,GRU
from tensorflow.keras.layers import Bidirectional, GlobalMaxPool1D, Concatenate
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import json
import numpy as np
from sklearn.model_selection import train_test_split

 The versions of TensorFlow you are currently using is 2.3.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [11]:
# Weight matrices saved on disk by an earlier run (presumably a previously
# trained model -- confirm provenance).
# embed_trained[0] is used as the frozen Embedding matrix in get_output_bilstm_model;
# embed_mat is used as the initial Embedding matrix in get_model_concat.
embed_trained = np.load('./weights/embedding_trained_mat.npy')
embed_mat = np.load('./weights/embedding_mat.npy')

# SECURITY: allow_pickle=True unpickles arbitrary Python objects, so this file
# must come from a trusted source only.
lstm_trained = np.load('./weights/lstm_trained_mat.npy', allow_pickle=True)

# Displayed as the cell output: length of the fifth stored LSTM array.
len(lstm_trained[4])


128

In [3]:
max_len = 30    # sequences are padded/truncated to this many tokens
feat_vec = 300  # output dimension passed to the Embedding layers


def _read_json(path):
    """Return the parsed JSON document stored at `path`."""
    with open(path, "r") as handle:
        return json.load(handle)


# Encoded reviews, post-padded to a fixed length.
encode_data_tiki = _read_json("../../encode_data_tiki.txt")
padded_doc_tiki = pad_sequences(encode_data_tiki, maxlen=max_len, padding="post")

# Label matrix aligned row-by-row with the padded sequences.
output_ori_tiki = np.array(_read_json("../../output_tiki.txt"))
print(padded_doc_tiki.shape, output_ori_tiki.shape)

# Fixed seed keeps the 80/20 split reproducible.
(X_train_tiki, X_test_tiki,
 y_train_ori_tiki, y_test_ori_tiki) = train_test_split(
    padded_doc_tiki, output_ori_tiki, test_size=0.2, random_state=14)

# Remove label column 6 from both splits.
y_train_tiki = np.delete(y_train_ori_tiki, 6, axis=1)
y_test_tiki = np.delete(y_test_ori_tiki, 6, axis=1)

(2987, 30) (2987, 7)


In [4]:
# Sanity check: (number of training rows, max_len).
X_train_tiki.shape

(2389, 30)

In [5]:
def get_output_bilstm_model():
    """Build the frozen feature extractor: Embedding -> Bidirectional LSTM.

    The Embedding layer is initialised from `embed_trained[0]` and the BiLSTM
    from `lstm_trained`; both are non-trainable. The model maps a batch of
    token-id sequences of length `max_len` to the per-timestep BiLSTM outputs.

    Returns:
        A Keras `Model` whose output has shape (batch, max_len, 256).

    Raises:
        ValueError: from `set_weights` if `lstm_trained` does not match the
            BiLSTM's weight list (count or shapes).
    """
    inp = Input(shape=(max_len,))
    # Word vectors are loaded from disk and are not retrained.
    x = Embedding(len(embed_trained[0]), feat_vec, weights=[embed_trained[0]],
                  input_length=max_len, trainable=False)(inp)

    bilstm = Bidirectional(LSTM(128, return_sequences=True), trainable=False)
    x = bilstm(x)  # calling the layer creates its variables
    # The original passed `weights=[lstm_trained]` to Bidirectional: a
    # one-element list cannot populate the forward+backward LSTM tensors, and
    # the call succeeded without a shape error, so the stored values were
    # evidently never applied. Setting them explicitly fails loudly on mismatch.
    # Assumes lstm_trained holds the arrays from Bidirectional.get_weights()
    # (forward kernel/recurrent/bias, then backward) -- TODO confirm.
    bilstm.set_weights(list(lstm_trained))

    return Model(inputs=inp, outputs=x)


# Build the extractor once so train and test features come from the very same
# weights (the original built a separate model for each call).
bilstm_feature_model = get_output_bilstm_model()
X_train_bilstm_output = bilstm_feature_model.predict(X_train_tiki)
X_test_bilstm_output = bilstm_feature_model.predict(X_test_tiki)


In [6]:
# Sanity check: (rows, max_len, 2 * LSTM units) from the BiLSTM extractor.
X_train_bilstm_output.shape

(2389, 30, 256)

In [7]:
# Micro-averaged F1 over the 6 output labels; predictions are binarised at 0.5.
f1 = tfa.metrics.F1Score(num_classes=6, average='micro', threshold=0.5)

# All callbacks track the validation F1, where larger is better.
# Multiply the learning rate by 0.4 after 3 epochs without improvement.
rlp = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_f1_score', mode='max', factor=0.4, patience=3, verbose=1)

# Stop after 6 stagnant epochs and roll back to the best weights seen.
es = EarlyStopping(
    monitor='val_f1_score', mode='max', patience=6,
    restore_best_weights=True, verbose=1)

# Persist only the weights of the best-scoring epoch.
filepath = './weights/best_concat_model.hdf5'
checkpoint = ModelCheckpoint(
    filepath, monitor='val_f1_score', mode='max',
    save_best_only=True, save_weights_only=True, verbose=1)

def get_model_concat():
    """Build and compile the two-input multi-label classifier.

    Input 1 is a batch of token-id sequences of length `max_len`; it is
    embedded (initialised from `embed_mat`, fine-tuned) and passed through a
    BiLSTM. Input 2 is a batch of pre-computed BiLSTM features of shape
    (max_len, 256). The two streams are scaled by 0.8 / 0.2, concatenated on
    the feature axis and fed to Conv1D -> global max pooling -> Dense layers
    ending in 6 sigmoid outputs.

    Prints the model summary as a side effect.

    Returns:
        A compiled Keras `Model` taking `[token_ids, bilstm_features]`, with
        binary cross-entropy loss, Adam(learning_rate=0.0015) and the
        module-level `f1` metric.
    """
    lstm_units = 128

    inp = Input(shape=(max_len,))
    # Word vectors start from embed_mat and ARE updated during training
    # (trainable=True).
    x = Embedding(len(embed_mat), feat_vec, weights=[embed_mat],
                  input_length=max_len, trainable=True)(inp)
    # NOTE(review): this BiLSTM is frozen but no weights are loaded into it
    # here, so it keeps its initial values -- confirm this is intended.
    x = Bidirectional(LSTM(lstm_units, return_sequences=True), trainable=False)(x)

    # Pre-computed features; width equals a BiLSTM with `lstm_units` per direction.
    second_input = Input(shape=(max_len, 2 * lstm_units))

    # Weighted concatenation of the in-model and the pre-computed BiLSTM outputs.
    x = Concatenate(axis=-1)([0.8 * x, 0.2 * second_input])
    x = Conv1D(64, 3, activation="relu")(x)
    x = GlobalMaxPool1D()(x)
    x = Dense(32, activation="relu")(x)
    x = Dense(6, activation="sigmoid")(x)

    model = Model(inputs=[inp, second_input], outputs=x)
    # summary() prints by itself and returns None; wrapping it in print()
    # only appended a stray "None" line.
    model.summary()

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(learning_rate=0.0015),
                  metrics=[f1])
    return model

In [8]:
# NOTE: fit() returns a History object, so `model` holds the training history,
# not the Keras model itself; the best weights are written to disk by the
# `checkpoint` callback.
model = get_model_concat().fit(
    x=[X_train_tiki, X_train_bilstm_output],
    y=y_train_tiki,
    batch_size=8,
    epochs=15,
    validation_data=([X_test_tiki, X_test_bilstm_output], y_test_tiki),
    callbacks=[rlp, es, checkpoint],
)


Model: "functional_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 30)]         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 30, 300)      1354200     input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) (None, 30, 256)      439296      embedding_2[0][0]                
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 30, 256)]    0                                            
_______________________________________________________________________________________

KeyboardInterrupt: 

In [19]:
# Rebuild the architecture, then restore the weights saved at the checkpoint path.
new_model = get_model_concat()
new_model.load_weights('./weights/best_concat_model.hdf5')

from sklearn import metrics
def evaluate(X_test, X_test_bilstm_output, y_test, model, thresholds=None):
    """Print the micro-averaged F1 of `model` for a sweep of decision thresholds.

    Args:
        X_test: first model input, passed through to `model.predict`.
        X_test_bilstm_output: second model input, passed through to `model.predict`.
        y_test: ground-truth label matrix compared with the thresholded predictions.
        model: any object exposing `predict([X_test, X_test_bilstm_output])`.
        thresholds: optional iterable of cut-offs. Defaults to
            0.20, 0.21, ..., 0.69 (50 values).

    Returns:
        The raw, un-thresholded predictions returned by `model.predict`.
    """
    if thresholds is None:
        # Integer range scaled afterwards: np.arange with a float step has a
        # numerically unstable length and slightly-off values.
        thresholds = np.arange(20, 70) / 100.0

    y_pre = model.predict([X_test, X_test_bilstm_output])
    for thresh in thresholds:
        # A label is predicted positive when its score is strictly above the cut-off.
        y_bin = (y_pre > thresh).astype(int)
        score = metrics.f1_score(y_test, y_bin, average='micro')
        print("threshold {0:2.2f} f1 score:{1:2.3f}".format(thresh, score))
    return y_pre
# Run the threshold sweep on the held-out split and keep the raw predictions.
y_pre=evaluate(X_test_tiki, X_test_bilstm_output, y_test_tiki, new_model)

Model: "functional_19"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_17 (InputLayer)           [(None, 30)]         0                                            
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, 30, 300)      1354200     input_17[0][0]                   
__________________________________________________________________________________________________
bidirectional_9 (Bidirectional) (None, 30, 256)      439296      embedding_9[0][0]                
__________________________________________________________________________________________________
input_18 (InputLayer)           [(None, 30, 256)]    0                                            
______________________________________________________________________________________