In [1]:
from keras.layers import Input,Dense,Embedding,Conv2D,MaxPool2D
from keras.layers import Reshape,Flatten,Dropout,Concatenate
from keras.callbacks import ModelCheckpoint,TensorBoard
from keras.optimizers import Adam
from keras.models import Model
from sklearn.model_selection import train_test_split
from data_helpers import load_data

Using TensorFlow backend.


In [2]:
print('Loading data')

# load_data() comes from the project-local data_helpers module; it returns the
# model inputs, the labels and two vocabulary objects
# (presumably token<->index lookups -- confirm in data_helpers).
x, y, vocabulary, vocabulary_inv = load_data()

# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


Loading data


In [3]:
import numpy as np

# Sanity check: show the dimensions of the training inputs before the model
# is sized from them.
print(np.asarray(X_train).shape)

(8529, 56)


In [12]:

# --- Sizes derived from the loaded data ---
# Second dimension of x; used below as the Input length and Reshape height.
sequence_length = x.shape[1]
# Number of rows in the embedding table.
vocabulary_size = len(vocabulary_inv)  # 18765

# --- Network architecture ---
embedding_dim = 16        # width of each embedding vector
filter_sizes = [3, 4, 5]  # convolution kernel heights, one branch per entry
num_filters = 512         # filters per convolution branch
drop = 0.5                # dropout rate applied before the output layer

# --- Training schedule ---
epochs = 5
batch_size = 30

In [15]:
# Integer-encoded sequences of fixed length enter the network here.
inputs = Input(shape=(sequence_length,), dtype='int32')

# Look up an embedding_dim-wide vector for every position of the sequence.
embedding = Embedding(
    input_dim=vocabulary_size,
    output_dim=embedding_dim,
    input_length=sequence_length,
)(inputs)
print(embedding)

# Append a trailing channel axis of size 1 so the embedded sequence can be fed
# to Conv2D layers.
reshape = Reshape(target_shape=(sequence_length, embedding_dim, 1))(embedding)
print(reshape)

Tensor("embedding_3/embedding_lookup/Identity:0", shape=(?, 56, 16), dtype=float32)


Tensor("reshape_1/Reshape:0", shape=(?, 56, 16, 1), dtype=float32)


In [17]:
import os

# ----------- Convolution + max pooling: one branch per entry of filter_sizes
# Each kernel is (filter_size x embedding_dim), so it spans the full embedding
# width. The pooling window height (sequence_length - filter_size + 1) equals
# the height of the 'valid' convolution output, so every filter is reduced to
# a single value. Layers are created in the order conv, pool, conv, pool, ...
# NOTE: Concatenate below needs at least two branches, i.e. len(filter_sizes) >= 2.
pooled_outputs = []
for filter_size in filter_sizes:
    conv = Conv2D(num_filters,
                  kernel_size=(filter_size, embedding_dim),
                  padding='valid',
                  kernel_initializer='normal',
                  activation='relu')(reshape)
    pooled = MaxPool2D(pool_size=(sequence_length - filter_size + 1, 1),
                       strides=(1, 1),
                       padding='valid')(conv)
    pooled_outputs.append(pooled)

# ----------- Fully connected classification head
concatenated_tensor = Concatenate(axis=1)(pooled_outputs)
flatten = Flatten()(concatenated_tensor)
dropout = Dropout(drop)(flatten)
output = Dense(units=2, activation='softmax')(dropout)
model = Model(inputs=inputs, outputs=output)

log_dir = "./logs/"
# ModelCheckpoint writes its weight file into log_dir; create the directory
# up front instead of relying on another callback to do it.
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Log the training run for TensorBoard, and after every epoch keep the weights
# only when val_loss improved on the best value seen so far.
tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0)
checkpoint = ModelCheckpoint(log_dir + "best_weights.h5",
                             monitor="val_loss",
                             mode='min',
                             save_weights_only=True,
                             save_best_only=True,
                             verbose=1,
                             period=1)

callback_lists = [tensorboard, checkpoint]

# Define the optimizer
adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
# The head is a 2-unit softmax, so the matching loss is categorical
# cross-entropy. Assumes the labels are one-hot with two columns -- confirm
# against data_helpers.load_data.
model.compile(adam, loss='categorical_crossentropy', metrics=['accuracy'])

target_dir = './models/'
if not os.path.exists(target_dir):
    os.makedirs(target_dir)
# NOTE: fit() has not been called yet at this point, so both files hold the
# freshly initialised (untrained) model; the trained weights are written by
# the checkpoint callback to log_dir + "best_weights.h5".
model.save(target_dir + 'model.h5')
model.save_weights(target_dir + 'weights.h5')
print("Traning Model...")

Traning Model...


In [18]:
# Train the model, validating on the held-out split after every epoch. The
# callbacks log the run for TensorBoard and checkpoint the best weights.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,
    callbacks=callback_lists,
    validation_data=(X_test, y_test),
)
# history.history maps each tracked metric name to its per-epoch values.
print(history.history)

Train on 8529 samples, validate on 2133 samples


Epoch 1/5


 - 9s - loss: 0.6928 - acc: 0.5100 - val_loss: 0.6923 - val_acc: 0.5021



Epoch 00001: val_loss improved from inf to 0.69228, saving model to ./logs/best_weights.h5
Epoch 2/5


 - 8s - loss: 0.6891 - acc: 0.5613 - val_loss: 0.6879 - val_acc: 0.6203



Epoch 00002: val_loss improved from 0.69228 to 0.68791, saving model to ./logs/best_weights.h5
Epoch 3/5


 - 8s - loss: 0.6786 - acc: 0.6413 - val_loss: 0.6744 - val_acc: 0.6493



Epoch 00003: val_loss improved from 0.68791 to 0.67438, saving model to ./logs/best_weights.h5
Epoch 4/5


 - 8s - loss: 0.6436 - acc: 0.7103 - val_loss: 0.6329 - val_acc: 0.6864



Epoch 00004: val_loss improved from 0.67438 to 0.63288, saving model to ./logs/best_weights.h5
Epoch 5/5


 - 8s - loss: 0.5604 - acc: 0.7686 - val_loss: 0.5696 - val_acc: 0.7159



Epoch 00005: val_loss improved from 0.63288 to 0.56956, saving model to ./logs/best_weights.h5
{'val_loss': [0.6922848684878289, 0.6879065004079151, 0.6743843218445275, 0.6328807091746484, 0.5695642030859464], 'val_acc': [0.5021097108449278, 0.6202531708024558, 0.6493202104179501, 0.686357243570765, 0.7158931080466752], 'loss': [0.6927885451536551, 0.689063959041197, 0.6786201501286788, 0.6436335173996661, 0.5604007385102452], 'acc': [0.5100246293000508, 0.5612615856053416, 0.6413413136725773, 0.7102825662492658, 0.768554343541575]}


In [19]:
# Validation loss recorded at the end of each epoch, in epoch order.
val_losses = history.history['val_loss']
print(val_losses)

[0.6922848684878289, 0.6879065004079151, 0.6743843218445275, 0.6328807091746484, 0.5695642030859464]


In [20]:
#-----------------predict------------------------
from keras.models import Sequential,load_model
from sklearn.model_selection import train_test_split
from data_helpers import load_data

# Reload the data and repeat the split with the same test_size and
# random_state as in training (assumes load_data() is deterministic, so that
# X_test is the same held-out set -- confirm in data_helpers).
x, y, vocabulary, vocabulary_inv = load_data()
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42)

model_path='./models/model.h5'
model_weigths_path='./logs/best_weights.h5'

# Bind the loaded model to a name. Discarding load_model's return value would
# leave this cell depending on a `model` variable left over from the training
# cell, and it would fail on a fresh kernel.
model = load_model(model_path)
# model.h5 was saved before training; overwrite its weights with the best
# checkpoint written during fit().
model.load_weights(model_weigths_path)

# Store predictions under their own name so the label array `y` loaded above
# is not overwritten.
y_pred = model.predict(X_test)
print(y_pred)


[[0.5332981  0.4667019 ]
 [0.3247114  0.6752886 ]
 [0.5276256  0.47237432]
 ...
 [0.22835843 0.77164155]
 [0.4378191  0.5621809 ]
 [0.4697877  0.53021234]]
