In [2]:
from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D
from keras.layers import Reshape, Flatten, Dropout, Concatenate
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.models import Model
from sklearn.model_selection import train_test_split
from data_helpers import load_data

print('Loading data')
x, y, vocabulary, vocabulary_inv = load_data()

# x.shape -> (10662, 56)
# y.shape -> (10662, 2)
# len(vocabulary) -> 18765
# len(vocabulary_inv) -> 18765

X_train, X_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=42)

# X_train.shape -> (8529, 56)
# y_train.shape -> (8529, 2)
# X_test.shape -> (2133, 56)
# y_test.shape -> (2133, 2)


sequence_length = x.shape[1] # 56
vocabulary_size = len(vocabulary_inv) # 18765
embedding_dim = 256
filter_sizes = [3,4,5]
num_filters = 512
drop = 0.5

epochs = 10
batch_size = 30

# this returns a tensor
print("Creating Model...")
inputs = Input(shape=(sequence_length,), dtype='int32')
embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, input_length=sequence_length)(inputs)
reshape = Reshape((sequence_length,embedding_dim,1))(embedding)

conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)

maxpool_0 = MaxPool2D(pool_size=(sequence_length - filter_sizes[0] + 1, 1), strides=(1,1), padding='valid')(conv_0)
maxpool_1 = MaxPool2D(pool_size=(sequence_length - filter_sizes[1] + 1, 1), strides=(1,1), padding='valid')(conv_1)
maxpool_2 = MaxPool2D(pool_size=(sequence_length - filter_sizes[2] + 1, 1), strides=(1,1), padding='valid')(conv_2)

concatenated_tensor = Concatenate(axis=1)([maxpool_0, maxpool_1, maxpool_2])
flatten = Flatten()(concatenated_tensor)
dropout = Dropout(drop)(flatten)
output = Dense(units=2, activation='softmax')(dropout)

# this creates a model that includes
model = Model(inputs=inputs, outputs=output)

checkpoint = ModelCheckpoint('weights.{epoch:03d}-{val_accuracy:.4f}.hdf5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='auto')
adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
print("Training Model...")
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, callbacks=[checkpoint], validation_data=(X_test, y_test))  # starts training


Loading data
Creating Model...
Traning Model...
Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.62588, saving model to weights.001-0.6259.hdf5
Epoch 2/10
Epoch 00002: val_accuracy improved from 0.62588 to 0.65870, saving model to weights.002-0.6587.hdf5
Epoch 3/10
Epoch 00003: val_accuracy improved from 0.65870 to 0.71683, saving model to weights.003-0.7168.hdf5
Epoch 4/10
Epoch 00004: val_accuracy improved from 0.71683 to 0.74027, saving model to weights.004-0.7403.hdf5
Epoch 5/10
Epoch 00005: val_accuracy improved from 0.74027 to 0.75621, saving model to weights.005-0.7562.hdf5
Epoch 6/10
Epoch 00006: val_accuracy improved from 0.75621 to 0.76043, saving model to weights.006-0.7604.hdf5
Epoch 7/10
Epoch 00007: val_accuracy improved from 0.76043 to 0.76418, saving model to weights.007-0.7642.hdf5
Epoch 8/10
Epoch 00008: val_accuracy did not improve from 0.76418
Epoch 9/10
Epoch 00009: val_accuracy did not improve from 0.76418
Epoch 10/10
Epoch 00010: val_accuracy did not 

<tensorflow.python.keras.callbacks.History at 0x7f9c39a91070>

In [13]:
from sklearn.feature_extraction.text import CountVectorizer
count = CountVectorizer()

In [21]:
test_sentence = ["Meyer's decision to hire Doyle as the team's director of sports performance came to light Thursday when the Jaguars announced his 28-member coaching staff. Meyer also confirmed the hiring of offensive coordinator Darrell Bevell, defensive coordinator Joe Cullen, and former Louisville, Texas and South Florida head coach Charlie Strong as assistant head coach/inside linebackers coach."]

In [22]:
bag = count.fit_transform(test_sentence).toarray()

In [23]:
model.predict(bag)

InvalidArgumentError:  Input to reshape is a tensor with 11776 values, but the requested shape has 14336
	 [[node functional_3/reshape_1/Reshape (defined at <ipython-input-16-62ce588ed3b8>:1) ]] [Op:__inference_predict_function_11762]

Function call stack:
predict_function


In [18]:
model.predict(x)

array([[0.87768024, 0.12231974],
       [0.04613183, 0.9538682 ],
       [0.05524229, 0.94475776],
       ...,
       [0.9988391 , 0.001161  ],
       [0.9854683 , 0.01453162],
       [0.9988248 , 0.00117526]], dtype=float32)

In [19]:
x

array([[16683, 14037,  9038, ...,   473,   473,   473],
       [16683,  7382,  5498, ...,   473,   473,   473],
       [ 5461,  2573, 16947, ...,   473,   473,   473],
       ...,
       [ 1347,  9053, 15743, ...,   473,   473,   473],
       [16683, 16729,  9966, ...,   473,   473,   473],
       [ 5713,  9038, 18298, ...,   473,   473,   473]])

In [20]:
bag

array([[1, 1, 1, 1, 1, 1, 1]])