In [1]:
import os
import time
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm
import math
from sklearn.model_selection import train_test_split
from sklearn import metrics

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, CuDNNLSTM, Embedding, Dropout, Activation, CuDNNGRU, Conv1D
from keras.layers import Bidirectional, GlobalMaxPool1D
from keras.optimizers import Adam
from keras.models import Model
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints, optimizers, layers

Using TensorFlow backend.


In [2]:
import tensorflow as tf
os.environ['cuda_visible_device'] = '0'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0',
 '/job:localhost/replica:0/task:0/device:GPU:1']

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
print("Train shape : ",train_df.shape)
print("Test shape : ",test_df.shape)

Train shape :  (1306122, 3)
Test shape :  (56370, 2)


In [4]:
## split to train and val
train_df, val_df = train_test_split(train_df, test_size=0.08, random_state=2018)

## some config values 
embed_size = 300 # how big is each word vector
max_features = 95000 # how many unique words to use (i.e num rows in embedding vector)
maxlen = 70 # max number of words in a question to use

## fill up the missing values
train_X = train_df["question_text"].fillna("_##_").values
val_X = val_df["question_text"].fillna("_##_").values
test_X = test_df["question_text"].fillna("_##_").values

## Tokenize the sentences
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(list(train_X))
train_X = tokenizer.texts_to_sequences(train_X)
val_X = tokenizer.texts_to_sequences(val_X)
test_X = tokenizer.texts_to_sequences(test_X)

## Pad the sentences 
train_X = pad_sequences(train_X, maxlen=maxlen)
val_X = pad_sequences(val_X, maxlen=maxlen)
test_X = pad_sequences(test_X, maxlen=maxlen)

## Get the target values
train_y = train_df['target'].values
val_y = val_df['target'].values

In [5]:
#shuffling the data
np.random.seed(2018)
trn_idx = np.random.permutation(len(train_X))
val_idx = np.random.permutation(len(val_X))

train_X = train_X[trn_idx]
val_X = val_X[val_idx]
train_y = train_y[trn_idx]
val_y = val_y[val_idx]

In [6]:
EMBEDDING_FILE = 'glove.840B.300d/glove.840B.300d.txt'
def get_coefs(word,*arr): return word, np.asarray(arr, dtype='float32')
embeddings_index = dict(get_coefs(*o.split(" ")) for o in open(EMBEDDING_FILE))

all_embs = np.stack(embeddings_index.values())
emb_mean,emb_std = all_embs.mean(), all_embs.std()
embed_size = all_embs.shape[1]

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features: continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector

In [7]:
# https://www.kaggle.com/yekenot/2dcnn-textclassifier
from keras.layers import Input, Embedding, Dense, Conv2D, MaxPool2D
from keras.layers import Reshape, Flatten, Concatenate, Dropout, SpatialDropout1D

filter_sizes = [1,2,3,5]
num_filters = 36

inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
x = Reshape((maxlen, embed_size, 1))(x)

maxpool_pool = []
for i in range(len(filter_sizes)):
    conv = Conv2D(num_filters, kernel_size=(filter_sizes[i], embed_size),
                                 kernel_initializer='he_normal', activation='elu')(x)
    maxpool_pool.append(MaxPool2D(pool_size=(maxlen - filter_sizes[i] + 1, 1))(conv))

z = Concatenate(axis=1)(maxpool_pool)   
z = Flatten()(z)
z = Dropout(0.1)(z)

outp = Dense(1, activation="sigmoid")(z)

model = Model(inputs=inp, outputs=outp)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [8]:
## Train the model 
model.fit(train_X, train_y, batch_size=512, epochs=2, validation_data=(val_X, val_y))

Train on 1201632 samples, validate on 104490 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fbf599d4208>

In [9]:
pred_cnn_val_y = model.predict([val_X], batch_size=1024, verbose=1)
for thresh in np.arange(0.1, 0.501, 0.01):
    thresh = np.round(thresh, 2)
    print("F1 score at threshold {0} is {1}".format(thresh, metrics.f1_score(val_y, (pred_cnn_val_y>thresh).astype(int))))

F1 score at threshold 0.1 is 0.6145605596851772
F1 score at threshold 0.11 is 0.623360609797108
F1 score at threshold 0.12 is 0.6315970628728775
F1 score at threshold 0.13 is 0.6376168224299065
F1 score at threshold 0.14 is 0.6427804936210803
F1 score at threshold 0.15 is 0.6471730162581898
F1 score at threshold 0.16 is 0.6512431365290888
F1 score at threshold 0.17 is 0.6555032925682032
F1 score at threshold 0.18 is 0.6583518930957682
F1 score at threshold 0.19 is 0.6612392582541835
F1 score at threshold 0.2 is 0.6644392033542977
F1 score at threshold 0.21 is 0.6650733585607117
F1 score at threshold 0.22 is 0.6649636999193331
F1 score at threshold 0.23 is 0.6671652954375468
F1 score at threshold 0.24 is 0.6686370197264416
F1 score at threshold 0.25 is 0.6696757169640997
F1 score at threshold 0.26 is 0.669570103961787
F1 score at threshold 0.27 is 0.6698360888384304
F1 score at threshold 0.28 is 0.6713456993482776
F1 score at threshold 0.29 is 0.6712487339024743
F1 score at threshold 0.

In [10]:
pred_cnn_test_y = model.predict([test_X], batch_size=1024, verbose=1)



In [11]:
del word_index, embeddings_index, all_embs, embedding_matrix, model, inp, x
import gc; gc.collect()
time.sleep(10)

In [12]:
class Attention(Layer):
    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight((input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        return None

    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0],  self.features_dim

In [13]:
EMBEDDING_FILE = 'glove.840B.300d/glove.840B.300d.txt'
def get_coefs(word,*arr): return word, np.asarray(arr, dtype='float32')
embeddings_index = dict(get_coefs(*o.split(" ")) for o in open(EMBEDDING_FILE))

all_embs = np.stack(embeddings_index.values())
emb_mean,emb_std = all_embs.mean(), all_embs.std()
embed_size = all_embs.shape[1]

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features: continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector
        
inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(inp)
x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x)
x = Attention(maxlen)(x)
x = Dense(64, activation="relu")(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])

In [14]:
model.fit(train_X, train_y, batch_size=512, epochs=3, validation_data=(val_X, val_y))

Train on 1201632 samples, validate on 104490 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fbf3ea3d2b0>

In [15]:
pred_glove_val_y = model.predict([val_X], batch_size=1024, verbose=1)
for thresh in np.arange(0.1, 0.501, 0.01):
    thresh = np.round(thresh, 2)
    print("F1 score at threshold {0} is {1}".format(thresh, metrics.f1_score(val_y, (pred_glove_val_y>thresh).astype(int))))

F1 score at threshold 0.1 is 0.633719288336628
F1 score at threshold 0.11 is 0.6406447898675878
F1 score at threshold 0.12 is 0.6465759849906191
F1 score at threshold 0.13 is 0.6515982824427481
F1 score at threshold 0.14 is 0.6555454820187367
F1 score at threshold 0.15 is 0.6600942991855979
F1 score at threshold 0.16 is 0.6633215635259865
F1 score at threshold 0.17 is 0.6670430309873291
F1 score at threshold 0.18 is 0.6709382731713507
F1 score at threshold 0.19 is 0.6733371817073952
F1 score at threshold 0.2 is 0.6755267423014588
F1 score at threshold 0.21 is 0.6773686279648801
F1 score at threshold 0.22 is 0.6788736118455844
F1 score at threshold 0.23 is 0.6800933644548183
F1 score at threshold 0.24 is 0.682454251883746
F1 score at threshold 0.25 is 0.682447097124254
F1 score at threshold 0.26 is 0.6828266228430567
F1 score at threshold 0.27 is 0.6841124076503486
F1 score at threshold 0.28 is 0.6839796699853791
F1 score at threshold 0.29 is 0.6852137351086195
F1 score at threshold 0.3

In [16]:
pred_glove_test_y = model.predict([test_X], batch_size=1024, verbose=1)



In [17]:
del word_index, embeddings_index, all_embs, embedding_matrix, model, inp, x
import gc; gc.collect()
time.sleep(10)

In [18]:
EMBEDDING_FILE = 'wiki-news-300d-1M/wiki-news-300d-1M.vec'
def get_coefs(word,*arr): return word, np.asarray(arr, dtype='float32')
embeddings_index = dict(get_coefs(*o.split(" ")) for o in open(EMBEDDING_FILE) if len(o)>100)

all_embs = np.stack(embeddings_index.values())
emb_mean,emb_std = all_embs.mean(), all_embs.std()
embed_size = all_embs.shape[1]

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features: continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector
        
inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(inp)
x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x)
x = Attention(maxlen)(x)
x = Dense(64, activation="relu")(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])
model.fit(train_X, train_y, batch_size=512, epochs=3, validation_data=(val_X, val_y))

Train on 1201632 samples, validate on 104490 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fbf4c848a90>

In [19]:
pred_fasttext_val_y = model.predict([val_X], batch_size=1024, verbose=1)
for thresh in np.arange(0.1, 0.501, 0.01):
    thresh = np.round(thresh, 2)
    print("F1 score at threshold {0} is {1}".format(thresh, metrics.f1_score(val_y, (pred_fasttext_val_y>thresh).astype(int))))

F1 score at threshold 0.1 is 0.579749509582013
F1 score at threshold 0.11 is 0.5898812596799173
F1 score at threshold 0.12 is 0.599217676287134
F1 score at threshold 0.13 is 0.6070097748015337
F1 score at threshold 0.14 is 0.6139365918097754
F1 score at threshold 0.15 is 0.619479937233804
F1 score at threshold 0.16 is 0.626599634369287
F1 score at threshold 0.17 is 0.6319517009172182
F1 score at threshold 0.18 is 0.6366961651917403
F1 score at threshold 0.19 is 0.6409641443818204
F1 score at threshold 0.2 is 0.645546484708176
F1 score at threshold 0.21 is 0.648191581286261
F1 score at threshold 0.22 is 0.6509475264244169
F1 score at threshold 0.23 is 0.6537142857142857
F1 score at threshold 0.24 is 0.655008357978655
F1 score at threshold 0.25 is 0.6578947368421053
F1 score at threshold 0.26 is 0.6584530241138489
F1 score at threshold 0.27 is 0.6598267821452365
F1 score at threshold 0.28 is 0.6612696485191931
F1 score at threshold 0.29 is 0.6629834254143647
F1 score at threshold 0.3 is 

In [20]:
pred_fasttext_test_y = model.predict([test_X], batch_size=1024, verbose=1)



In [21]:
del word_index, embeddings_index, all_embs, embedding_matrix, model, inp, x
import gc; gc.collect()
time.sleep(10)

In [22]:
EMBEDDING_FILE = 'paragram_300_sl999/paragram_300_sl999.txt'
def get_coefs(word,*arr): return word, np.asarray(arr, dtype='float32')
embeddings_index = dict(get_coefs(*o.split(" ")) for o in open(EMBEDDING_FILE, encoding="utf8", errors='ignore') if len(o)>100)

all_embs = np.stack(embeddings_index.values())
emb_mean,emb_std = all_embs.mean(), all_embs.std()
embed_size = all_embs.shape[1]

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features: continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector
        
inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(inp)
x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x)
x = Attention(maxlen)(x)
x = Dense(64, activation="relu")(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])

In [23]:
model.fit(train_X, train_y, batch_size=512, epochs=3, validation_data=(val_X, val_y))

Train on 1201632 samples, validate on 104490 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fbf4ba3fd30>

In [24]:
pred_paragram_val_y = model.predict([val_X], batch_size=1024, verbose=1)
for thresh in np.arange(0.1, 0.501, 0.01):
    thresh = np.round(thresh, 2)
    print("F1 score at threshold {0} is {1}".format(thresh, metrics.f1_score(val_y, (pred_paragram_val_y>thresh).astype(int))))

F1 score at threshold 0.1 is 0.5946696851203952
F1 score at threshold 0.11 is 0.6042960935032115
F1 score at threshold 0.12 is 0.6123985160492499
F1 score at threshold 0.13 is 0.6189902529843391
F1 score at threshold 0.14 is 0.6257033034371344
F1 score at threshold 0.15 is 0.6322346811643448
F1 score at threshold 0.16 is 0.6385973007267275
F1 score at threshold 0.17 is 0.6444496597042948
F1 score at threshold 0.18 is 0.6489608765557078
F1 score at threshold 0.19 is 0.6531921331316188
F1 score at threshold 0.2 is 0.6566542021700484
F1 score at threshold 0.21 is 0.6591741695125738
F1 score at threshold 0.22 is 0.6610873395419079
F1 score at threshold 0.23 is 0.664710385302373
F1 score at threshold 0.24 is 0.6664943457189015
F1 score at threshold 0.25 is 0.6677126233902072
F1 score at threshold 0.26 is 0.6683855612852042
F1 score at threshold 0.27 is 0.6701472556894243
F1 score at threshold 0.28 is 0.6718982546340143
F1 score at threshold 0.29 is 0.6725458025704129
F1 score at threshold 0

In [25]:
pred_paragram_test_y = model.predict([test_X], batch_size=1024, verbose=1)



In [26]:
del word_index, embeddings_index, all_embs, embedding_matrix, model, inp, x
import gc; gc.collect()
time.sleep(10)

In [49]:
# convert string to lower case
train_texts = train_df['question_text'].values
train_texts = [s.lower() for s in train_texts]

val_texts = val_df['question_text'].values
val_texts = [s.lower() for s in val_texts]

# =======================Convert string to index================
# Tokenizer
tk = Tokenizer(num_words=None, char_level=True, oov_token='UNK')
tk.fit_on_texts(train_texts)
# If we already have a character list, then replace the tk.word_index
# If not, just skip below part

# -----------------------Skip part start--------------------------
# construct a new vocabulary
alphabet = "abcdefghijklmnopqrstuvwxyz0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}"
char_dict = {}
for i, char in enumerate(alphabet):
    char_dict[char] = i + 1

# Use char_dict to replace the tk.word_index
tk.word_index = char_dict.copy()
# Add 'UNK' to the vocabulary
tk.word_index[tk.oov_token] = max(char_dict.values()) + 1
# -----------------------Skip part end----------------------------

# Convert string to index
train_sequences = tk.texts_to_sequences(train_texts)
val_texts = tk.texts_to_sequences(val_texts)

# Padding
train_data = pad_sequences(train_sequences, maxlen=500, padding='post')
val_data = pad_sequences(val_texts, maxlen=500, padding='post')

# Convert to numpy array
train_data = np.array(train_data, dtype='float32')
val_data = np.array(val_data, dtype='float32')

#=======================Get classes================
train_classes = train_df['target'].values
train_class_list = [x for x in train_classes]
val_classes = val_df['target'].values
val_class_list = [x for x in val_classes]
# from keras.utils import to_categorical
# train_classes = to_categorical(train_class_list)
# val_classes = to_categorical(val_class_list)

In [51]:
vocab_size = len(tk.word_index)

In [52]:
embedding_weights =  [] #(70,69)
embedding_weights.append(np.zeros(vocab_size))

for char,i in tk.word_index.items():
    onehot = np.zeros(vocab_size)
    onehot[i-1] = 1
    embedding_weights.append(onehot)

embedding_weights = np.array(embedding_weights)

In [53]:
#parameter
input_size = 500
embedding_size = 69
conv_layers = [[256,7,3],
              [256,7,3],
              [256,3,-1],
              [256,3,-1],
              [256,3,-1],
              [256,3,3]]

fully_connected_layers = [1024]
dropout_p = 0.5
optimizer = 'adam'
loss = 'binary_crossentropy'

In [54]:
#embedding layer initialization
embedding_layer = Embedding(vocab_size+1, embedding_size, input_length=input_size, 
                            weights=[embedding_weights])

In [55]:
#Model
from keras.layers import Input, Embedding, Activation, Flatten, Dense
from keras.layers import Conv1D, MaxPooling1D, Dropout

#Input
inputs = Input(shape=(input_size,), name='input', dtype='int64')
#Embedding
x = embedding_layer(inputs)
#Conv
for filter_num, filter_size, pooling_size in conv_layers:
    x = Conv1D(filter_num, filter_size)(x)
    x = Activation('relu')(x)
    if pooling_size != -1:
        x = MaxPooling1D(pool_size=pooling_size)(x)
x = Flatten()(x)

#FC layers
for dense_size in fully_connected_layers:
    x = Dense(dense_size, activation='relu')(x)
    x = Dropout(dropout_p)(x)
    
#ouput
x = Dense(1, activation="sigmoid")(x)

#build
model = Model(inputs=inputs, outputs=x)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 500)               0         
_________________________________________________________________
embedding_7 (Embedding)      (None, 500, 69)           4830      
_________________________________________________________________
conv1d_19 (Conv1D)           (None, 494, 256)          123904    
_________________________________________________________________
activation_19 (Activation)   (None, 494, 256)          0         
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, 164, 256)          0         
_________________________________________________________________
conv1d_20 (Conv1D)           (None, 158, 256)          459008    
_________________________________________________________________
activation_20 (Activation)   (None, 158, 256)          0         
__________

In [56]:
#Shuffle
indices = np.arange(train_data.shape[0])
np.random.shuffle(indices)

x_train = train_data[indices]
y_train = train_classes[indices]

x_val = val_data
y_val = val_classes

In [57]:
#Training
model.fit(x_train, y_train,
          validation_data=(x_val, y_val),
          batch_size=128,
          epochs=10,
          verbose=2)

Train on 1201632 samples, validate on 104490 samples
Epoch 1/10
 - 288s - loss: 0.1372 - acc: 0.9490 - val_loss: 0.1246 - val_acc: 0.9517
Epoch 2/10
 - 287s - loss: 0.1187 - acc: 0.9538 - val_loss: 0.1174 - val_acc: 0.9532
Epoch 3/10
 - 287s - loss: 0.1128 - acc: 0.9556 - val_loss: 0.1202 - val_acc: 0.9540
Epoch 4/10
 - 287s - loss: 0.1078 - acc: 0.9573 - val_loss: 0.1189 - val_acc: 0.9532
Epoch 5/10
 - 287s - loss: 0.1025 - acc: 0.9593 - val_loss: 0.1176 - val_acc: 0.9538
Epoch 6/10
 - 287s - loss: 0.0968 - acc: 0.9616 - val_loss: 0.1201 - val_acc: 0.9544
Epoch 7/10
 - 287s - loss: 0.0899 - acc: 0.9641 - val_loss: 0.1233 - val_acc: 0.9538
Epoch 8/10
 - 287s - loss: 0.0833 - acc: 0.9669 - val_loss: 0.1276 - val_acc: 0.9532
Epoch 9/10
 - 288s - loss: 0.0755 - acc: 0.9703 - val_loss: 0.1297 - val_acc: 0.9516
Epoch 10/10
 - 286s - loss: 0.0685 - acc: 0.9731 - val_loss: 0.1452 - val_acc: 0.9524


<keras.callbacks.History at 0x7fbf48e6acc0>

In [60]:
pred_char_cnn_val = model.predict([x_val], batch_size=1024, verbose=1)
for thresh in np.arange(0.1, 0.501, 0.01):
    thresh = np.round(thresh, 2)
    print("F1 score at threshold {0} is {1}".format(thresh, metrics.f1_score(y_val, (pred_char_cnn_val>thresh).astype(int))))

F1 score at threshold 0.1 is 0.5930435909061467
F1 score at threshold 0.11 is 0.5959329195827281
F1 score at threshold 0.12 is 0.5969346598547997
F1 score at threshold 0.13 is 0.5978565089767218
F1 score at threshold 0.14 is 0.5988239363542027
F1 score at threshold 0.15 is 0.5989477376359172
F1 score at threshold 0.16 is 0.5978284011070896
F1 score at threshold 0.17 is 0.5987224574750591
F1 score at threshold 0.18 is 0.5987667754805948
F1 score at threshold 0.19 is 0.5989023051591656
F1 score at threshold 0.2 is 0.5975330526626782
F1 score at threshold 0.21 is 0.5970750634233696
F1 score at threshold 0.22 is 0.5963855421686747
F1 score at threshold 0.23 is 0.5963498098859316
F1 score at threshold 0.24 is 0.5957055214723926
F1 score at threshold 0.25 is 0.5953301376217721
F1 score at threshold 0.26 is 0.5934219734079775
F1 score at threshold 0.27 is 0.5918031502233366
F1 score at threshold 0.28 is 0.5920896818504777
F1 score at threshold 0.29 is 0.5913430935709739
F1 score at threshold 

In [None]:
pred_char_cnn_test = model.predict([test_X], batch_size=1024, verbose=1)

In [None]:
pred_val_y = (4 * pred_glove_val_y + pred_fasttext_val_y + 3 * pred_paragram_val_y + 2 * pred_cnn_val_y) / 10.0

thresholds = []
for thresh in np.arange(0.1, 0.501, 0.01):
    thresh = np.round(thresh, 2)
    res = metrics.f1_score(val_y, (pred_val_y > thresh).astype(int))
    thresholds.append([thresh, res])
    print("F1 score at threshold {0} is {1}".format(thresh, res))
    
thresholds.sort(key=lambda x: x[1], reverse=True)
best_thresh = thresholds[0][0]
print("Best threshold: ", best_thresh)