# GloVe 



In [1]:
import os

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint

from bidlstm import BidLstm
from helpers import make_df, make_glovevec

# Train the BidLstm classifier with pre-trained GloVe vectors, then write
# per-class predictions for the test set to a submission CSV.

# Seed NumPy's global RNG for repeatability.
# NOTE(review): this does not by itself seed the TensorFlow backend — confirm
# whether full run-to-run determinism is expected.
np.random.seed(7)

# Hyper-parameters handed to the project helpers and the model constructor.
# NOTE(review): presumably vocabulary size, padded sequence length and
# embedding dimension respectively — verify against helpers.py / bidlstm.py.
max_features = 100000
maxlen = 150
embed_size = 300
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult",
                "identity_hate"]

# Create the output directory *before* the long training run: to_csv() does
# not create missing parent directories, so without this a missing
# ./submissions folder would only surface after training has finished.
submission_path = "./submissions/bidlstm_05.csv"
os.makedirs(os.path.dirname(submission_path), exist_ok=True)

xtr, xte, y, word_index = make_df("./input/train.csv",
                                  "./input/test.csv",
                                  max_features, maxlen, list_classes)

embedding_vector = make_glovevec("./input/glove.840B.300d.txt",
                                 max_features, embed_size, word_index)

model = BidLstm(maxlen, max_features, embed_size, embedding_vector)
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# Checkpoint only when val_loss improves, and stop once it has failed to
# improve for 3 consecutive epochs.
file_path = ".model.hdf5"
ckpt = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                       save_best_only=True, mode='min')
early = EarlyStopping(monitor="val_loss", mode="min", patience=3)
model.fit(xtr, y, batch_size=512, epochs=20, validation_split=0.1,
          callbacks=[ckpt, early])

# Restore the best checkpointed weights; the in-memory model holds the
# weights of the last epoch run, which need not be the best one.
model.load_weights(file_path)
print("Predicting with model...")
y_test = model.predict(xte)

# Fill the label columns of the sample submission with the predictions.
sample_submission = pd.read_csv("./input/sample_submission.csv")
sample_submission[list_classes] = y_test
print("Saving to submission file...")
sample_submission.to_csv(submission_path, index=False)

    

Using TensorFlow backend.


[[ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.27204001 -0.06203    -0.1884     ...,  0.13015001 -0.18317001  0.1323    ]
 [ 0.31924     0.06316    -0.27858001 ...,  0.082745    0.097801
   0.25044999]
 ..., 
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [-0.37496999 -0.37419999  0.067547   ..., -0.026452   -0.23654    -0.037388  ]]
Train on 143613 samples, validate on 15958 samples
Epoch 1/20
Epoch 00001: val_loss improved from inf to 0.04957, saving model to .model.hdf5
Epoch 2/20
Epoch 00002: val_loss improved from 0.04957 to 0.04544, saving model to .model.hdf5
Epoch 3/20
Epoch 00003: val_loss improved from 0.04544 to 0.04388, saving model to .model.hdf5
Epoch 4/20
Epoch 00004: val_loss improved from 0.04388 to 0.04307, saving model to .model.hdf5
Epoch 5/20
Epoch 00005: val_loss improved from 0.04307 to 0.04211, saving model to .m

###### 