# FastText Models

In [None]:
import numpy as np
import pandas as pd

from architectures import BidAttentionLstm, BidMaxPoolGru, BidConvPoolGru
from helpers import make_df, make_embed_vec, predict_and_save

from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG so NumPy-driven randomness is repeatable.
np.random.seed(7)

# Hyperparameters handed to the project helpers and model constructors.
max_features = 30000
maxlen = 150
embed_size = 300
list_classes = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

# make_df is a project helper (helpers module). It returns the train/test
# inputs, the labels, and the word index / tokenizer used below.
# NOTE(review): presumably xtr/xte are padded token-id sequences of length
# maxlen -- confirm in helpers.make_df.
xtr, xte, y, word_index, tokenizer = make_df(
    "./input/train.csv",
    "./input/test.csv",
    max_features,
    maxlen,
    list_classes,
)

# Build the embedding data from the fastText crawl vectors file.
# NOTE(review): presumably a (vocab, embed_size) matrix -- confirm in
# helpers.make_embed_vec.
embedding_vector = make_embed_vec(
    "./input/crawl-300d-2M.vec",
    max_features,
    embed_size,
    word_index,
    tokenizer,
)

# Keep 90% of the rows for training; the remainder becomes the validation
# split. xtr and y are rebound to the training portion, so the full training
# set is no longer reachable under those names after this line.
xtr, xval, y, yval = train_test_split(
    xtr,
    y,
    train_size=0.90,
    random_state=233,
)

## Callbacks

In [None]:
from helpers import RocAucEvaluation
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Checkpoint filename template; ModelCheckpoint fills in {epoch:02d}.
# NOTE(review): every model trained with `ckpt` writes to this same
# template, so a later training run overwrites same-numbered checkpoint
# files left by an earlier one.
file_path = "./modelckpts/.model.{epoch:02d}.hdf5"

# NOTE(review): save_best_only is not passed, so monitor/mode presumably do
# not restrict which epochs get written -- confirm against the Keras docs.
ckpt = ModelCheckpoint(
    file_path,
    monitor='val_loss',
    verbose=2,
    mode='min',
)

# An EarlyStopping callback is constructed here, but it is deliberately NOT
# included in the callbacks lists passed to model.fit() in the training
# cells: it watches val_loss, not the ROC/AUC score.
early = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=3,
)

# Project callback from helpers, given the held-out split and interval=1.
# NOTE(review): presumably reports ROC/AUC on (xval, yval) every epoch --
# confirm in helpers.RocAucEvaluation.
roc = RocAucEvaluation(
    validation_data=(xval, yval),
    interval=1,
)

# CapsNet

In [None]:
from architectures import CapsuleModel

# Build the capsule model from the project's architectures module, passing
# the sequence length, vocabulary cap, embedding width and embedding data.
# NOTE(review): presumably returns a compiled Keras model -- confirm in
# architectures.CapsuleModel.
model = CapsuleModel(maxlen, max_features, embed_size, embedding_vector)

In [None]:
# Train the capsule model: 10 epochs, batches of 512, evaluated against the
# held-out (xval, yval) split, with the checkpoint and ROC/AUC callbacks.
model.fit(
    xtr,
    y,
    batch_size=512,
    epochs=10,
    validation_data=(xval, yval),
    callbacks=[ckpt, roc],
    verbose=1,
)

In [None]:
# predict_and_save is a project helper (helpers module), called here with
# the trained model and the test inputs.
# NOTE(review): '09' looks like a checkpoint epoch number matching the
# {epoch:02d} pattern in file_path, and 'capsnet_01' an output name --
# confirm both in helpers.predict_and_save.
predict_and_save(model, xte, '09', 'capsnet_01')

# Bidirectional LSTM with Attention

In [None]:
from architectures import BidAttentionLstm
# Rebind `model` to a bidirectional LSTM with attention, built with the same
# sequence length, vocabulary cap, embedding width and embedding data.
# NOTE(review): presumably returns a compiled Keras model -- confirm in
# architectures.BidAttentionLstm.
model = BidAttentionLstm(maxlen, max_features, embed_size, embedding_vector)

In [None]:
# Train the attention LSTM: 30 epochs, batches of 512, evaluated against the
# held-out (xval, yval) split, with the checkpoint and ROC/AUC callbacks.
model.fit(
    xtr,
    y,
    batch_size=512,
    epochs=30,
    validation_data=(xval, yval),
    callbacks=[ckpt, roc],
    verbose=1,
)

In [None]:
# predict_and_save is a project helper (helpers module), called here with
# the trained model and the test inputs.
# NOTE(review): '03' looks like a checkpoint epoch number matching the
# {epoch:02d} pattern in file_path, and 'ft_bidlstm_02' an output name --
# confirm both in helpers.predict_and_save.
predict_and_save(model, xte, '03', 'ft_bidlstm_02')

# Bidirectional GRU model with Max Pooling

In [None]:
# Rebind `model` to the bidirectional GRU with max pooling (project
# architectures module), built from the shared hyperparameters.
model = BidMaxPoolGru(
    maxlen,
    max_features,
    embed_size,
    embedding_vector,
)

# Train for 20 epochs with batches of 1024, evaluated against the held-out
# (xval, yval) split, with the checkpoint and ROC/AUC callbacks. This run
# uses verbose=2, unlike the other training cells, which use verbose=1.
model.fit(
    xtr,
    y,
    batch_size=1024,
    epochs=20,
    validation_data=(xval, yval),
    callbacks=[ckpt, roc],
    verbose=2,
)

In [None]:
# predict_and_save is a project helper (helpers module), called here with
# the trained model and the test inputs.
# NOTE(review): '03' looks like a checkpoint epoch number matching the
# {epoch:02d} pattern in file_path, and 'ft_bidgru_02' an output name --
# confirm both in helpers.predict_and_save.
predict_and_save(model, xte, '03', 'ft_bidgru_02')

# ConvCapsule

In [None]:
from architectures import ConvCapsule
# Rebind `model` to the convolutional capsule architecture (project
# architectures module), built from the shared hyperparameters.
model = ConvCapsule(
    maxlen,
    max_features,
    embed_size,
    embedding_vector,
)

# Train for 10 epochs with batches of 64, evaluated against the held-out
# (xval, yval) split, with the checkpoint and ROC/AUC callbacks.
model.fit(
    xtr,
    y,
    batch_size=64,
    epochs=10,
    validation_data=(xval, yval),
    callbacks=[ckpt, roc],
    verbose=1,
)

In [None]:
# predict_and_save is a project helper (helpers module), called here with
# the trained model and the test inputs.
# NOTE(review): '08' looks like a checkpoint epoch number matching the
# {epoch:02d} pattern in file_path, and 'ft_convcapsule_01' an output name
# -- confirm both in helpers.predict_and_save.
predict_and_save(model, xte, '08', 'ft_convcapsule_01')