In [None]:
from keras.preprocessing import sequence
from keras.utils import to_categorical
from keras.models import Model, load_model
from keras.layers import Conv1D, Dropout, Dense, Input, Embedding, MaxPooling1D, Flatten, BatchNormalization, Activation
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

import mxnet as mx
from mxnet import gluon

import pickle
import numpy as np

In [None]:
# Notebook-wide settings read by the cells below.
MODEL_PATH = "models/spam_detect_char"  # target path handed to ModelCheckpoint
EMBED_DIM = 32  # size of each embedding vector
MAX_WORDS_IN_SEQ = 3_000  # length every sequence is padded/truncated to

In [None]:
# Read the pickled dataset: a 3-item payload unpacked into the token
# sequences, their labels and the token -> index mapping.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a dataset file you trust.
with open("data/dataset.pkl", mode='rb') as dataset_file:
    payload = pickle.load(dataset_file)

sequences, labels, word2index = payload

# Vocabulary size = number of entries in the mapping.
num_words = len(word2index)
print(f"Found {num_words} unique tokens")

In [None]:
# One-hot encode the labels so they match the softmax output of the model.
targets = to_categorical(labels)

# Bring every sequence to exactly MAX_WORDS_IN_SEQ entries: shorter ones are
# padded at the end, longer ones are cut at the end.
data = sequence.pad_sequences(
    sequences,
    maxlen=MAX_WORDS_IN_SEQ,
    padding='post',
    truncating='post',
)

In [None]:
# Report the tensor shapes, then hold out 25% of the samples for evaluation.
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', targets.shape)

# A fixed random_state makes the shuffle (and therefore every metric computed
# downstream) identical after Restart Kernel -> Run All.
x_train, x_test, y_train, y_test = train_test_split(
    data, targets, test_size=0.25, random_state=42
)

In [None]:
def _conv_block(tensor, pool_size, filters=128, kernel_size=5):
    """Apply Conv1D -> BatchNormalization -> ReLU -> MaxPooling1D to `tensor`.

    Args:
        tensor: Keras tensor to feed into the convolution.
        pool_size: window size of the final max-pooling layer.
        filters: number of convolution filters.
        kernel_size: width of the convolution kernel.

    Returns:
        The Keras tensor produced by the max-pooling layer.
    """
    tensor = Conv1D(filters, kernel_size)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = Activation(activation='relu')(tensor)
    return MaxPooling1D(pool_size=pool_size)(tensor)


# Integer token ids in, fixed length MAX_WORDS_IN_SEQ.
input_seq = Input(shape=[MAX_WORDS_IN_SEQ, ], dtype='int32')
# NOTE(review): pad_sequences pads with 0; if word2index is 1-based the largest
# id equals num_words and input_dim should be num_words + 1 — confirm against
# the code that built the dataset.
embed_seq = Embedding(num_words, EMBED_DIM, input_length=MAX_WORDS_IN_SEQ)(
    input_seq)

# Three identical convolution stages that differ only in pooling size.
# With MAX_WORDS_IN_SEQ = 3000 and the layers' default 'valid' padding the
# sequence axis shrinks 3000 -> 2996 -> 599 -> 595 -> 119 -> 115 -> 3, so the
# last pool of 35 leaves 3 positions before flattening.
conv_1 = _conv_block(embed_seq, pool_size=5)
conv_2 = _conv_block(conv_1, pool_size=5)
conv_3 = _conv_block(conv_2, pool_size=35)

flat = Flatten()(conv_3)
flat = Dropout(0.25)(flat)
fc1 = Dense(128, activation='relu')(flat)
# The second dropout must consume fc1 and feed the classifier; previously it
# was applied to `flat` and its result was never used, so it had no effect.
dense_1 = Dropout(0.25)(fc1)
fc2 = Dense(2, activation='softmax')(dense_1)

# Two-class softmax classifier trained with categorical cross-entropy.
model = Model(input_seq, fc2)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

In [None]:
# Train for two epochs. ModelCheckpoint with save_best_only=True only
# overwrites MODEL_PATH when the monitored quantity (val_loss by default)
# improves.
# NOTE(review): the held-out test split doubles as the validation set here, so
# the checkpoint is selected on the same data it would later be scored on.
model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=2,
    callbacks=[ModelCheckpoint(MODEL_PATH, save_best_only=True)],
    # Keras documents validation_data as a tuple (x_val, y_val); a list can be
    # interpreted as a multi-input `x` by some Keras versions.
    validation_data=(x_test, y_test),
)

In [None]:
# Display the output shape of the layer at index 7 of the model.
# NOTE(review): index 7 is presumably the BatchNormalization of the second
# convolution stage — confirm with model.summary() before relying on it.
model.layers[7].output_shape