In [1]:
from keras.preprocessing import sequence
from keras.utils import to_categorical
from keras.models import Model, load_model
from keras.layers import Conv1D, Dropout, Dense, Input, Embedding, MaxPooling1D, Flatten, BatchNormalization, Activation
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

import mxnet as mx
from mxnet import gluon
from mxnet import autograd

import pickle
import numpy as np
import time
import math

Using TensorFlow backend.


In [2]:
def time_since(start):
    now = time.time()
    s = now - start
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [20]:
MAX_WORDS_IN_SEQ = 3000
EMBED_DIM = 32
MODEL_PATH = "model/spam_detect_char"
ctx = mx.cpu()

In [4]:
with open("data/dataset.pkl", 'rb') as f:
    sequences, labels, word2index = pickle.load(f)
    
num_words = len(word2index)
print(f"Found {num_words} unique tokens")

Found 43 unique tokens


In [5]:
data = sequence.pad_sequences(sequences, maxlen=MAX_WORDS_IN_SEQ, padding='post', truncating='post')
targets = to_categorical(labels)

In [6]:
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', targets.shape)
x_train, x_test, y_train, y_test = train_test_split(data, targets, test_size=0.25)

Shape of data tensor: (33716, 3000)
Shape of label tensor: (33716, 2)


In [15]:
input_seq = Input(shape=[MAX_WORDS_IN_SEQ, ], dtype='int32')
embed_seq = Embedding(num_words + 1, EMBED_DIM, input_length=MAX_WORDS_IN_SEQ)(
    input_seq)
conv_1 = Conv1D(128, 5)(embed_seq)
conv_1 = BatchNormalization()(conv_1)
conv_1 = Activation(activation='relu')(conv_1)
conv_1 = MaxPooling1D(pool_size=5)(conv_1)

conv_2 = Conv1D(128, 5)(conv_1)
conv_2 = BatchNormalization()(conv_2)
conv_2 = Activation(activation='relu')(conv_2)
conv_2 = MaxPooling1D(pool_size=5)(conv_2)

conv_3 = Conv1D(128, 5)(conv_2)
conv_3 = BatchNormalization()(conv_3)
conv_3 = Activation(activation='relu')(conv_3)
conv_3 = MaxPooling1D(pool_size=35)(conv_3)

flat = Flatten()(conv_3)
flat = Dropout(0.25)(flat)
fc1 = Dense(128, activation='relu')(flat)
dense_1 = Dropout(0.25)(flat)
fc2 = Dense(2, activation='softmax')(fc1)

model = Model(input_seq, fc2)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

  if d.decorator_argspec is not None), _inspect.getargspec(target))


In [None]:
model = load_model(MODEL_PATH)
model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=5,
    callbacks=[ModelCheckpoint(MODEL_PATH, save_best_only=True)],
    validation_data=[x_test, y_test]
)

model.save(MODEL_PATH)

  if d.decorator_argspec is not None), _inspect.getargspec(target))


Train on 25287 samples, validate on 8429 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

## MXNET Implementation

In [7]:
class MxModel(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(MxModel, self).__init__(**kwargs)
        with self.name_scope():
            self.embed = gluon.nn.Embedding(input_dim=num_words + 1, output_dim=EMBED_DIM)
            
            self.conv1 = gluon.nn.Conv1D(channels=128, kernel_size=5)
            self.conv2 = gluon.nn.Conv1D(channels=128, kernel_size=5)
            self.conv3 = gluon.nn.Conv1D(channels=128, kernel_size=5)
            
            self.bnorm1 = gluon.nn.BatchNorm()
            self.bnorm2 = gluon.nn.BatchNorm()
            self.bnorm3 = gluon.nn.BatchNorm()
            
            self.fc1 = gluon.nn.Dense(units=128)
            self.fc2 = gluon.nn.Dense(units=2)
            
            self.dropout = gluon.nn.Dropout(rate=0.25)
    def hybrid_forward(self, F, x, *args, **kwargs):
        x = self.embed(x)
        x = F.relu(self.bnorm1(self.conv1(x)))
        x = F.relu(self.bnorm2(self.conv2(x)))
        x = F.relu(self.bnorm3(self.conv3(x)))
        x = F.relu(self.dropout(self.fc1(x)))
        x = self.dropout(self.fc2(x))
        return x
            

In [8]:
mx_model = MxModel()
mx_model.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

In [9]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
trainer = gluon.Trainer(mx_model.collect_params(), 'adam', {'learning_rate': 0.001})
acc = mx.metric.Accuracy()

In [10]:
train_data = mx.io.NDArrayIter(data=x_train, label=y_train, batch_size=128, shuffle=True)
test_data = mx.io.NDArrayIter(data=x_test, label=y_test, batch_size=128, shuffle=False)

In [14]:
def evaluate_accuracy(data_iterator, net):
    data_iterator.reset()
    acc_test = mx.metric.Accuracy()
    for batch in data_iterator:
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        output = net(data)
        acc_test.update(preds=output, labels=label)
    return acc_test.get()[1]

In [13]:
epochs = 2
smoothing_constant = .01
mx_model.hybridize()

start = time.time()

for e in range(epochs):
    print(f"Epoch {e+1}--------------")
    i = 0
    train_data.reset()
    for batch in train_data:
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = mx_model(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = mx.nd.mean(loss).asscalar()
        acc.update(preds=output, labels=label)
        if i % 50 == 0:
            print(f"loss: {curr_loss}    acc:{acc.get()[1]}")
        i += 1
    print(f"val acc: {evaluate_accuracy(test_data, mx_model)}")
    print(time_since(start))
    acc.reset()
    
print(time_since(start))
mx_model.save_params("data/mx_model")

Epoch 1--------------
loss: 0.7064136266708374    acc:0.5
loss: 0.6858754754066467    acc:0.5
loss: 0.6862083077430725    acc:0.5000386757425742
loss: 0.6491576433181763    acc:0.5009054221854304
val acc: 0.5065104166666666
14m 19s
Epoch 2--------------
loss: 0.6357454061508179    acc:0.50390625
loss: 0.6397137641906738    acc:0.5003063725490197
loss: 0.6163472533226013    acc:0.49176206683168316
loss: 0.5043906569480896    acc:0.47775248344370863
val acc: nan
27m 20s
27m 20s
