In [10]:
import numpy as np
import pandas as pd
import keras

In [11]:
def load_data():
    from keras.preprocessing import sequence
    from keras.datasets import imdb
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = 5000)
    X_train = sequence.pad_sequences(X_train, maxlen = 500, padding = "post", truncating = "post")
    X_test = sequence.pad_sequences(X_test, maxlen = 500, padding = "post", truncating = "post")
    
    vocabulary = imdb.get_word_index()
    vocabulary_inv = dict((v, k) for k, v in vocabulary.items())
    vocabulary_inv[0] = '<PAD/>'
    return X_train, y_train, X_test, y_test, vocabulary_inv

In [12]:
X_train, y_train, X_test, y_test, vocabulary_inv = load_data()

In [13]:
X_train.shape, y_train.shape

((25000, 500), (25000,))

In [14]:
def train_word2vec(sentence_matrix, vocabulary_inv, num_features = 300, min_word_count = 1, context = 10):
    from gensim.models import word2vec
    num_workers = 2
    downsampling = 1e-3
    
    print("Training Word2Vec model...")
    sentences = [[vocabulary_inv[w] for w in s] for s in sentence_matrix]
    embedding_model = word2vec.Word2Vec(sentences, workers = num_workers, size = num_features, 
                                        min_count = min_word_count, window = context, sample = downsampling)
    embedding_model.init_sims(replace = True)
    
    num_words = sentence_matrix.max()
    embedding_weights = []
    print("num_words : {}".format(num_words))
    for i in range(num_words + 1):
        word = vocabulary_inv[i]
        if word in embedding_model:
            embedding_weights.append(embedding_model[word])
        else:
            print("word : {}".format(word))
            embedding_weights.append(np.random.uniform(-0.25, 0.25, embedding_model.vector_size))
    print("embedding_weights len : {}".format(len(embedding_weights)))
    return np.vstack(embedding_weights)

In [15]:
embedding_weights = train_word2vec(np.vstack((X_train, X_test)), vocabulary_inv, num_features = 50, min_word_count = 1, context = 10)

Training Word2Vec model...
num_words : 4999
word : a
embedding_weights len : 5000




In [16]:
embedding_weights.shape

(5000, 50)

In [17]:
X_train.shape

(25000, 500)

In [18]:
from keras import backend as K
def calc_recall_score(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def calc_precision_score(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def calc_f1_score(y_true, y_pred):
    precision = calc_precision_score(y_true, y_pred)
    recall = calc_recall_score(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

In [26]:
def _bn_relu(input):
    from keras.layers.normalization import BatchNormalization
    from keras.layers import Activation
    norm = BatchNormalization(axis = 2)(input)
    return Activation("relu")(norm)


def _bn_relu_conv(**conv_params):
    from keras.regularizers import l2
    from keras.layers import Conv1D
    filters = conv_params['filters']
    kernel_size = conv_params['kernel_size']
    strides = conv_params.setdefault("strides", 1)
    kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
    padding = conv_params.setdefault("padding", "same")
    kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1e-4))
    
    def f(input):
        activation = _bn_relu(input)
        return Conv1D(filters = filters, kernel_size = kernel_size,
                     strides = strides, padding = padding,
                     kernel_initializer = kernel_initializer,
                     kernel_regularizer = kernel_regularizer)(activation)
    
    return f


def _shortcut(input, residual):
    import keras.backend as K
    from keras.layers import Conv1D
    from keras.layers.merge import add
    from keras.regularizers import l2
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    
    strides = int(round(input_shape[1] / residual_shape[1]))
    equal_channels = input_shape[2] == residual_shape[2]
    
    shortcut = input
    # 1 * 1 conv if shape is different. Else identity
    if strides > 1 or not equal_channels:
        shortcut = Conv1D(filters = residual_shape[2],
                         kernel_size = 1,
                         strides = strides,
                         padding = "valid",
                         kernel_initializer = "he_normal",
                         kernel_regularizer = l2(1e-4))(input)
    return add([shortcut, residual])
    
    
def basic_block(filters, init_strides = 1, is_first_block_of_first_layer = False):
    from keras.regularizers import l2
    from keras.layers import Conv1D
    def f(input):
        if is_first_block_of_first_layer:
            # don't repeat bn->relu since we just did bn->relu->maxpool
            conv1 = Conv1D(filters = filters, kernel_size = 3,
                          strides = init_strides,
                          padding = "same",
                          kernel_initializer = "he_normal",
                          kernel_regularizer = l2(1e-4))(input)
        else:
            conv1 = _bn_relu_conv(filters = filters, kernel_size = 3,
                                 strides = init_strides)(input)

        residual = _bn_relu_conv(filters = filters, kernel_size = 3)(conv1)
        return _shortcut(input, residual)
    
    return f


def _residual_block(filters, repetitions, is_first_layer = False):
    def f(input):
        for i in range(repetitions):
            init_strides = 1
            if i == 0 and not is_first_layer:
                init_strides = 2
            input = basic_block(filters, init_strides = init_strides, is_first_block_of_first_layer = (is_first_layer and i == 0))(input)
        return input
    
    return f
        
    
def _conv_bn_relu(**conv_params):
    from keras.layers import Conv1D
    from keras.regularizers import l2
    filters = conv_params['filters']
    kernel_size = conv_params['kernel_size']
    strides = conv_params.setdefault('strides', 1)
    kernel_initializer = conv_params.setdefault('kernel_initializer', 'he_normal')
    padding = conv_params.setdefault('padding', 'same')
    kernel_regularizer = conv_params.setdefault('kernel_regularizer', l2(1e-4))
    
    def f(input):
        conv = Conv1D(filters = filters, kernel_size = kernel_size,
                     strides = strides, padding = padding,
                     kernel_initializer = kernel_initializer,
                     kernel_regularizer = kernel_regularizer)(input)
        return _bn_relu(conv)
    return f


def build_model(embedding_weights):
    from keras.layers import Input, MaxPooling1D, AveragePooling1D, Flatten, Dense, Embedding
    from keras.models import Model
    from keras.optimizers import Adam
    import keras.backend as K
    input = Input(shape = (500, ))
    embedding = Embedding(embedding_weights.shape[0], embedding_weights.shape[1], input_length = 500, name = 'embedding')(input)
    conv1 = _conv_bn_relu(filters = 64, kernel_size = 5, strides = 1)(embedding)
    pool1 = MaxPooling1D(pool_size = 3, strides = 2, padding = "same")(conv1)
    
    block = pool1
    filters = 64
    for i, r in enumerate([2, 2, 2, 2]):
        block = _residual_block(filters, repetitions = r, is_first_layer = (i == 0))(block)
        filters *= 2
        
    block = _bn_relu(block)
    
    # Classifier block
    block_shape = K.int_shape(block)
    pool2 = AveragePooling1D(pool_size = (block_shape[1]), strides = 1)(block)
    flatten1 = Flatten()(pool2)
    dense = Dense(1, kernel_initializer = "he_normal", activation = "sigmoid")(flatten1)
    model = Model(inputs = input, outputs = dense)
    
    optimizer = Adam(lr = 0.001, clipnorm = 1)
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = [calc_precision_score, calc_recall_score, calc_f1_score])
    embedding_layer = model.get_layer("embedding")
    embedding_layer.set_weights([embedding_weights])
    return model

In [27]:
model = build_model(embedding_weights)

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, test_size = 0.3)

In [None]:
import keras
stopping = keras.callbacks.EarlyStopping(patience = 8)
reduce_lr = keras.callbacks.ReduceLROnPlateau(factor = 0.1, patience = 2, min_lr = 0.001 * 0.001)
checkpointer = keras.callbacks.ModelCheckpoint(
    filepath = "./text-cnn.model",
    save_best_only = True
)

In [None]:
hist = model.fit(X_train, y_train, epochs = 20, batch_size = 32, validation_data = (X_dev, y_dev), 
                 callbacks = [stopping, reduce_lr, checkpointer])

In [28]:
from keras.utils import plot_model

In [29]:
plot_model(model, to_file = 'model_auth.png', show_shapes= True)

OSError: `pydot` failed to call GraphViz.Please install GraphViz (https://www.graphviz.org/) and ensure that its executables are in the $PATH.