## Imports

In [1]:
import datetime
import gc
import os

In [2]:
from sklearn.model_selection import StratifiedKFold

In [3]:
# Runs before the Keras import in the next cell.
# NOTE(review): `cuda_use_gpus` is not imported in any visible cell; presumably a
# project helper that restricts the backend to GPU 0 — confirm the import exists
# so this cell works on a fresh kernel.
cuda_use_gpus(0)

In [4]:
from keras import backend as K
from keras.models import Model, Sequential
from keras.layers import *
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


## Config

In [5]:
# Identifier embedded in the checkpoint file name and in the names of the saved
# train/test feature files produced by this notebook.
feature_list_id = 'oofp_currie32_cnn'

In [6]:
# Seed used for NumPy's global RNG and for the shuffling done by StratifiedKFold.
RANDOM_SEED = 42

In [7]:
# Seeds NumPy's global RNG only.
# NOTE(review): no backend (TensorFlow) seed is set in the visible cells, so model
# training may not be exactly repeatable between runs — confirm if that matters.
# NOTE(review): `np` is not imported in any visible cell — confirm where it comes from.
np.random.seed(RANDOM_SEED)

In [8]:
# Data lives in ../data relative to the current working directory.
# Joining with a trailing '' appends the platform path separator, so every folder
# string below ends with a separator and file names can be concatenated directly.
data_folder = os.path.join(os.path.abspath(os.path.join(os.curdir, os.pardir, 'data')), '')
aux_data_folder = os.path.join(data_folder, 'aux', '')
preproc_data_folder = os.path.join(data_folder, 'preproc', '')
features_data_folder = os.path.join(data_folder, 'features', '')
submissions_data_folder = os.path.join(data_folder, 'submissions', '')

## Read Data

In [9]:
# Embedding weights handed to the frozen Embedding layers built in create_model().
# NOTE(review): `load` is defined outside the visible cells; given the .pickle
# extension it presumably unpickles the file — only load files from a trusted source.
embedding_matrix = load(aux_data_folder + 'embedding_weights_fasttext_filtered_no_stopwords.pickle')

In [10]:
# Training inputs for question 1 and question 2. Both arrays are sliced with the
# same fold indices later on, so row i of each is treated as one question pair.
# Presumably padded token-id sequences matching the embedding matrix — TODO confirm.
X_train_q1 = load(features_data_folder + 'X_train_nn_fasttext_q1_filtered_no_stopwords.pickle')
X_train_q2 = load(features_data_folder + 'X_train_nn_fasttext_q2_filtered_no_stopwords.pickle')

In [11]:
# Test-set counterparts of the training question arrays; predicted once per fold.
X_test_q1 = load(features_data_folder + 'X_test_nn_fasttext_q1_filtered_no_stopwords.pickle')
X_test_q2 = load(features_data_folder + 'X_test_nn_fasttext_q2_filtered_no_stopwords.pickle')

In [12]:
# Training labels; used for stratified fold splitting and as the fit target.
y_train = load(features_data_folder + 'y_train.pickle')

In [13]:
# Model dimensions are derived from the loaded arrays rather than hard-coded.
EMBEDDING_DIM = embedding_matrix.shape[-1]  # last axis of the embedding matrix
VOCAB_LENGTH = embedding_matrix.shape[0]  # first axis of the embedding matrix
MAX_SEQUENCE_LENGTH = X_train_q1.shape[-1]  # last axis of the question-1 array

In [14]:
# Sanity check of the dimensions derived above.
print(EMBEDDING_DIM, VOCAB_LENGTH, MAX_SEQUENCE_LENGTH)

300 101442 30


## Train Models & Compute Out-of-Fold Predictions

In [15]:
def _frozen_embedding():
    """Return a non-trainable Embedding layer whose weights are `embedding_matrix`."""
    return Embedding(VOCAB_LENGTH,
                     EMBEDDING_DIM,
                     weights=[embedding_matrix],
                     input_length=MAX_SEQUENCE_LENGTH,
                     trainable=False)


def _add_norm_relu_dropout(model, dropout):
    """Append BatchNormalization -> ReLU -> Dropout(dropout) to `model`."""
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout))


def _build_conv_branch(nb_filter, filter_length, dropout):
    """Build one input branch: frozen embedding, two Conv1D blocks, then Flatten."""
    branch = Sequential()
    branch.add(_frozen_embedding())
    for _ in range(2):
        branch.add(Convolution1D(filters=nb_filter,
                                 kernel_size=filter_length,
                                 padding='same'))
        _add_norm_relu_dropout(branch, dropout)
    branch.add(Flatten())
    return branch


def _build_max_over_time_branch(dropout):
    """Build one input branch: frozen embedding, per-timestep Dense, max over axis 1."""
    branch = Sequential()
    branch.add(_frozen_embedding())
    branch.add(TimeDistributed(Dense(EMBEDDING_DIM)))
    _add_norm_relu_dropout(branch, dropout)
    # Collapse the sequence axis by taking the element-wise maximum over it.
    branch.add(Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, )))
    return branch


def _add_dense_blocks(model, layer_sizes, dropout, weights, bias):
    """Append one Dense -> BatchNorm -> ReLU -> Dropout block per entry in `layer_sizes`."""
    for size in layer_sizes:
        model.add(Dense(size, kernel_initializer=weights, bias_initializer=bias))
        _add_norm_relu_dropout(model, dropout)


def _build_merged_tower(branches, layer_sizes, dropout, weights, bias):
    """Concatenate `branches` and stack dense blocks of the given sizes on top."""
    tower = Sequential()
    # NOTE: `Merge` is the legacy Keras layer (dropped in later Keras 2 releases);
    # it is kept here so the model definition stays compatible with the rest of
    # this notebook. Newer Keras needs the functional API with `concatenate`.
    tower.add(Merge(branches, mode='concat'))
    _add_dense_blocks(tower, layer_sizes, dropout, weights, bias)
    return tower


def create_model(units=128, dropout=0.25, nb_filter=32, filter_length=3):
    """Build and compile the four-input question-pair classifier.

    The network has four input branches, created in this order: two
    convolutional branches followed by two "Dense + max over time" branches.
    The training cell of this notebook feeds them as ``[q1, q2, q1, q2]``.
    The two convolutional branches are concatenated into one dense tower, the
    two max-over-time branches into another, and both towers are concatenated
    into a final dense stack ending in a single sigmoid unit.

    Args:
        units: Base width of the Dense layers (some layers use ``units * 2``).
        dropout: Dropout rate applied after every ReLU block.
        nb_filter: Number of filters in each Convolution1D layer.
        filter_length: Kernel size of each Convolution1D layer.

    Returns:
        A compiled Keras model (binary cross-entropy loss, 'nadam' optimizer,
        accuracy metric).
    """
    # One initializer instance is shared by every Dense layer built here.
    weights = initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=2)
    bias = 'zeros'

    # Branches are built in a fixed order; it determines the model's input order.
    conv_branch_a = _build_conv_branch(nb_filter, filter_length, dropout)
    conv_branch_b = _build_conv_branch(nb_filter, filter_length, dropout)
    pooled_branch_a = _build_max_over_time_branch(dropout)
    pooled_branch_b = _build_max_over_time_branch(dropout)

    conv_tower = _build_merged_tower([conv_branch_a, conv_branch_b],
                                     [units * 2, units],
                                     dropout, weights, bias)
    pooled_tower = _build_merged_tower([pooled_branch_a, pooled_branch_b],
                                       [units * 2, units],
                                       dropout, weights, bias)

    model = _build_merged_tower([conv_tower, pooled_tower],
                                [units * 2, units, units],
                                dropout, weights, bias)

    # Output head: the single logit is batch-normalised before the sigmoid.
    model.add(Dense(1, kernel_initializer=weights, bias_initializer=bias))
    model.add(BatchNormalization())
    model.add(Activation('sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        optimizer='nadam',
        metrics=['accuracy']
    )

    return model

In [16]:
# One checkpoint path shared by all folds: each fold's ModelCheckpoint callback
# writes to this same file, and the fold then reloads its weights from it.
model_checkpoint_path = aux_data_folder + 'fold-checkpoint-' + feature_list_id + '.h5'

In [17]:
# Number of cross-validation folds (also the number of test prediction columns).
NUM_FOLDS = 5
# Upper bound on epochs per fold; the EarlyStopping callback may stop sooner.
NUM_EPOCHS = 35

In [18]:
# Shuffled, label-stratified splitter; seeded so fold membership is repeatable.
kfold = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=RANDOM_SEED)

In [19]:
# Placeholder binding; the fold loop below assigns a fresh model to this name
# for every fold and deletes it again at the end of each iteration.
model = None

In [20]:
# Out-of-fold predictions for the training set, same shape as y_train.
# dtype is forced to float64 regardless of y_train's own dtype.
y_train_oofp = np.zeros_like(y_train, dtype='float64')

In [21]:
# Test-set predictions: one row per test sample, one column per fold.
y_test_oofp = np.zeros((len(X_test_q1), NUM_FOLDS))

In [22]:
%%time

# create_model() has four inputs; each question array is fed twice, in the
# order [q1, q2, q1, q2]. The test inputs do not change between folds.
test_inputs = [X_test_q1, X_test_q2] * 2

for fold_num, (ix_train, ix_val) in enumerate(kfold.split(X_train_q1, y_train)):
    X_fold_train_q1 = X_train_q1[ix_train]
    X_fold_train_q2 = X_train_q2[ix_train]

    X_fold_val_q1 = X_train_q1[ix_val]
    X_fold_val_q2 = X_train_q2[ix_val]

    y_fold_train = y_train[ix_train]
    y_fold_val = y_train[ix_val]

    # List repetition reuses the same array objects, so no data is copied.
    fold_train_inputs = [X_fold_train_q1, X_fold_train_q2] * 2
    fold_val_inputs = [X_fold_val_q1, X_fold_val_q2] * 2

    print()
    print(f'Fitting fold {fold_num + 1} of {kfold.n_splits}')
    print()

    model = create_model()
    model.fit(
        fold_train_inputs, y_fold_train,
        validation_data=(fold_val_inputs, y_fold_val),
        # Class weights are applied to the training loss, so they are derived
        # from the training labels of this fold, not from the validation labels.
        # NOTE(review): keras_get_class_weights is defined outside the visible
        # cells; presumably it maps a label array to per-class weights.
        class_weight=keras_get_class_weights(y_fold_train),

        batch_size=128,
        epochs=NUM_EPOCHS,
        verbose=1,

        callbacks=[
            EarlyStopping(
                monitor='val_loss',
                min_delta=0.001,
                patience=3,
                verbose=1,
                mode='auto',
            ),
            ModelCheckpoint(
                model_checkpoint_path,
                monitor='val_loss',
                save_best_only=True,
                verbose=2,
            ),
        ],
    )

    # Restore the best (lowest val_loss) weights saved during this fold before
    # predicting; training may have continued past that epoch.
    model.load_weights(model_checkpoint_path)

    # Out-of-fold prediction for the held-out part of the training set.
    y_pred_oofp = model.predict(
        fold_val_inputs,
        batch_size=1024,
        verbose=1
    ).reshape(-1)

    # This fold's prediction for the whole test set goes into its own column.
    y_test_oofp[:, fold_num] = model.predict(
        test_inputs,
        batch_size=1024,
        verbose=1
    ).reshape(-1)

    # Remember them.
    y_train_oofp[ix_val] = y_pred_oofp

    # Release the backend session and every reference to the fold arrays
    # (including the input lists) so the memory can be reclaimed before the
    # next fold.
    K.clear_session()
    del fold_train_inputs
    del fold_val_inputs
    del X_fold_train_q1
    del X_fold_train_q2
    del X_fold_val_q1
    del X_fold_val_q2
    del model
    gc.collect()


Fitting fold 1 of 5





Train on 323431 samples, validate on 80859 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 00023: early stopping

Fitting fold 2 of 5

Train on 323431 samples, validate on 80859 samples
Epoch 1/35


Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 00013: early stopping
Fitting fold 3 of 5

Train on 323432 samples, validate on 80858 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35


Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 00015: early stopping
Fitting fold 4 of 5

Train on 323433 samples, validate on 80857 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 00015: early stopping

Fitting fold 5 of 5

Train on 323433 samples, validate on 80857 samples
Epoch 1/35
Epoch 2/35


Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 00011: early stopping
Wall time: 1h 8min 21s


## Save feature names

In [23]:
# Single output feature: the out-of-fold prediction column produced above.
feature_names = ['oofp_currie32_cnn']

In [24]:
# Store the feature names alongside the feature files, keyed by feature_list_id.
# NOTE(review): `save_lines` is defined outside the visible cells; presumably it
# writes one list entry per line — confirm.
save_lines(feature_names, features_data_folder + f'X_train_{feature_list_id}.names')

## Save Train features

In [25]:
# Reshape to a single column, i.e. shape (n, 1), so there is one column per
# entry in feature_names. Re-running this cell leaves the shape unchanged.
y_train_oofp = y_train_oofp.reshape((-1, 1))

In [26]:
# Persist the out-of-fold training predictions as a feature file.
# NOTE(review): `save` is defined outside the visible cells — presumably pickles
# the object to the given path; confirm.
save(y_train_oofp, features_data_folder + f'X_train_{feature_list_id}.pickle')

## Save Test features

In [27]:
# Average the per-fold test predictions across the fold axis; keepdims keeps the
# result as a single column of shape (n_test, 1).
y_test_oofp_mean = y_test_oofp.mean(axis=1, keepdims=True)

In [28]:
# Persist the fold-averaged test predictions as the matching test feature file.
# NOTE(review): `save` is defined outside the visible cells — presumably pickles
# the object to the given path; confirm.
save(y_test_oofp_mean, features_data_folder + f'X_test_{feature_list_id}.pickle')