## Imports

In [None]:
import datetime
import gc

In [None]:
from sklearn.model_selection import StratifiedKFold

In [None]:
# NOTE(review): cuda_use_gpus is not defined in the visible cells; presumably it
# restricts this session to GPU index 0 before Keras is imported - confirm.
cuda_use_gpus(0)

In [None]:
from keras import backend as K
from keras.models import Sequential
from keras.layers import *
from keras.callbacks import EarlyStopping, ModelCheckpoint

## Config

In [None]:
# Identifier embedded in the checkpoint file name and in the saved feature file names.
feature_list_id = 'oofp_bradleypallen_mlp'

In [None]:
# Seed used for NumPy's global RNG and for the shuffled StratifiedKFold split.
RANDOM_SEED = 42

In [None]:
# Seed NumPy's global RNG.
# NOTE(review): `np` is not imported in the visible cells - presumably it is
# provided by the notebook's startup environment; confirm.
np.random.seed(RANDOM_SEED)

## Read Data

In [None]:
# Pre-computed embedding weights: axis 0 is read as the vocabulary size and the
# last axis as the embedding dimension.
# NOTE(review): `load` and `aux_data_folder` are defined outside the visible cells;
# presumably `load` unpickles the file - confirm.
embedding_matrix = load(aux_data_folder + 'embedding_weights_fasttext_filtered_no_stopwords.pickle')

In [None]:
# Training inputs for the first and second question of each pair; rows are
# aligned between the two arrays and the last axis is the sequence length.
# Presumably padded token-id sequences indexing into embedding_matrix - confirm.
X_train_q1 = load(features_data_folder + 'X_train_nn_fasttext_q1_filtered_no_stopwords.pickle')
X_train_q2 = load(features_data_folder + 'X_train_nn_fasttext_q2_filtered_no_stopwords.pickle')

In [None]:
# Test inputs for the first and second question of each pair (same layout as
# the training inputs, presumably - confirm).
X_test_q1 = load(features_data_folder + 'X_test_nn_fasttext_q1_filtered_no_stopwords.pickle')
X_test_q2 = load(features_data_folder + 'X_test_nn_fasttext_q2_filtered_no_stopwords.pickle')

In [None]:
# Training labels; used as the stratification target and as the binary
# cross-entropy target when fitting each fold.
y_train = load(features_data_folder + 'y_train.pickle')

In [None]:
# Network dimensions are derived from the loaded data rather than hard-coded.
EMBEDDING_DIM = embedding_matrix.shape[-1]  # width of one embedding vector
VOCAB_LENGTH = embedding_matrix.shape[0]  # number of rows in the embedding matrix
MAX_SEQUENCE_LENGTH = X_train_q1.shape[-1]  # length of each input sequence

In [None]:
# Sanity check: show the derived dimensions before building any model.
print(EMBEDDING_DIM, VOCAB_LENGTH, MAX_SEQUENCE_LENGTH)

## Train Models & Compute Out-of-Fold Predictions

In [None]:
def _create_question_encoder():
    """Build the encoder branch applied to one question of a pair.

    The branch is: a frozen embedding initialised from ``embedding_matrix``,
    a ReLU dense layer applied at every timestep, and a max over axis 1 of
    the result, yielding one ``EMBEDDING_DIM``-sized vector per input row.

    Returns:
        An uncompiled ``Sequential`` model with its own (unshared) weights.
    """
    encoder = Sequential()

    encoder.add(Embedding(
        VOCAB_LENGTH,
        EMBEDDING_DIM,
        weights=[embedding_matrix],
        input_length=MAX_SEQUENCE_LENGTH,
        trainable=False,
    ))

    encoder.add(TimeDistributed(Dense(
        EMBEDDING_DIM,
        activation='relu',
    )))

    encoder.add(Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, )))

    return encoder


def create_model(params):
    """Build and compile the two-branch question-pair classifier.

    Two independent encoder branches (one per question) are concatenated,
    batch-normalised, and passed through four Dense -> BatchNormalization ->
    ReLU blocks followed by a single sigmoid output unit.

    Args:
        params: Mapping with integer entries ``'num_dense_1'`` through
            ``'num_dense_4'`` giving the width of each hidden dense layer.

    Returns:
        A compiled model (binary cross-entropy loss, Adam optimizer, accuracy
        metric) that takes ``[q1_sequences, q2_sequences]`` as input.

    Raises:
        KeyError: If one of the ``'num_dense_*'`` entries is missing.
    """
    # Branches are built in q1, q2 order before the top-level model so layer
    # creation order matches the original hand-written version.
    model_q1 = _create_question_encoder()
    model_q2 = _create_question_encoder()

    model = Sequential()
    model.add(Merge([model_q1, model_q2], mode='concat'))
    model.add(BatchNormalization())

    for width_key in ('num_dense_1', 'num_dense_2', 'num_dense_3', 'num_dense_4'):
        model.add(Dense(params[width_key]))
        model.add(BatchNormalization())
        model.add(Activation('relu'))

    # NOTE: the last hidden block's activations are batch-normalised once more
    # before the output layer; kept to preserve the existing architecture.
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    return model

In [None]:
# Single checkpoint file that holds the best weights of the fold being trained.
model_checkpoint_path = f'{aux_data_folder}fold-checkpoint-{feature_list_id}.h5'

In [None]:
# Number of stratified cross-validation folds (one model is trained per fold).
NUM_FOLDS = 5
# Upper bound on training epochs per fold; early stopping may end training sooner.
NUM_EPOCHS = 200

In [None]:
# Shuffled, label-stratified splitter; seeded so the fold assignment is repeatable.
kfold = StratifiedKFold(
    n_splits=NUM_FOLDS,
    shuffle=True,
    random_state=RANDOM_SEED
)

In [None]:
# Out-of-fold predictions for the training rows, filled in per validation fold.
# float64 is forced because the result mirrors y_train's shape, not its dtype.
y_train_oofp = np.zeros_like(y_train, dtype='float64')

In [None]:
# Test-set predictions: one row per test pair, one column per fold's model.
y_test_oofp = np.zeros((len(X_test_q1), NUM_FOLDS))

In [None]:
# Widths of the four hidden dense layers, in the order create_model stacks them.
model_params = dict(
    num_dense_1=500,
    num_dense_2=225,
    num_dense_3=500,
    num_dense_4=100,
)

In [None]:
def predict(model, X_q1, X_q2, batch_size=1024):
    """Predict with both question orderings and average the results.

    The model is evaluated on ``[X_q1, X_q2]`` and on the swapped input
    ``[X_q2, X_q1]``; averaging the two makes the final score independent
    of which question is presented first.

    Args:
        model: Object exposing a Keras-style
            ``predict(inputs, batch_size=..., verbose=...)`` method whose
            result supports ``.reshape``.
        X_q1: Inputs for the first question of each pair.
        X_q2: Inputs for the second question of each pair.
        batch_size: Batch size forwarded to ``model.predict`` (default 1024,
            the value previously hard-coded).

    Returns:
        A 1-D array holding the element-wise mean of the two flattened
        prediction arrays.
    """
    def _predict_flat(first, second):
        # Flatten the model output to 1-D so the two passes can be averaged.
        return model.predict(
            [first, second],
            batch_size=batch_size,
            verbose=1
        ).reshape(-1)

    return (_predict_flat(X_q1, X_q2) + _predict_flat(X_q2, X_q1)) / 2

In [None]:
%%time

# Train one model per stratified fold. Each fold fills its validation rows of
# y_train_oofp and one column of y_test_oofp.
for fold_num, (ix_train, ix_val) in enumerate(kfold.split(X_train_q1, y_train)):
    # Symmetric augmentation: every pair is stacked in both orders, (q1, q2)
    # followed by (q2, q1), so each example appears twice with the same label.
    X_fold_train_q1 = np.vstack([X_train_q1[ix_train], X_train_q2[ix_train]])
    X_fold_train_q2 = np.vstack([X_train_q2[ix_train], X_train_q1[ix_train]])

    X_fold_val_q1 = np.vstack([X_train_q1[ix_val], X_train_q2[ix_val]])
    X_fold_val_q2 = np.vstack([X_train_q2[ix_val], X_train_q1[ix_val]])

    # Labels are duplicated to line up with the doubled inputs above.
    y_fold_train = np.concatenate([y_train[ix_train], y_train[ix_train]])
    y_fold_val = np.concatenate([y_train[ix_val], y_train[ix_val]])
    
    print()
    print(f'Fitting fold {fold_num + 1} of {kfold.n_splits}')
    print()
    
    # A fresh model is built for every fold so no weights carry over.
    model = create_model(model_params)
    model.fit(
        [X_fold_train_q1, X_fold_train_q2], y_fold_train,
        validation_data=([X_fold_val_q1, X_fold_val_q2], y_fold_val),
#         class_weight=keras_get_class_weights(y_fold_val),

        batch_size=64,
        epochs=NUM_EPOCHS,
        verbose=1,
        
        callbacks=[
            # Stop once val_loss has not improved by at least 0.001 for 3 epochs.
            EarlyStopping(
                monitor='val_loss',
                min_delta=0.001,
                patience=3,
                verbose=1,
                mode='auto',
            ),
            # Keep only the best-val_loss weights; the same path is reused by
            # every fold, so each fold overwrites the previous fold's file.
            ModelCheckpoint(
                model_checkpoint_path,
                monitor='val_loss',
                save_best_only=True,
                verbose=2,
            ),
        ],
    )
        
    # Create out-of-fold prediction.
    # Restore the checkpointed best weights first, since training continues
    # for up to `patience` epochs past the best one.
    # NOTE(review): the validation fold drives early stopping / checkpoint
    # selection and is also the fold predicted here.
    model.load_weights(model_checkpoint_path)
    
    y_train_oofp[ix_val] = predict(model, X_train_q1[ix_val], X_train_q2[ix_val])
    y_test_oofp[:, fold_num] = predict(model, X_test_q1, X_test_q2)
    
    # Clear GPU memory.
    # The per-fold arrays and the model are dropped before the next fold
    # allocates its own copies.
    K.clear_session()
    del X_fold_train_q1
    del X_fold_train_q2
    del X_fold_val_q1
    del X_fold_val_q2
    del model
    gc.collect()

## Save feature names

In [None]:
# This notebook produces exactly one feature column: the averaged OOF prediction.
feature_names = ['oofp_bradleypallen_mlp']

In [None]:
# Persist the feature names alongside the feature files.
# NOTE(review): save_lines is defined outside the visible cells; presumably it
# writes one list entry per line - confirm.
save_lines(feature_names, features_data_folder + f'X_train_{feature_list_id}.names')

## Save Train features

In [None]:
# Reshape the out-of-fold predictions into a single-column 2-D array.
y_train_oofp = y_train_oofp.reshape((-1, 1))

In [None]:
# Store the training out-of-fold predictions as this feature list's train file.
# NOTE(review): `save` is defined outside the visible cells; presumably it pickles.
save(y_train_oofp, features_data_folder + f'X_train_{feature_list_id}.pickle')

## Save Test features

In [None]:
# Average the per-fold test predictions; keepdims leaves a single-column 2-D array.
y_test_oofp_mean = np.mean(y_test_oofp, axis=1, keepdims=True)

In [None]:
# Store the fold-averaged test predictions as this feature list's test file.
# NOTE(review): `save` is defined outside the visible cells; presumably it pickles.
save(y_test_oofp_mean, features_data_folder + f'X_test_{feature_list_id}.pickle')