In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Input, Dense, Concatenate, BatchNormalization, Activation, Conv1D, Flatten, Dropout, MaxPool1D, GlobalMaxPool1D
from keras.layers.advanced_activations import LeakyReLU, PReLU, ELU
from keras.models import Model, Sequential
from keras.optimizers import Adam, SGD, RMSprop
from keras.regularizers import l1, l2
from keras_contrib.callbacks import CyclicLR
from keras_contrib.layers import PELU
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold

from santander_helper import auc, DataGenerator

In [None]:
# Read the training table, then discard the 'ID_code' column.
df_train_data = pd.read_csv('train_with_counts.csv')
df_train_data = df_train_data.drop(columns=['ID_code'])

In [None]:
# The concat with pseudo-labelled rows below is disabled, so the training
# frame is just the labelled data read from the CSV.
# df_train = pd.concat([df_train_data, df_train_pseudo], axis=0, sort=False)
df_train = df_train_data
# Split the frame into the feature columns and the label vector.
df_train_X = df_train.drop('target', axis=1)
y = df_train.loc[:, 'target'].values

In [None]:
# for j in range(200):
#     df_train_X[f'var_{j}_NEG'] = df_train_X[f'var_{j}']**2

In [None]:
# Preview the first rows of the feature frame (the 'target' column is already removed).
df_train_X.head()

In [None]:
# Optional sign flip for a hand-picked subset of the raw 'var_<j>' columns.
# Disabled by default; only the raw columns are flipped, not their '_FE' companions.
reverse_vars = False
if reverse_vars:
    reverse_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 15, 16, 18, 19, 22, 24, 25, 26,
                    27, 29, 32, 35, 37, 40, 41, 47, 48, 49, 51, 52, 53, 55, 60, 61,
                    62, 65, 66, 67, 69, 70, 71, 74, 78, 79, 82, 84, 89, 90, 91, 94,
                    95, 96, 97, 99, 103, 105, 106, 110, 111, 112, 118, 119, 125, 128,
                    130, 133, 134, 135, 137, 138, 140, 144, 145, 147, 151, 155, 157,
                    159, 161, 162, 163, 164, 167, 168, 170, 171, 173, 175, 176, 179,
                    180, 181, 184, 185, 187, 189, 190, 191, 195, 196, 199,
                    ]
    reverse_columns = [f'var_{j}' for j in reverse_list]

    # Recompute the flipped columns from df_train (which this notebook never
    # modifies) instead of multiplying df_train_X in place: an in-place `*= -1`
    # silently flips the signs back if the cell is executed a second time.
    # `.values` makes the assignment positional, so no index alignment happens.
    df_train_X[reverse_columns] = -df_train[reverse_columns].values

In [None]:
# Standardize every feature column: subtract its mean, then divide by its
# standard deviation (both computed over the rows of df_train_X).
df_train_X_normalized = (
    df_train_X
    .sub(df_train_X.mean(axis=0), axis='columns')
    .div(df_train_X.std(axis=0), axis='columns')
)

In [None]:
# Number of consecutive input rows that belong to one raw variable:
# the normalized 'var_<i>' value followed by its 'var_<i>_FE' companion.
common_rows = 2

# Network input of shape (n_samples, common_rows*200, 1).
# Allocated as float32 rather than NumPy's default float64: Keras uses float32
# as its default float type, so nothing is lost for training, while the tensor
# takes half the memory. The recorded run of this notebook died with a
# MemoryError while DataGenerator was augmenting this data, so the smaller
# footprint matters (it carries over to the augmented copies provided
# DataGenerator preserves the dtype -- TODO confirm in santander_helper).
X_train_normalized = np.zeros((df_train_X_normalized.shape[0], common_rows*200, 1), dtype=np.float32)
for i in range(200):
    # Each right-hand side has shape (n_samples, 1) and fills one input row.
    X_train_normalized[:, common_rows*i] = df_train_X_normalized[[f'var_{i}']].values
    X_train_normalized[:, common_rows*i+1] = df_train_X_normalized[[f'var_{i}_FE']].values
    # X_train_normalized[:, common_rows*i+2] = df_train_X_normalized[[f'var_{i}_NEG']].values

In [None]:
def get_model(N_units = 600, kernel_size=common_rows, strides=common_rows):
    """Build the uncompiled Conv1D -> Flatten -> sigmoid classifier.

    Args:
        N_units: number of filters of the convolution layer.
        kernel_size: width of the convolution window. Defaults to
            ``common_rows`` as it was when this function was defined.
        strides: step between consecutive windows. Defaults to
            ``common_rows`` as it was when this function was defined.

    Returns:
        A ``Sequential`` model taking inputs of shape
        ``(X_train_normalized.shape[1], 1)`` and producing one sigmoid output.
        The caller is responsible for compiling it.
    """
    # Variants that were tried and are currently disabled: L1/L2 kernel
    # regularization, PELU / LeakyReLU activations, MaxPool1D(2), Dropout(0.25).
    conv_layer = Conv1D(
        N_units,
        kernel_size=kernel_size,
        strides=strides,
        padding='valid',
        activation='relu',
        input_shape=(X_train_normalized.shape[1], 1),
    )
    return Sequential([
        conv_layer,
        Flatten(),
        Dense(1, activation='sigmoid'),
    ])

In [None]:
# Build a throwaway model with the default arguments and print its layer summary.
get_model().summary()

In [None]:
# File that ModelCheckpoint overwrites whenever val_auc improves. The same path
# is reused by every fold; after each fold the weights are reloaded from it and
# saved again under a per-fold file name.
best_model_file_name = 'best_full_model_aux.hdf5'

In [None]:
# Settings for the 5-fold stratified cross-validated training run.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
patience = 18      # epochs without val_auc improvement before early stopping
epochs = 100       # upper bound on the number of epochs per fold
bs = 1024          # batch size handed to the training generator
N_units = 600      # number of Conv1D filters
class_0_aug = 4    # class-0 augmentation setting, forwarded to DataGenerator
class_1_aug = 8    # class-1 augmentation setting, forwarded to DataGenerator

# Create the output folder up front: without it save_weights would only fail
# after a whole fold has already been trained.
os.makedirs('models', exist_ok=True)

for fold, (trn_idx, val_idx) in enumerate(skf.split(df_train_X_normalized, y)):
    print('###############################################')
    print(f'##################Fold {fold}#######################')
    print('###############################################')
    model = get_model(N_units)
    model.compile(Adam(), loss='binary_crossentropy', metrics=[auc, 'accuracy'])
    es = EarlyStopping(monitor='val_auc', patience=patience, mode='max', verbose=1)
    mc = ModelCheckpoint(best_model_file_name, monitor='val_auc', mode='max', verbose=1, save_best_only=True)

    generator = DataGenerator(X_train_normalized[trn_idx], y[trn_idx],
                              batch_size=bs, shuffle=True,
                              class_1_aug=class_1_aug,
                              class_0_aug=class_0_aug,
                              common_rows=common_rows)
    # Batches per epoch; the cyclic learning-rate half-cycle spans 4 epochs.
    tr_iter_in_epoch = len(generator)
    clr = CyclicLR(base_lr=0.0001, max_lr=0.005, step_size=4*tr_iter_in_epoch, mode='triangular2')

    # Validation uses the augmented version of the held-out fold, presented in
    # a random order.
    X_val_data, y_val_data = DataGenerator.augment(X_train_normalized[val_idx], y[val_idx],
                                                   class_1_aug=class_1_aug,
                                                   class_0_aug=class_0_aug,
                                                   common_rows=common_rows)
    indexes_val = np.arange(len(y_val_data))
    np.random.shuffle(indexes_val)

    model.fit_generator(generator,
                        epochs=epochs,
                        verbose=2,
                        callbacks=[es, mc, clr],
                        validation_data=(X_val_data[indexes_val], y_val_data[indexes_val]))

    # Rebuild the network with the SAME width that was trained (get_model()
    # without arguments would silently fall back to its default N_units and
    # fail to load the checkpoint for any other width), restore the best
    # checkpoint written by ModelCheckpoint and store it under a per-fold name.
    model = get_model(N_units)
    model.load_weights(best_model_file_name)
    # NOTE(review): the file name ends in '_rev' although reverse_vars is False
    # in this notebook -- confirm the suffix is intended.
    model.save_weights(f'models/CNN_generator_fold_{fold}_cl1_{class_1_aug}_cl0_{class_0_aug}_{N_units}_rev.hdf5')

###############################################
##################Fold 0#######################
###############################################
Epoch 1/100
 - 29s - loss: 0.3210 - auc: 0.8647 - acc: 0.8677 - val_loss: 0.2973 - val_auc: 0.8987 - val_acc: 0.8774

Epoch 00001: val_auc improved from -inf to 0.89873, saving model to best_full_model_aux.hdf5
Epoch 2/100


Exception in thread Thread-31:
Traceback (most recent call last):
  File "/home/usuario/anaconda3/envs/gpu/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/usuario/anaconda3/envs/gpu/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/usuario/anaconda3/envs/gpu/lib/python3.6/site-packages/keras/utils/data_utils.py", line 580, in _run
    self.sequence.on_epoch_end()
  File "/home/usuario/repos/santander_solved/santander_helper.py", line 77, in on_epoch_end
    common_rows = self.common_rows
  File "/home/usuario/repos/santander_solved/santander_helper.py", line 43, in augment
    xn = np.vstack(xn)
  File "/home/usuario/anaconda3/envs/gpu/lib/python3.6/site-packages/numpy/core/shape_base.py", line 283, in vstack
    return _nx.concatenate([atleast_2d(_m) for _m in tup], 0)
MemoryError

