In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import lightgbm as lgb
from sklearn.model_selection import KFold, StratifiedKFold
import warnings
import gc
import time
import sys
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')



In [2]:
train= pd.read_csv("../input/train.csv")
test= pd.read_csv("../input/test.csv")


In [3]:
feats = ["var_{}".format(i) for i in range(200)]
X = train[feats]
X_test = test[feats]
y = train["target"]

cvlist = list(StratifiedKFold(5, random_state=12345786).split(X, y))
scaler = StandardScaler()

X_sc = scaler.fit_transform(X)
X_test_sc = scaler.fit_transform(X_test)



In [6]:
import keras
from keras.layers import (Flatten, Conv1D, Conv2D, Input, Dense, Dropout, BatchNormalization,
                          concatenate, GaussianNoise, Reshape, TimeDistributed, LeakyReLU, PReLU, Embedding)
from keras.models import Model, load_model, save_model
from keras.optimizers import SGD, Adam
from sklearn.base import BaseEstimator, ClassifierMixin
from pathlib import Path
from keras.callbacks import Callback

class ROC_AUC(Callback):
    def __init__(self, validation_data):
        self.X_val, self.y_val = validation_data
    
    def on_epoch_end(self, epoch, logs={}):
        print("ROC AUC for this fold is ", roc_auc_score(self.y_val, self.model.predict(X_val)))
        
class NNv1(BaseEstimator, ClassifierMixin):
    def __init__(self,
                 inp_shape=200,
                 gaussian_noise=0.01,
                 dense1_dim=32,
                 dense2_dim=32,
                 dense1_kwargs=None,
                 dense2_kwargs=None,
                 classifier_kwargs=None,
                 optimizer=SGD,
                 opt_kwargs=None,
                 ):
        self.inp_shape = inp_shape
        self.gaussian_noise = gaussian_noise
        self.dense1_dim = dense1_dim
        self.dense2_dim = dense2_dim
        self.dense1_kwargs = dense1_kwargs
        self.dense2_kwargs = dense2_kwargs
        self.classifier_kwargs = classifier_kwargs
        self.optimizer = optimizer
        self.opt_kwargs = opt_kwargs
        self._default_initiaization()

    def _default_initiaization(self):
        if self.dense1_kwargs is None:
            self.dense1_kwargs = {"kernel_initializer": "glorot_uniform"}
        if self.dense2_kwargs is None:
            self.dense2_kwargs = {"kernel_initializer": "he_uniform"}
        if self.classifier_kwargs is None:
            self.classifier_kwargs = {"kernel_initializer": "he_uniform"}
        if self.opt_kwargs is None:
            self.opt_kwargs = {}

    def _build_model(self):
        inp = Input(shape=(self.inp_shape,))
        # x = GaussianNoise(self.gaussian_noise)(inp)
        x = Reshape((self.inp_shape, 1))(inp)
        d1 = Dense(self.dense1_dim, activation='tanh',)(x)
        #d1 = TimeDistributed(Dropout(0.2))(d1)
        d2 = Dense(self.dense1_dim, activation='relu',)(x)
        #d2 = PReLU()(d2)
        #d2 = TimeDistributed(Dropout(0.2))(d2)
        x = concatenate([d1, d2])
        x = Flatten()(x)
        out = Dense(1, activation='sigmoid', **self.classifier_kwargs)(x)

        model = Model(inputs=inp, outputs=out)
        opt = self.optimizer(**self.opt_kwargs)
        model.compile(loss='binary_crossentropy', optimizer=opt)
        return model

    def fit(self, X, y, *args, **kwargs):
        self.model = self._build_model()
        print(self.model.summary())
        self.model.fit(X, y, *args, **kwargs)
        return self

    def predict(self, X, y=None, weight_path=None, **kwargs):
        if self.model:
            if weight_path is not None:
                self.model.load_weights(weight_path)
            y_hat = self.model.predict_proba(X, **kwargs)
        else:
            raise ValueError("Model not fit yet")
        return y_hat

In [7]:
model = NNv1(opt_kwargs = {"lr": 0.01, "momentum": 0.9, "nesterov": True, "clipnorm": 1})
y_preds_nn = np.zeros((len(y)))

for tr_idx, val_idx in cvlist:
    print("Fold {}".format(tr_idx))
    X_dev, y_dev = X_sc[tr_idx], y.iloc[tr_idx]
    X_val, y_val = X_sc[val_idx], y.iloc[val_idx]
    roc_auc = ROC_AUC((X_val, y_val))    
    model.fit(X_dev, y_dev, validation_data=(X_val, y_val), epochs=20, batch_size=256, verbose=0, callbacks=[roc_auc])
    val_preds = model.predict(X_val, batch_size=5000)
    y_preds_nn[val_idx] = val_preds.flatten()
    
sub = pd.DataFrame({"ID_code": test.ID_code.values})
output_df = pd.DataFrame({"ID_code": train.ID_code.values})
y_test_preds = model.predict(X_test_sc, batch_size = 5000)
sub['target'] = y_test_preds.flatten()
sub.to_csv('submission_full_nn.csv')

Fold [ 39913  39914  39915 ... 199997 199998 199999]
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 200)          0                                            
__________________________________________________________________________________________________
reshape_6 (Reshape)             (None, 200, 1)       0           input_6[0][0]                    
__________________________________________________________________________________________________
dense_16 (Dense)                (None, 200, 32)      64          reshape_6[0][0]                  
__________________________________________________________________________________________________
dense_17 (Dense)                (None, 200, 32)      64          reshape_6[0][0]                  
________________________________________________________

ROC AUC for this fold is  0.8949212802026554
ROC AUC for this fold is  0.8954687858650052
ROC AUC for this fold is  0.8956532961927438
ROC AUC for this fold is  0.8965844070365239
ROC AUC for this fold is  0.8965960808796485
ROC AUC for this fold is  0.8967823196413707
ROC AUC for this fold is  0.8971606219873397
ROC AUC for this fold is  0.8972427951957832
ROC AUC for this fold is  0.8975993469285036
ROC AUC for this fold is  0.8977316550930726
ROC AUC for this fold is  0.8973604082146246
Fold [     0      1      2 ... 199997 199998 199999]
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, 200)          0                                            
__________________________________________________________________________________________________
reshape_9 (Reshape)             (None, 200, 1)       0  