In [9]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import lightgbm as lgb
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
import warnings
import gc
import time
import sys
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')

from sklearn.utils import shuffle



In [2]:
train= pd.read_csv("../input/train.csv")
test= pd.read_csv("../input/test.csv")


In [35]:
feats = ["var_{}".format(i) for i in range(200)]
targets2 = ["target{}".format(i) for i in range(0,2)]

split_tar_df = train.copy(deep=True)
split_tar_df['target1'] = train['target']
split_tar_df['target0'] = 1 - train['target']

zero_subset_df = split_tar_df[split_tar_df['target0'] > 0.9]
ones_subset_df = split_tar_df[split_tar_df['target1'] > 0.9]

frames = [zero_subset_df]
for i in range(0,4):
    frames.append(ones_subset_df)

combined_df = pd.concat(frames)
combined_df = shuffle(combined_df)

combined_df.info()

X = combined_df[feats]
X_test = test[feats]
y = combined_df[['target1','target0']]


N_FOLDS = 5.0

#kf = KFold(n_splits=int(N_FOLDS), random_state=2019, shuffle=True)

#cvlist = list(StratifiedKFold(5, random_state=12345786).split(X, y))
scaler = StandardScaler()

X_sc = scaler.fit_transform(X)
X_test_sc = scaler.fit_transform(X_test)

print(X_sc.shape)
print(y.shape)



<class 'pandas.core.frame.DataFrame'>
Int64Index: 260294 entries, 65460 to 58252
Columns: 204 entries, ID_code to target0
dtypes: float64(200), int64(3), object(1)
memory usage: 407.1+ MB
(260294, 200)
(260294, 2)


In [38]:
import keras
from keras.layers import (Flatten, Conv1D, Conv2D, Input, Dense, Dropout, BatchNormalization,
                          concatenate, GaussianNoise, Reshape, TimeDistributed, LeakyReLU, PReLU, Embedding)
from keras.models import Model, load_model, save_model
from keras.optimizers import SGD, Adam
from sklearn.base import BaseEstimator, ClassifierMixin
from pathlib import Path
from keras.callbacks import Callback

class ROC_AUC(Callback):
    def __init__(self, validation_data):
        self.X_val, self.y_val = validation_data
    
    def on_epoch_end(self, epoch, logs={}):
        print("ROC AUC for this fold is ", roc_auc_score(self.y_val, self.model.predict(X_val)))
        
class NNv1(BaseEstimator, ClassifierMixin):
    def __init__(self,
                 inp_shape=200,
                 gaussian_noise=0.01,
                 dense1_dim=64,
                 dense2_dim=64,
                 dense1_kwargs=None,
                 dense2_kwargs=None,
                 classifier_kwargs=None,
                 optimizer=SGD,
                 opt_kwargs=None,
                 ):
        self.inp_shape = inp_shape
        self.gaussian_noise = gaussian_noise
        self.dense1_dim = dense1_dim
        self.dense2_dim = dense2_dim
        self.dense1_kwargs = dense1_kwargs
        self.dense2_kwargs = dense2_kwargs
        self.classifier_kwargs = classifier_kwargs
        self.optimizer = optimizer
        self.opt_kwargs = opt_kwargs
        self._default_initiaization()

    def _default_initiaization(self):
        if self.dense1_kwargs is None:
            self.dense1_kwargs = {"kernel_initializer": "glorot_uniform"}
        if self.dense2_kwargs is None:
            self.dense2_kwargs = {"kernel_initializer": "he_uniform"}
        if self.classifier_kwargs is None:
            self.classifier_kwargs = {"kernel_initializer": "he_uniform"}
        if self.opt_kwargs is None:
            self.opt_kwargs = {}

    def _build_model(self):
        inp = Input(shape=(self.inp_shape,))
        x = GaussianNoise(self.gaussian_noise)(inp)
        x = Reshape((self.inp_shape, 1))(inp)
        d1 = Dense(self.dense1_dim, activation='tanh',)(x)
        #d1 = TimeDistributed(Dropout(0.2))(d1)
        d2 = Dense(self.dense1_dim, activation='relu',)(x)
        #d2 = PReLU()(d2)
        #d2 = TimeDistributed(Dropout(0.2))(d2)
        x = concatenate([d1, d2])
        x = Flatten()(x)
        out = Dense(2, activation='softmax', **self.classifier_kwargs)(x)

        model = Model(inputs=inp, outputs=out)
        opt = self.optimizer(**self.opt_kwargs)
        model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
        return model

    def fit(self, X, y, *args, **kwargs):
        self.model = self._build_model()
        print(self.model.summary())
        self.model.fit(X, y, *args, **kwargs)
        return self

    def predict(self, X, y=None, weight_path=None, **kwargs):
        if self.model:
            if weight_path is not None:
                self.model.load_weights(weight_path)
            y_hat = self.model.predict(X, **kwargs)
        else:
            raise ValueError("Model not fit yet")
        return y_hat

In [39]:
y_test_pred = np.zeros(len(test))
    
X_train, X_valid, y_train, y_valid = train_test_split(X_sc, y, test_size=0.2, random_state=42)

model = NNv1(opt_kwargs = {"lr": 0.02, "momentum": 0.9, "nesterov": True, "clipnorm": 1})

model.fit(X_train, y_train, epochs=10, batch_size = 1000, validation_data =(X_valid, y_valid))
pred = model.predict(X_valid)
print(pred.shape)
print( "  auc = ", roc_auc_score(y_valid, pred) )
y_test_pred = model.predict(X_test_sc)[:,0]
    

#save base submission
sub_df1 = pd.DataFrame({"ID_code":test["ID_code"].values})
sub_df1["target"] = y_test_pred 
sub_df1.to_csv("submission_full_equal_nn.csv", index=False)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_14 (InputLayer)           (None, 200)          0                                            
__________________________________________________________________________________________________
reshape_14 (Reshape)            (None, 200, 1)       0           input_14[0][0]                   
__________________________________________________________________________________________________
dense_34 (Dense)                (None, 200, 64)      128         reshape_14[0][0]                 
__________________________________________________________________________________________________
dense_35 (Dense)                (None, 200, 64)      128         reshape_14[0][0]                 
__________________________________________________________________________________________________
concatenat