Change Log:
1. Added MinMaxScaler
2. Same scaling for X_test and X_train

In [1]:
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [13]:
def data():
    '''
    Data providing function:

    Make sure to have every relevant import statement included here and return data as
    used in model function below. This function is separated from model() so that hyperopt
    won't reload data for each evaluation run.
    '''
    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler
    import numpy as np
    import numpy
    
    df = pd.read_csv("../data/train.csv", sep=",")
    X = df[df.columns[2:]].astype(float).values
    Y = np.array(df['target']).astype(float)
    
    scaler = MinMaxScaler(feature_range=(0, 1))
    X = scaler.fit_transform(X)
    
    X_train, Y_train, X_test, Y_test = X[:-60000,:], Y[:-60000], X[-60000:,:], Y[-60000:]
    
    print(X_train.shape)
    print(Y_train.shape)
    print(X_test.shape)
    print(Y_test.shape)
    
    return X_train, Y_train, X_test, Y_test

In [21]:
def model(X_train, Y_train, X_test, Y_test):
    '''
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    '''
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation
    from keras.callbacks import ReduceLROnPlateau, CSVLogger, ModelCheckpoint
    from keras.regularizers import l1_l2

    model = Sequential()
    model.add(Dense({{choice([256, 512, 1024])}}, 
                    input_shape=(X_train.shape[1],),
                    kernel_regularizer = l1_l2(l1={{uniform(0, 1)}}, 
                                               l2={{uniform(0, 1)}})))
    model.add(Activation('relu'))
    model.add(Dropout({{uniform(0, 1)}}))
    
    model.add(Dense({{choice([256, 512, 1024])}},
                    kernel_regularizer = l1_l2(l1={{uniform(0, 1)}}, 
                                               l2={{uniform(0, 1)}})))
    model.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model.add(Dropout({{uniform(0, 1)}}))
    
    model.add(Dense({{choice([256, 512, 1024])}},
                    kernel_regularizer = l1_l2(l1={{uniform(0, 1)}}, 
                                               l2={{uniform(0, 1)}})))
    model.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model.add(Dropout({{uniform(0, 1)}}))
    
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    
    model.compile(loss='binary_crossentropy', optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})
    
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)
    checkpointer = ModelCheckpoint(filepath='../results/trial2_bestmodel.hdf5',
                                   verbose=1,
                                   save_best_only=True)
    #MCP = ModelCheckpoint(filepath = "../results/BestModel1.h5", monitor='val_loss', verbose=0, save_best_only=True, 
    #                  save_weights_only=False, mode='auto', period=1)
    
    #RLROP = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, 
    #                      mode='auto', min_delta=0.00001, cooldown=1, min_lr=0)
    
    #CSVL = CSVLogger(filename = "../results/LogFile1.txt", separator=',', append=False)
    
    model.fit(X_train, Y_train,
              batch_size={{choice([64, 128])}},
              epochs=20,
              verbose=2,
              validation_data=(X_test, Y_test))
    acc = model.evaluate(X_test, Y_test, verbose=0)
    print('Test accuracy:', acc)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

In [22]:
best_run, best_model = optim.minimize(model=model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=100,
                                      trials=Trials(),
                                      notebook_name='Trial 2')

>>> Imports:
#coding=utf-8

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    import pandas as pd
except:
    pass

try:
    from sklearn.preprocessing import MinMaxScaler
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    import numpy
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.layers.core import Dense, Dropout, Activation
except:
    pass

try:
    from keras.callbacks import ReduceLROnPlateau, CSVLogger, ModelCheckpoint
except:
    pass

try:
    from keras.regularizers import l1_l2
except:
    pass

try:
    from keras.models import Sequential, Model, load_model
except:
    pass

try:
    from keras.layers import Input, Dense, Activation, Reshape, Dropout, PReLU, Concatenate, concatenate, multiply
except:
    pass

try:
    from keras.layers.norma

Epoch 10/20
 - 10s - loss: 153.2362 - val_loss: 153.2378
Epoch 11/20
 - 10s - loss: 153.2361 - val_loss: 153.2375
Epoch 12/20
 - 10s - loss: 153.2362 - val_loss: 153.2383
Epoch 13/20
 - 10s - loss: 153.2362 - val_loss: 153.2392
Epoch 14/20
 - 10s - loss: 153.2360 - val_loss: 153.2392
Epoch 15/20
 - 10s - loss: 153.2360 - val_loss: 153.2388
Epoch 16/20
 - 10s - loss: 153.2363 - val_loss: 153.2391
Epoch 17/20
 - 10s - loss: 153.2363 - val_loss: 153.2399
Epoch 18/20
 - 10s - loss: 153.2362 - val_loss: 153.2386
Epoch 19/20
 - 10s - loss: 153.2362 - val_loss: 153.2388
Epoch 20/20
 - 10s - loss: 153.2362 - val_loss: 153.2394
Test accuracy: 153.23941568196614
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 7s - loss: 467.6383 - val_loss: 60.7382
Epoch 2/20
 - 6s - loss: 60.8071 - val_loss: 61.9462
Epoch 3/20
 - 6s - loss: 61.7078 - val_loss: 62.0194
Epoch 4/20
 - 6s - loss: 61.7153 - val_loss: 61.5264
Epoch 5/20
 - 6s - loss: 61.6998 - val_loss: 61.5269
Epoch 6/20
 - 6s - los

Epoch 7/20
 - 10s - loss: 143.2911 - val_loss: 143.3065
Epoch 8/20
 - 10s - loss: 143.2909 - val_loss: 143.2934
Epoch 9/20
 - 10s - loss: 143.2908 - val_loss: 143.2938
Epoch 10/20
 - 10s - loss: 143.2907 - val_loss: 143.2934
Epoch 11/20
 - 10s - loss: 143.2906 - val_loss: 143.2994
Epoch 12/20
 - 10s - loss: 143.2905 - val_loss: 143.2991
Epoch 13/20
 - 10s - loss: 143.2906 - val_loss: 143.2929
Epoch 14/20
 - 10s - loss: 143.2906 - val_loss: 143.2928
Epoch 15/20
 - 10s - loss: 143.2903 - val_loss: 143.2934
Epoch 16/20
 - 10s - loss: 143.2902 - val_loss: 143.2989
Epoch 17/20
 - 10s - loss: 143.2903 - val_loss: 143.2940
Epoch 18/20
 - 10s - loss: 143.2901 - val_loss: 143.2926
Epoch 19/20
 - 10s - loss: 143.2901 - val_loss: 143.2926
Epoch 20/20
 - 10s - loss: 143.2902 - val_loss: 143.2960
Test accuracy: 143.29599704589845
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 13s - loss: 63.6632 - val_loss: 13.7788
Epoch 2/20
 - 11s - loss: 13.8191 - val_loss: 13.8246
Epoch 3/20
 

Epoch 5/20
 - 6s - loss: 44.2309 - val_loss: 44.0917
Epoch 6/20
 - 6s - loss: 44.3128 - val_loss: 44.2265
Epoch 7/20
 - 6s - loss: 44.6199 - val_loss: 45.0620
Epoch 8/20
 - 6s - loss: 45.2373 - val_loss: 45.3989
Epoch 9/20
 - 6s - loss: 45.4641 - val_loss: 45.6503
Epoch 10/20
 - 6s - loss: 45.4803 - val_loss: 45.1657
Epoch 11/20
 - 6s - loss: 45.4860 - val_loss: 45.6713
Epoch 12/20
 - 6s - loss: 45.5042 - val_loss: 45.6299
Epoch 13/20
 - 6s - loss: 45.5121 - val_loss: 45.4702
Epoch 14/20
 - 6s - loss: 45.5115 - val_loss: 45.1334
Epoch 15/20
 - 6s - loss: 45.5103 - val_loss: 45.8134
Epoch 16/20
 - 6s - loss: 45.5078 - val_loss: 45.5841
Epoch 17/20
 - 6s - loss: 45.5055 - val_loss: 45.4302
Epoch 18/20
 - 6s - loss: 45.5031 - val_loss: 45.4547
Epoch 19/20
 - 6s - loss: 45.5000 - val_loss: 45.5897
Epoch 20/20
 - 6s - loss: 45.4959 - val_loss: 45.3578
Test accuracy: 45.357769767252606
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 10s - loss: 339.3680 - val_loss: 40.9927
E

Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 16s - loss: 6149.6957 - val_loss: 6106.3682
Epoch 2/20
 - 12s - loss: 6106.0539 - val_loss: 6105.8584
Epoch 3/20
 - 12s - loss: 6106.0394 - val_loss: 6105.9288
Epoch 4/20
 - 12s - loss: 6106.0358 - val_loss: 6106.0740
Epoch 5/20
 - 12s - loss: 6106.0349 - val_loss: 6106.2055
Epoch 6/20
 - 12s - loss: 6106.0344 - val_loss: 6105.7672
Epoch 7/20
 - 12s - loss: 6106.0342 - val_loss: 6105.8789
Epoch 8/20
 - 12s - loss: 6106.0337 - val_loss: 6105.9618
Epoch 9/20
 - 12s - loss: 6106.0333 - val_loss: 6106.0692
Epoch 10/20
 - 12s - loss: 6106.0340 - val_loss: 6105.9874
Epoch 11/20
 - 12s - loss: 6106.0330 - val_loss: 6106.1473
Epoch 12/20
 - 12s - loss: 6106.0332 - val_loss: 6105.8914
Epoch 13/20
 - 12s - loss: 6106.0333 - val_loss: 6106.0026
Epoch 14/20
 - 12s - loss: 6106.0328 - val_loss: 6106.0417
Epoch 15/20
 - 12s - loss: 6106.0326 - val_loss: 6105.9548
Epoch 16/20
 - 12s - loss: 6106.0322 - val_loss: 6105.9424
Epoch 17/20
 -

Epoch 12/20
 - 12s - loss: 3043.1413 - val_loss: 3043.2327
Epoch 13/20
 - 12s - loss: 3043.1409 - val_loss: 3043.3760
Epoch 14/20
 - 12s - loss: 3043.1406 - val_loss: 3043.0926
Epoch 15/20
 - 12s - loss: 3043.1401 - val_loss: 3043.3172
Epoch 16/20
 - 13s - loss: 3043.1395 - val_loss: 3043.0856
Epoch 17/20
 - 12s - loss: 3043.1398 - val_loss: 3043.0251
Epoch 18/20
 - 12s - loss: 3043.1391 - val_loss: 3042.7923
Epoch 19/20
 - 12s - loss: 3043.1389 - val_loss: 3043.1425
Epoch 20/20
 - 12s - loss: 3043.1386 - val_loss: 3043.1928
Test accuracy: 3043.1928397135416
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 17s - loss: 4023.8509 - val_loss: 3980.2938
Epoch 2/20
 - 12s - loss: 3980.2888 - val_loss: 3980.2680
Epoch 3/20
 - 12s - loss: 3980.2873 - val_loss: 3980.1545
Epoch 4/20
 - 12s - loss: 3980.2866 - val_loss: 3980.3125
Epoch 5/20
 - 12s - loss: 3980.2863 - val_loss: 3980.2797
Epoch 6/20
 - 12s - loss: 3980.2853 - val_loss: 3980.1908
Epoch 7/20
 - 12s - loss: 3980.2849 

Epoch 2/20
 - 5s - loss: 420.4808 - val_loss: 420.4460
Epoch 3/20
 - 5s - loss: 420.4766 - val_loss: 420.5015
Epoch 4/20
 - 5s - loss: 420.4749 - val_loss: 420.4888
Epoch 5/20
 - 5s - loss: 420.4742 - val_loss: 420.4789
Epoch 6/20
 - 5s - loss: 420.4731 - val_loss: 420.4898
Epoch 7/20
 - 5s - loss: 420.4732 - val_loss: 420.5178
Epoch 8/20
 - 5s - loss: 420.4729 - val_loss: 420.5283
Epoch 9/20
 - 5s - loss: 420.4730 - val_loss: 420.5365
Epoch 10/20
 - 5s - loss: 420.4727 - val_loss: 420.4310
Epoch 11/20
 - 5s - loss: 420.4727 - val_loss: 420.4035
Epoch 12/20
 - 5s - loss: 420.4722 - val_loss: 420.5027
Epoch 13/20
 - 5s - loss: 420.4727 - val_loss: 420.4222
Epoch 14/20
 - 5s - loss: 420.4721 - val_loss: 420.4709
Epoch 15/20
 - 5s - loss: 420.4725 - val_loss: 420.5052
Epoch 16/20
 - 5s - loss: 420.4726 - val_loss: 420.4213
Epoch 17/20
 - 5s - loss: 420.4722 - val_loss: 420.4552
Epoch 18/20
 - 5s - loss: 420.4719 - val_loss: 420.5102
Epoch 19/20
 - 5s - loss: 420.4725 - val_loss: 420.4800


Epoch 17/20
 - 13s - loss: 471.9153 - val_loss: 471.9562
Epoch 18/20
 - 13s - loss: 471.9153 - val_loss: 471.9441
Epoch 19/20
 - 13s - loss: 471.9147 - val_loss: 471.9392
Epoch 20/20
 - 13s - loss: 471.9152 - val_loss: 471.9306
Test accuracy: 471.93059907226564
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 17s - loss: 913.6342 - val_loss: 884.6340
Epoch 2/20
 - 11s - loss: 884.6422 - val_loss: 884.6728
Epoch 3/20
 - 11s - loss: 884.6422 - val_loss: 884.6226
Epoch 4/20
 - 11s - loss: 884.6420 - val_loss: 884.5938
Epoch 5/20
 - 11s - loss: 884.6418 - val_loss: 884.7244
Epoch 6/20
 - 11s - loss: 884.6412 - val_loss: 884.5749
Epoch 7/20
 - 11s - loss: 884.6414 - val_loss: 884.6631
Epoch 8/20
 - 11s - loss: 884.6411 - val_loss: 884.6273
Epoch 9/20
 - 11s - loss: 884.6412 - val_loss: 884.6917
Epoch 10/20
 - 11s - loss: 884.6409 - val_loss: 884.6269
Epoch 11/20
 - 11s - loss: 884.6406 - val_loss: 884.6182
Epoch 12/20
 - 11s - loss: 884.6405 - val_loss: 884.5909
Epoch 13/20


Epoch 12/20
 - 6s - loss: 134.3386 - val_loss: 134.3503
Epoch 13/20
 - 6s - loss: 134.3385 - val_loss: 134.3372
Epoch 14/20
 - 6s - loss: 134.3382 - val_loss: 134.3376
Epoch 15/20
 - 6s - loss: 134.3385 - val_loss: 134.3371
Epoch 16/20
 - 6s - loss: 134.3378 - val_loss: 134.3379
Epoch 17/20
 - 6s - loss: 134.3379 - val_loss: 134.3371
Epoch 18/20
 - 6s - loss: 134.3379 - val_loss: 134.3378
Epoch 19/20
 - 6s - loss: 134.3374 - val_loss: 134.3411
Epoch 20/20
 - 6s - loss: 134.3382 - val_loss: 134.3394
Test accuracy: 134.33937866210937
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 20s - loss: 194.9768 - val_loss: 44.7140
Epoch 2/20
 - 12s - loss: 44.4709 - val_loss: 44.4004
Epoch 3/20
 - 13s - loss: 44.2933 - val_loss: 44.3066
Epoch 4/20
 - 13s - loss: 44.2204 - val_loss: 44.1571
Epoch 5/20
 - 12s - loss: 44.1522 - val_loss: 44.0289
Epoch 6/20
 - 12s - loss: 44.1345 - val_loss: 44.2133
Epoch 7/20
 - 13s - loss: 44.1000 - val_loss: 44.0344
Epoch 8/20
 - 13s - loss: 44.040

Epoch 6/20
 - 11s - loss: 140.5429 - val_loss: 140.4173
Epoch 7/20
 - 11s - loss: 140.4197 - val_loss: 140.2635
Epoch 8/20
 - 11s - loss: 140.2982 - val_loss: 140.3648
Epoch 9/20
 - 10s - loss: 140.2287 - val_loss: 141.1914
Epoch 10/20
 - 10s - loss: 140.1841 - val_loss: 139.3175
Epoch 11/20
 - 11s - loss: 140.1798 - val_loss: 140.1278
Epoch 12/20
 - 11s - loss: 140.2201 - val_loss: 140.6326
Epoch 13/20
 - 11s - loss: 140.3201 - val_loss: 140.3508
Epoch 14/20
 - 11s - loss: 140.4518 - val_loss: 140.3439
Epoch 15/20
 - 11s - loss: 140.5812 - val_loss: 141.3944
Epoch 16/20
 - 11s - loss: 140.7204 - val_loss: 140.1574
Epoch 17/20
 - 11s - loss: 140.8590 - val_loss: 140.7331
Epoch 18/20
 - 11s - loss: 140.9787 - val_loss: 141.8246
Epoch 19/20
 - 11s - loss: 141.0934 - val_loss: 141.4842
Epoch 20/20
 - 11s - loss: 141.1952 - val_loss: 140.8690
Test accuracy: 140.8690276936849
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 22s - loss: 1686.6846 - val_loss: 1655.9246
Epoch 2

 - 14s - loss: 3357.2179 - val_loss: 3357.2775
Test accuracy: 3357.2774674479165
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 24s - loss: 446.9225 - val_loss: 288.7557
Epoch 2/20
 - 14s - loss: 288.7720 - val_loss: 288.7722
Epoch 3/20
 - 14s - loss: 288.7718 - val_loss: 288.7726
Epoch 4/20
 - 13s - loss: 288.7716 - val_loss: 288.7751
Epoch 5/20
 - 14s - loss: 288.7714 - val_loss: 288.7672
Epoch 6/20
 - 13s - loss: 288.7710 - val_loss: 288.7680
Epoch 7/20
 - 13s - loss: 288.7701 - val_loss: 288.7717
Epoch 8/20
 - 13s - loss: 288.7702 - val_loss: 288.7663
Epoch 9/20
 - 13s - loss: 288.7699 - val_loss: 288.7684
Epoch 10/20
 - 13s - loss: 288.7698 - val_loss: 288.7646
Epoch 11/20
 - 13s - loss: 288.7697 - val_loss: 288.7710
Epoch 12/20
 - 13s - loss: 288.7693 - val_loss: 288.7689
Epoch 13/20
 - 13s - loss: 288.7693 - val_loss: 288.7754
Epoch 14/20
 - 13s - loss: 288.7690 - val_loss: 288.7677
Epoch 15/20
 - 13s - loss: 288.7693 - val_loss: 288.7714
Epoch 16/20
 - 13s - l

Epoch 13/20
 - 13s - loss: 5619.3815 - val_loss: 5619.8683
Epoch 14/20
 - 13s - loss: 5619.3812 - val_loss: 5619.3584
Epoch 15/20
 - 13s - loss: 5619.3809 - val_loss: 5619.4116
Epoch 16/20
 - 13s - loss: 5619.3807 - val_loss: 5619.6034
Epoch 17/20
 - 13s - loss: 5619.3804 - val_loss: 5619.6326
Epoch 18/20
 - 13s - loss: 5619.3797 - val_loss: 5619.3235
Epoch 19/20
 - 14s - loss: 5619.3797 - val_loss: 5619.0162
Epoch 20/20
 - 14s - loss: 5619.3790 - val_loss: 5619.3289
Test accuracy: 5619.328918489583
Train on 140000 samples, validate on 60000 samples
Epoch 1/20
 - 25s - loss: 6272.5727 - val_loss: 6228.5086
Epoch 2/20
 - 14s - loss: 6228.7251 - val_loss: 6228.5231
Epoch 3/20
 - 14s - loss: 6228.7203 - val_loss: 6228.9628
Epoch 4/20
 - 14s - loss: 6228.7168 - val_loss: 6228.9038
Epoch 5/20
 - 14s - loss: 6228.7142 - val_loss: 6229.0622
Epoch 6/20
 - 14s - loss: 6228.7103 - val_loss: 6228.4396
Epoch 7/20
 - 14s - loss: 6228.7092 - val_loss: 6228.3376
Epoch 8/20
 - 14s - loss: 6228.7067 - 

KeyboardInterrupt: 

In [None]:
X_train, Y_train, X_test, Y_test = data()
print("Evalutation of best performing model:")
print(best_model.evaluate(X_test, Y_test))

In [1]:
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Dense, Activation, Reshape, Dropout, PReLU, Concatenate, concatenate, multiply
from keras.layers.normalization import BatchNormalization
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam, Nadam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, CSVLogger, ModelCheckpoint, LearningRateScheduler
from keras.constraints import nonneg

from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, OneHotEncoder
from sklearn.metrics import mean_squared_error, roc_curve, confusion_matrix, accuracy_score

import pickle, sys, h5py, numpy
import pandas as pd
from math import sqrt
import numpy as np
numpy.random.seed(321)

Using TensorFlow backend.


In [72]:
# Importing Training Data
df = pd.read_csv("../data/train.csv", sep=",")

In [73]:
# Importing Testing Data
df_test = pd.read_csv("../data/test.csv", sep=",")

In [83]:
model = load_model('../results/BestModel1.h5')

In [75]:
# Sanity Checks
print("df-",df.shape)
print("df-",df.columns)

print(df.columns[0:2])
print(df.columns[2:])

df- (200000, 202)
df- Index(['ID_code', 'target', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4',
       'var_5', 'var_6', 'var_7',
       ...
       'var_190', 'var_191', 'var_192', 'var_193', 'var_194', 'var_195',
       'var_196', 'var_197', 'var_198', 'var_199'],
      dtype='object', length=202)
Index(['ID_code', 'target'], dtype='object')
Index(['var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6', 'var_7',
       'var_8', 'var_9',
       ...
       'var_190', 'var_191', 'var_192', 'var_193', 'var_194', 'var_195',
       'var_196', 'var_197', 'var_198', 'var_199'],
      dtype='object', length=200)


In [76]:
X = df[df.columns[2:]].astype(float).values
y_clf = np.array(df['target']).astype(float)

X_test = df_test[df_test.columns[1:]].astype(float).values

In [77]:
X_backup = X

In [78]:
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)

scaler_x_test = MinMaxScaler(feature_range=(0, 1))
X_test = scaler.fit_transform(X_test)

In [79]:
y_clf

array([0., 0., 0., ..., 0., 0., 0.])

In [80]:
print(X.shape)
print(y_clf.shape)

print(X_test.shape)

(200000, 200)
(200000,)
(200000, 200)


In [81]:
dropout_value = 0.2

In [82]:
input_model = Input(shape = (X.shape[1],))

output_model = Dense(512, activation='relu')(input_model)
output_model = Dense(256, activation='relu')(output_model)
output_model = Dense(128, activation='relu')(output_model)
output_model = Dense(1, activation='sigmoid')(output_model)

model = Model(inputs=input_model, 
              outputs=output_model)

lr1 = Adam(lr=0.001)

model.compile(loss='binary_crossentropy',
              optimizer=lr1,
              metrics = ['binary_accuracy'])

MCP = ModelCheckpoint(filepath = "../results/BestModel1.h5", monitor='val_loss', verbose=0, save_best_only=True, 
                      save_weights_only=False, mode='auto', period=1)

RLROP = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, 
                          mode='auto', min_delta=0.00001, cooldown=1, min_lr=0)

CSVL = CSVLogger(filename = "../results/LogFile1.txt", separator=',', append=False)

In [27]:
epochs = 100 # 20

model.fit(x = X, 
          y = y_clf,
          validation_split = 0.3,
          epochs=epochs, 
          batch_size= 1024,
          callbacks = [MCP, RLROP, CSVL],
          verbose = 1,
          shuffle = True)

Train on 140000 samples, validate on 60000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100

Epoch 00040: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100

Epoch 00047: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100

Epoch 00052: ReduceLROnPlateau redu

Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100

Epoch 00097: ReduceLROnPlateau reducing learning rate to 1.0000001095066122e-16.
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x16b014912e8>

In [40]:
epochs = 100 # 20

model.fit(x = X, 
          y = y_clf,
          validation_split = 0.3,
          epochs=epochs, 
          batch_size= 1024,
          callbacks = [MCP, RLROP, CSVL],
          verbose = 1,
          shuffle = True)

Train on 140000 samples, validate on 60000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

Epoch 00006: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100

Epoch 00011: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100

Epoch 00016: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100

Epoch 00021: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-09.
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100

Epoch 00026: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-10.
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100

Epoch 00031: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-11.
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100

E

Epoch 90/100
Epoch 91/100

Epoch 00091: ReduceLROnPlateau reducing learning rate to 1.0000000944832675e-23.
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100

Epoch 00096: ReduceLROnPlateau reducing learning rate to 1.0000000787060494e-24.
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x16b025c2da0>

In [84]:
# On Training Data
model.evaluate(x = X, 
               y = y_clf,
              batch_size = 1024)



[0.22326479040145875, 0.91733]

In [85]:
best_model = load_model("../results/BestModel1.h5")

In [86]:
# On Training Data
best_model.evaluate(x = X, 
               y = y_clf,
              batch_size = 1024)



[0.22326479040145875, 0.91733]

In [87]:
old_model = model
model = best_model

In [90]:
y_preds = model.predict(X, batch_size = 1024)

y_test_preds = model.predict(X_test, batch_size = 1024)

In [91]:
y_preds.shape

(200000, 1)

In [92]:
(y_clf,y_preds.reshape(y_preds.shape[0],))

(array([0., 0., 0., ..., 0., 0., 0.]),
 array([0.02684054, 0.7223495 , 0.04017243, ..., 0.09523162, 0.02365717,
        0.00561482], dtype=float32))

In [93]:
from sklearn.metrics import roc_auc_score

roc_auc_score(y_clf,y_preds.reshape(y_preds.shape[0],))

0.8720867789797286

In [94]:
from keras.models import load_model

old_model.save('../results/Trial1.h5')

In [95]:
# How to threshold for an imbalanced problem
def Find_Optimal_Cutoff(target, predicted):
    """ Find the optimal probability cutoff point for a classification model related to event rate
    Parameters
    ----------
    target : Matrix with dependent or target data, where rows are observations

    predicted : Matrix with predicted data, where rows are observations

    Returns
    -------     
    list type, with optimal cutoff value

    """
    fpr, tpr, threshold = roc_curve(target, predicted)
    i = np.arange(len(tpr)) 
    roc = pd.DataFrame({'tf' : pd.Series(tpr-(1-fpr), index=i), 'threshold' : pd.Series(threshold, index=i)})
    roc_t = roc.iloc[(roc.tf-0).abs().argsort()[:1]]

    return list(roc_t['threshold']) 

In [97]:
threshold = Find_Optimal_Cutoff(y_clf, y_preds)[0]
threshold

0.09574279189109802

In [98]:
y_p = np.where(y_preds>threshold,1,0).reshape(-1)
y_test_p = np.where(y_test_preds>threshold,1,0).reshape(-1)

In [99]:
confusion_matrix(y_clf, y_p)

array([[142280,  37622],
       [  4204,  15894]], dtype=int64)

In [100]:
np.array(y_clf == y_p).mean()

0.79087

In [101]:
accuracy_score(y_clf,y_p, normalize = False)

158174

In [104]:
Train_results = pd.DataFrame({'ID_code' : df['ID_code'].tolist(),'actual' : y_clf,'pred' : y_preds[:,0]})

Train_results.to_csv("../results/TrainResults_Trial1.csv", index = False)

Test_results = pd.DataFrame({'ID_code' : df_test['ID_code'].tolist(),'target' : y_test_p})

Test_results.to_csv("../results/TestResults_Trial1.csv", index = False)

In [105]:
Train_results.columns

Index(['ID_code', 'actual', 'pred'], dtype='object')

In [106]:
Test_results.columns

Index(['ID_code', 'target'], dtype='object')

In [108]:
Train_results.head()

Unnamed: 0,ID_code,actual,pred
0,train_0,0.0,0.026841
1,train_1,0.0,0.72235
2,train_2,0.0,0.040172
3,train_3,0.0,0.244595
4,train_4,0.0,0.08465


In [107]:
Test_results.head()

Unnamed: 0,ID_code,target
0,test_0,1
1,test_1,1
2,test_2,0
3,test_3,1
4,test_4,0
