In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, KFold, train_test_split, GridSearchCV, StratifiedKFold
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import r2_score, mean_squared_error

In [2]:
# Keras is a deep learning library that wraps the efficient numerical libraries Theano and TensorFlow.
# It provides a clean and simple API that allows you to define and evaluate deep learning models in just a few lines of code.from keras.models import Sequential, load_model
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping, ModelCheckpoint
# define custom R2 metrics for Keras backend
from keras import backend as K
# to tune the NN
from keras.constraints import maxnorm
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import PReLU
from keras.regularizers import l2
import h5py

Using TensorFlow backend.


In [3]:
# define path to save model
import os
model_path = "../../data/Mercedes_Benz_Greener_Manufacturing/model/model_nn.h5"

## 6. Model

In [4]:
# data
dt_model = pd.read_csv("../../data/Mercedes_Benz_Greener_Manufacturing/data/dt_all_preprocess.csv")

In [5]:
# # remove ohe
# dt_model = dt_model.drop(dt_model.filter(regex = "Encode_ohe").columns, axis = 1)

In [6]:
# r_2 for nn
def r2_keras(y_true, y_pred):
    SS_res =  K.sum(K.square( y_true - y_pred )) 
    SS_tot = K.sum(K.square( y_true - K.mean(y_true) ) ) 
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

In [7]:
# X, y, ID
X_train_all = dt_model.loc[dt_model["IsTrainTest"] == "train"].drop(["ID", "y", "IsTrainTest"], axis = 1).as_matrix()
X_test = dt_model.loc[dt_model["IsTrainTest"] == "test"].drop(["ID", "y", "IsTrainTest"], axis = 1).as_matrix()
y_train_all = dt_model.loc[dt_model["IsTrainTest"] == "train"].y.values
y_test = dt_model.loc[dt_model["IsTrainTest"] == "test"].y.values
ID_train_all = dt_model.loc[dt_model["IsTrainTest"] == "train"].ID.values
ID_test = dt_model.loc[dt_model["IsTrainTest"] == "test"].ID.values

In [8]:
# keras
def model(dropout_level = 0.25, activation = 'tanh'):
    model = Sequential()
    model.add(Dense(1024, input_dim=input_dims, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(768, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(768, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(768, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))

    model.add(Dense(768, kernel_initializer="he_normal", kernel_regularizer = l2(1.e-5)))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(loss = "mean_squared_error", # one may use 'mean_absolute_error' as alternative
                  optimizer = "adam",
                  metrics = [r2_keras, "accuracy"] # you can add several if needed
                 )
    
    # Visualize NN architecture
    print(model.summary())
    return model

In [9]:
input_dims = X_train_all.shape[1]
estimator = KerasRegressor(
    build_fn = model, 
    nb_epoch = 300, 
    batch_size = 35,
    verbose = 1
)

In [10]:
# prepare callbacks
callbacks = [
    EarlyStopping(
        monitor = 'val_loss', 
        patience = 20,
        verbose = 1),
    ModelCheckpoint(
        model_path, 
        monitor = 'val_loss', 
        save_best_only = True, 
        verbose = 0)
]

In [11]:
#K-FOLD
n_splits = 5
kf = KFold(n_splits = n_splits)
kf.get_n_splits(X_train_all)

predictions = np.zeros((X_train_all.shape[0], n_splits))
score = 0

for fold, (ind_train, ind_valid) in enumerate(kf.split(X_train_all)):

    X_train, X_valid = X_train_all[ind_train, :], X_train_all[ind_valid, :]
    y_train, y_valid = y_train_all[ind_train], y_train_all[ind_valid]

    # fit estimator
    history = estimator.fit(
        X_train, 
        y_train, 
        batch_size = 64,
        epochs = 500,
        validation_data = (X_valid, y_valid),
        verbose = 2,
        callbacks = callbacks,
        shuffle = True
    )
    
    if os.path.isfile(model_path):
        history.model.load_weights(model_path)
    
    pred = history.model.predict(X_valid)
    
    score_fold = r2_score(y_valid, pred)
    score += score_fold

    print('Fold %d: Score %f'%(fold, score_fold))


score /= n_splits

print('=====================')

print( 'Final Score %f'%score)

print('=====================')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1024)              1610752   
_________________________________________________________________
batch_normalization_1 (Batch (None, 1024)              4096      
_________________________________________________________________
activation_1 (Activation)    (None, 1024)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
batch_normalization_2 (Batch (None, 1024)              4096      
_________________________________________________________________
activation_2 (Activation)    (None, 1024)              0         
__________

1s - loss: 147.4221 - r2_keras: 0.0787 - acc: 0.0000e+00 - val_loss: 85.8081 - val_r2_keras: 0.4398 - val_acc: 0.0024
Epoch 23/500
1s - loss: 200.6510 - r2_keras: -2.0911e-01 - acc: 2.9700e-04 - val_loss: 149.7524 - val_r2_keras: -4.0358e-03 - val_acc: 0.0000e+00
Epoch 24/500
2s - loss: 152.7059 - r2_keras: 0.0302 - acc: 0.0000e+00 - val_loss: 76.0262 - val_r2_keras: 0.5054 - val_acc: 0.0012
Epoch 25/500
1s - loss: 139.3512 - r2_keras: 0.1325 - acc: 2.9700e-04 - val_loss: 79.6005 - val_r2_keras: 0.4900 - val_acc: 0.0012
Epoch 26/500
1s - loss: 131.4802 - r2_keras: 0.1842 - acc: 2.9700e-04 - val_loss: 89.7833 - val_r2_keras: 0.4031 - val_acc: 0.0012
Epoch 27/500
2s - loss: 135.3313 - r2_keras: 0.1506 - acc: 2.9700e-04 - val_loss: 69.5847 - val_r2_keras: 0.5526 - val_acc: 0.0012
Epoch 28/500
1s - loss: 133.8252 - r2_keras: 0.1755 - acc: 0.0000e+00 - val_loss: 70.2465 - val_r2_keras: 0.5472 - val_acc: 0.0000e+00
Epoch 29/500
2s - loss: 124.5017 - r2_keras: 0.2275 - acc: 5.9400e-04 - val_l

Train on 3367 samples, validate on 842 samples
Epoch 1/500
2s - loss: 9769.7696 - r2_keras: -6.9711e+01 - acc: 0.0000e+00 - val_loss: 7927.3973 - val_r2_keras: -4.5701e+01 - val_acc: 0.0000e+00
Epoch 2/500
1s - loss: 7367.5559 - r2_keras: -5.1636e+01 - acc: 0.0000e+00 - val_loss: 4627.0102 - val_r2_keras: -2.6072e+01 - val_acc: 0.0000e+00
Epoch 3/500
1s - loss: 4808.6203 - r2_keras: -3.3196e+01 - acc: 0.0000e+00 - val_loss: 2539.5953 - val_r2_keras: -1.3715e+01 - val_acc: 0.0000e+00
Epoch 4/500
1s - loss: 2927.1137 - r2_keras: -1.9516e+01 - acc: 0.0000e+00 - val_loss: 1278.5499 - val_r2_keras: -6.2589e+00 - val_acc: 0.0000e+00
Epoch 5/500
1s - loss: 2037.4841 - r2_keras: -1.3717e+01 - acc: 2.9700e-04 - val_loss: 1116.7127 - val_r2_keras: -5.2830e+00 - val_acc: 0.0000e+00
Epoch 6/500
1s - loss: 1124.1732 - r2_keras: -6.9715e+00 - acc: 0.0000e+00 - val_loss: 762.8062 - val_r2_keras: -3.2999e+00 - val_acc: 0.0024
Epoch 7/500
2s - loss: 774.0754 - r2_keras: -4.4590e+00 - acc: 2.9700e-04 - 

Train on 3367 samples, validate on 842 samples
Epoch 1/500
2s - loss: 9828.6892 - r2_keras: -6.6012e+01 - acc: 0.0000e+00 - val_loss: 8163.4735 - val_r2_keras: -5.6171e+01 - val_acc: 0.0000e+00
Epoch 2/500
1s - loss: 7853.9678 - r2_keras: -5.2068e+01 - acc: 0.0000e+00 - val_loss: 4652.1143 - val_r2_keras: -3.1692e+01 - val_acc: 0.0000e+00
Epoch 3/500
1s - loss: 5690.7834 - r2_keras: -3.7809e+01 - acc: 0.0000e+00 - val_loss: 4249.3979 - val_r2_keras: -2.8757e+01 - val_acc: 0.0000e+00
Epoch 4/500
2s - loss: 3935.9462 - r2_keras: -2.5906e+01 - acc: 0.0000e+00 - val_loss: 3625.0107 - val_r2_keras: -2.4395e+01 - val_acc: 0.0000e+00
Epoch 5/500
1s - loss: 2682.7732 - r2_keras: -1.7098e+01 - acc: 0.0000e+00 - val_loss: 2367.3918 - val_r2_keras: -1.5817e+01 - val_acc: 0.0000e+00
Epoch 6/500
1s - loss: 1618.7499 - r2_keras: -1.0096e+01 - acc: 2.9700e-04 - val_loss: 1714.5012 - val_r2_keras: -1.1117e+01 - val_acc: 0.0000e+00
Epoch 7/500
2s - loss: 1236.2705 - r2_keras: -7.3123e+00 - acc: 2.9700e

0s - loss: 99.8065 - r2_keras: 0.3818 - acc: 2.9700e-04 - val_loss: 79.3565 - val_r2_keras: 0.4855 - val_acc: 0.0000e+00
Epoch 62/500
0s - loss: 96.6427 - r2_keras: 0.4003 - acc: 0.0000e+00 - val_loss: 69.5705 - val_r2_keras: 0.5572 - val_acc: 0.0012
Epoch 63/500
0s - loss: 97.0034 - r2_keras: 0.4173 - acc: 0.0000e+00 - val_loss: 68.0324 - val_r2_keras: 0.5696 - val_acc: 0.0012
Epoch 00062: early stopping
Fold 2: Score 0.618874
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_31 (Dense)             (None, 1024)              1610752   
_________________________________________________________________
batch_normalization_28 (Batc (None, 1024)              4096      
_________________________________________________________________
activation_28 (Activation)   (None, 1024)              0         
_________________________________________________________________
dropout_28 (Dropout)         (None, 1024

0s - loss: 133.0065 - r2_keras: 0.1481 - acc: 2.9700e-04 - val_loss: 98.7594 - val_r2_keras: 0.3925 - val_acc: 0.0000e+00
Epoch 20/500
0s - loss: 127.5612 - r2_keras: 0.2095 - acc: 0.0000e+00 - val_loss: 87.9520 - val_r2_keras: 0.4521 - val_acc: 0.0024
Epoch 21/500
0s - loss: 133.6195 - r2_keras: 0.1411 - acc: 2.9700e-04 - val_loss: 104.6115 - val_r2_keras: 0.3551 - val_acc: 0.0000e+00
Epoch 22/500
0s - loss: 138.8291 - r2_keras: 0.1152 - acc: 2.9700e-04 - val_loss: 111.1553 - val_r2_keras: 0.2795 - val_acc: 0.0000e+00
Epoch 23/500
0s - loss: 125.6824 - r2_keras: 0.2156 - acc: 0.0000e+00 - val_loss: 82.2570 - val_r2_keras: 0.4895 - val_acc: 0.0000e+00
Epoch 24/500
0s - loss: 122.8640 - r2_keras: 0.2230 - acc: 5.9400e-04 - val_loss: 91.5435 - val_r2_keras: 0.4119 - val_acc: 0.0000e+00
Epoch 25/500
0s - loss: 118.3552 - r2_keras: 0.2575 - acc: 2.9700e-04 - val_loss: 83.4461 - val_r2_keras: 0.4747 - val_acc: 0.0000e+00
Epoch 26/500
0s - loss: 117.3707 - r2_keras: 0.2788 - acc: 0.0015 - va

Train on 3368 samples, validate on 841 samples
Epoch 1/500
1s - loss: 9911.6602 - r2_keras: -6.5131e+01 - acc: 0.0000e+00 - val_loss: 8467.0732 - val_r2_keras: -6.4851e+01 - val_acc: 0.0000e+00
Epoch 2/500
0s - loss: 7694.0727 - r2_keras: -4.9522e+01 - acc: 0.0000e+00 - val_loss: 4748.9309 - val_r2_keras: -3.5961e+01 - val_acc: 0.0000e+00
Epoch 3/500
0s - loss: 5086.3709 - r2_keras: -3.2257e+01 - acc: 0.0000e+00 - val_loss: 3248.8483 - val_r2_keras: -2.4381e+01 - val_acc: 0.0000e+00
Epoch 4/500
0s - loss: 3222.6010 - r2_keras: -2.0518e+01 - acc: 0.0000e+00 - val_loss: 2249.6190 - val_r2_keras: -1.6600e+01 - val_acc: 0.0000e+00
Epoch 5/500
0s - loss: 2054.2886 - r2_keras: -1.2500e+01 - acc: 0.0000e+00 - val_loss: 1164.3658 - val_r2_keras: -8.0209e+00 - val_acc: 0.0000e+00
Epoch 6/500
0s - loss: 1274.7617 - r2_keras: -7.3594e+00 - acc: 2.9691e-04 - val_loss: 1026.7409 - val_r2_keras: -6.9689e+00 - val_acc: 0.0000e+00
Epoch 7/500
0s - loss: 847.5003 - r2_keras: -4.4695e+00 - acc: 5.9382e-

0s - loss: 109.5098 - r2_keras: 0.3395 - acc: 2.9691e-04 - val_loss: 53.0051 - val_r2_keras: 0.5984 - val_acc: 0.0000e+00
Epoch 62/500
0s - loss: 106.3331 - r2_keras: 0.3665 - acc: 0.0015 - val_loss: 60.4082 - val_r2_keras: 0.5424 - val_acc: 0.0000e+00
Epoch 63/500
0s - loss: 107.9065 - r2_keras: 0.3533 - acc: 0.0015 - val_loss: 67.1877 - val_r2_keras: 0.4906 - val_acc: 0.0000e+00
Epoch 64/500
0s - loss: 109.5214 - r2_keras: 0.3538 - acc: 0.0015 - val_loss: 57.3662 - val_r2_keras: 0.5663 - val_acc: 0.0000e+00
Epoch 65/500
0s - loss: 102.8165 - r2_keras: 0.3759 - acc: 0.0000e+00 - val_loss: 60.0273 - val_r2_keras: 0.5470 - val_acc: 0.0000e+00
Epoch 66/500
0s - loss: 106.4984 - r2_keras: 0.3630 - acc: 0.0015 - val_loss: 57.7389 - val_r2_keras: 0.5631 - val_acc: 0.0000e+00
Epoch 67/500
0s - loss: 101.6404 - r2_keras: 0.3972 - acc: 8.9074e-04 - val_loss: 60.9484 - val_r2_keras: 0.5391 - val_acc: 0.0000e+00
Epoch 68/500
0s - loss: 103.4919 - r2_keras: 0.3858 - acc: 5.9382e-04 - val_loss: 61