In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, KFold, train_test_split, GridSearchCV, StratifiedKFold
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import r2_score, mean_squared_error

In [2]:
# Keras is a deep learning library that wraps the efficient numerical libraries Theano and TensorFlow.
# It provides a clean and simple API that allows you to define and evaluate deep learning models in just a few lines of code.from keras.models import Sequential, load_model
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping, ModelCheckpoint
# define custom R2 metrics for Keras backend
from keras import backend as K
# to tune the NN
from keras.constraints import maxnorm
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import PReLU
from keras.regularizers import l2
import h5py

Using TensorFlow backend.


In [3]:
# define path to save model
import os
model_path = "../../data/Mercedes_Benz_Greener_Manufacturing/model/model_nn.h5"

## 6. Model

In [4]:
# data
dt_model = pd.read_csv("../../data/Mercedes_Benz_Greener_Manufacturing/data/dt_all_preprocess.csv")

In [5]:
# remove ohe
dt_model = dt_model.drop(dt_model.filter(regex = "Encode_ohe").columns, axis = 1)

In [6]:
# r_2 for nn
def r2_keras(y_true, y_pred):
    SS_res =  K.sum(K.square( y_true - y_pred )) 
    SS_tot = K.sum(K.square( y_true - K.mean(y_true) ) ) 
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

In [7]:
# X, y, ID
X_train_all = dt_model.loc[dt_model["IsTrainTest"] == "train"].drop(["ID", "y", "IsTrainTest"], axis = 1).as_matrix()
X_test = dt_model.loc[dt_model["IsTrainTest"] == "test"].drop(["ID", "y", "IsTrainTest"], axis = 1).as_matrix()
y_train_all = dt_model.loc[dt_model["IsTrainTest"] == "train"].y.values
y_test = dt_model.loc[dt_model["IsTrainTest"] == "test"].y.values
ID_train_all = dt_model.loc[dt_model["IsTrainTest"] == "train"].ID.values
ID_test = dt_model.loc[dt_model["IsTrainTest"] == "test"].ID.values

In [8]:
# keras
def model(dropout_level = 0.25, activation = 'tanh'):
    model = Sequential()
    model.add(Dense(1024, input_dim=input_dims, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1024, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(768, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(768, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(768, kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))

    model.add(Dense(768, kernel_initializer="he_normal", kernel_regularizer = l2(1.e-5)))
    model.add(BatchNormalization())
    model.add(Activation(activation))
    model.add(Dropout(dropout_level))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(loss = "mean_squared_error", # one may use 'mean_absolute_error' as alternative
                  optimizer = "adam",
                  metrics = [r2_keras, "accuracy"] # you can add several if needed
                 )
    
    # Visualize NN architecture
    print(model.summary())
    return model

In [9]:
input_dims = X_train_all.shape[1]
estimator = KerasRegressor(
    build_fn = model, 
    nb_epoch = 300, 
    batch_size = 35,
    verbose = 1
)

In [10]:
# prepare callbacks
callbacks = [
    EarlyStopping(
        monitor = 'val_loss', 
        patience = 20,
        verbose = 1),
    ModelCheckpoint(
        model_path, 
        monitor = 'val_loss', 
        save_best_only = True, 
        verbose = 0)
]

In [11]:
#K-FOLD
n_splits = 5
kf = KFold(n_splits = n_splits)
kf.get_n_splits(X_train_all)

predictions = np.zeros((X_train_all.shape[0], n_splits))
score = 0

for fold, (ind_train, ind_valid) in enumerate(kf.split(X_train_all)):

    X_train, X_valid = X_train_all[ind_train, :], X_train_all[ind_valid, :]
    y_train, y_valid = y_train_all[ind_train], y_train_all[ind_valid]

    # fit estimator
    history = estimator.fit(
        X_train, 
        y_train, 
        batch_size = 64,
        epochs = 500,
        validation_data = (X_valid, y_valid),
        verbose = 2,
        callbacks = callbacks,
        shuffle = True
    )
    
    if os.path.isfile(model_path):
        history.model.load_weights(model_path)
    
    pred = history.model.predict(X_valid)
    
    score_fold = r2_score(y_valid, pred)
    score += score_fold

    print('Fold %d: Score %f'%(fold, score_fold))


score /= n_splits

print('=====================')

print( 'Final Score %f'%score)

print('=====================')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1024)              1394688   
_________________________________________________________________
batch_normalization_1 (Batch (None, 1024)              4096      
_________________________________________________________________
activation_1 (Activation)    (None, 1024)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
batch_normalization_2 (Batch (None, 1024)              4096      
_________________________________________________________________
activation_2 (Activation)    (None, 1024)              0         
__________

2s - loss: 136.6703 - r2_keras: 0.1447 - acc: 2.9700e-04 - val_loss: 77.6855 - val_r2_keras: 0.4975 - val_acc: 0.0000e+00
Epoch 23/500
2s - loss: 114.6457 - r2_keras: 0.2843 - acc: 8.9100e-04 - val_loss: 79.6507 - val_r2_keras: 0.4944 - val_acc: 0.0000e+00
Epoch 24/500
1s - loss: 112.9420 - r2_keras: 0.3063 - acc: 8.9100e-04 - val_loss: 79.0859 - val_r2_keras: 0.4909 - val_acc: 0.0000e+00
Epoch 25/500
1s - loss: 117.1370 - r2_keras: 0.2839 - acc: 5.9400e-04 - val_loss: 80.5651 - val_r2_keras: 0.4866 - val_acc: 0.0012
Epoch 26/500
2s - loss: 125.1020 - r2_keras: 0.2209 - acc: 0.0012 - val_loss: 71.0640 - val_r2_keras: 0.5425 - val_acc: 0.0000e+00
Epoch 27/500
1s - loss: 125.0008 - r2_keras: 0.2153 - acc: 5.9400e-04 - val_loss: 74.3640 - val_r2_keras: 0.5333 - val_acc: 0.0024
Epoch 28/500
1s - loss: 115.3107 - r2_keras: 0.2877 - acc: 2.9700e-04 - val_loss: 77.8244 - val_r2_keras: 0.5004 - val_acc: 0.0000e+00
Epoch 29/500
2s - loss: 121.4988 - r2_keras: 0.2625 - acc: 5.9400e-04 - val_loss

AttributeError: 'History' object has no attribute 'load_weights'

In [None]:
r2_score(y_valid, pred)

In [None]:
plt.scatter(pred, y_valid)
plt.show()