In [11]:
import pandas as pd
from pandas.tools.plotting import scatter_matrix as smplot
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam, SGD
from keras.regularizers import l2
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.metrics import r2_score, accuracy_score, precision_score, f1_score, zero_one_loss, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.decomposition import PCA

% matplotlib inline

In [23]:
df = pd.read_csv('../data/HR_comma_sep.csv')
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [24]:
jobs = df.sales.unique()
# categorize jobs column
dfjob = pd.get_dummies(df['sales'], prefix='job')
df = df.drop('sales', axis=1)
df = pd.concat([df, dfjob], axis=1)
# categorize salary column
dfslry = pd.get_dummies(df['salary'], prefix='salary_level')
df = df.drop('salary', axis=1)
df = pd.concat([df, dfslry], axis=1)

In [25]:
dffeature = df.drop('left', axis=1)

from sklearn.preprocessing import StandardScaler
ss = StandardScaler()

dffeature.iloc[:,2:5] = ss.fit_transform(dffeature.iloc[:,2:5])

X = dffeature.values

In [26]:
stopping_variance = 0.95 # stopping criteria
for n_comps in range(X.shape[1], 0, -1):
    pca = PCA(n_components = n_comps)
    pca.fit(X)
    if sum(pca.explained_variance_ratio_) >= stopping_variance:
        pca_sv = pca
    
    print('======================== ', n_comps, ' components =========================')
    print('===== explained variance ratio: ===========================================')
    print(pca.explained_variance_ratio_)
    print('\n')
    print("total variance explained: {:0.3f}".format(sum(pca.explained_variance_ratio_)))
    print('\n')

[  3.27878050e-01   1.95000057e-01   1.24071661e-01   9.81812308e-02
   5.09566149e-02   3.53485973e-02   2.67422105e-02   2.55568736e-02
   2.31983493e-02   1.58645338e-02   1.30853374e-02   1.26442883e-02
   1.17932476e-02   1.10656420e-02   1.07162373e-02   8.67959679e-03
   4.96001542e-03   4.25745676e-03   3.77508942e-33   1.39687060e-33]


total variance explained: 1.000


[  3.27878050e-01   1.95000057e-01   1.24071661e-01   9.81812308e-02
   5.09566149e-02   3.53485973e-02   2.67422105e-02   2.55568736e-02
   2.31983493e-02   1.58645338e-02   1.30853374e-02   1.26442883e-02
   1.17932476e-02   1.10656420e-02   1.07162373e-02   8.67959679e-03
   4.96001542e-03   4.25745676e-03   3.77508942e-33]


total variance explained: 1.000


[ 0.32787805  0.19500006  0.12407166  0.09818123  0.05095661  0.0353486
  0.02674221  0.02555687  0.02319835  0.01586453  0.01308534  0.01264429
  0.01179325  0.01106564  0.01071624  0.0086796   0.00496002  0.00425746]


total variance explained: 1.000


In [34]:
def RunModel(A, B, C, D, model, epos, callbacks=None, cvsplit = 0.0):
    history = model.fit(A, B, validation_split = cvsplit, epochs = epos, verbose=0, callbacks = callbacks)

#     print('The resulted weights W are: \n', model.get_weights()[0])
#     print('The resulted bias b is: ', model.get_weights()[1])
#     print('--------------------------- plot the loss function ------------------------------------')
    
    N_epo = len(history.history['loss'])
    plt.figure(figsize=(16,5))
    plt.plot(np.linspace(1, N_epo, N_epo), history.history['loss'], c='g', label='training set loss')
    if cvsplit > 0.:
        plt.plot(np.linspace(1, N_epo, N_epo), history.history['val_loss'], c='r', label='CV set loss')
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
#     Nticks = N_epo+1 if(N_epo < 40) else 21
    plt.xticks(np.linspace(0, N_epo, N_epo+1 if(N_epo < 40) else 21))
    plt.legend()
    plt.grid()
    plt.show()
    print('----------------------------- Accuracy Score ----------------------------------')
    print("Training set: {:0.3f}".format(accuracy_score(B, model.predict(A).round())))
    print("Testing set: {:0.3f}".format(accuracy_score(D, model.predict(C).round())))
    print('----------------------------- Misclassification Rate ----------------------------------')
    print("Training set: {:0.3f}".format(zero_one_loss(B, model.predict(A).round())))
    print("Testing set: {:0.3f}".format(zero_one_loss(D, model.predict(C).round())))
    print('----------------------------- Classification Report ----------------------------------')
    print("Training set:", classification_report(B, model.predict(A).round()))
    print("Testing set:", classification_report(D, model.predict(C).round()))

    plt.scatter(A[:,0], B, c='blue', label='real', alpha=0.8)
    plt.scatter(A[:,0], model.predict(A).round(), c='red', label='prediction', alpha=0.2)
    plt.legend()
    plt.grid()
    plt.show()

In [29]:
y = df.left.values
X_1pca = pca_sv.transform(X)
A, C, B, D = train_test_split(X_1pca, y, test_size=0.1)

In [35]:
def CreateDNNModel(input_dim, epos, opt=Adam(lr=0.1), lossfunc = 'binary_crossentropy', metrics = None,
                   hidden_nodes=[1], activators = ['sigmoid'], reg=None, verbose=1):
    
    model = Sequential()
    NNlayout = zip(hidden_nodes, activators)
    for i_layer, layerparam in enumerate(NNlayout):
        if i_layer == 0:
            model.add(Dense(layerparam[0], input_dim=input_dim, kernel_regularizer = reg))
        else:
            model.add(Dense(layerparam[0], kernel_regularizer = reg))
        model.add(Activation(layerparam[1]))
    if verbose:
        print(model.summary())    
    model.compile(optimizer = opt, loss = lossfunc, metrics=metrics)
#     print('The initial weights W are: \n', model.get_weights()[0])
#     print('The initial bias b is: ', model.get_weights()[1])
#     print('---------------------------------------------------------------')
    return model

### use "CLOSURE" to pass parameters in the KerasClassifier fn

In [190]:
# use a closure
def CreateModel(input_dim, epos, opt=Adam(lr=0.1), lossfunc = 'binary_crossentropy',
                metrics = None, hidden_nodes=[1], activators = ['sigmoid'], reg=None, verbose=0):        
    def do():
        return CreateDNNModel(input_dim, epos, reg=reg, hidden_nodes=hidden_nodes, opt=opt, lossfunc=lossfunc,
                              activators=activators, verbose=verbose, metrics=metrics)
    return do

def KerasCrossVal(A, B, myepos, myreg, myhidden_nodes, myactivators, myopt=Adam(lr=0.1)): 
    model = KerasClassifier(build_fn=CreateModel(A.shape[1], myepos, reg=myreg, opt=myopt, hidden_nodes=myhidden_nodes, 
                                                 activators=myactivators, metrics=['accuracy']), 
                            verbose=0)

    scores = cross_val_score(model, A, B, cv = KFold(5, shuffle=True))

    return scores

In [188]:
myepos = 100
myreg = l2(0.01)
myhidden_nodes=[20, 1]
myactivators = ['relu','sigmoid']
KerasCrossVal(A, B, myepos, myreg, myhidden_nodes, myactivators)

array([ 0.90666667,  0.86703704,  0.83259259,  0.78518519,  0.7706558 ])

## use Cross Validation to find the best parameter settings

In [193]:
myepos = 20
myreg = l2(0.01)
hidden_nodes_iter = [[5,1],[10,1]]
myactivators_iter = ['relu','sigmoid']
for myhidden_nodes in hidden_nodes_iter:
    print("================= tested DNN with layers : ", myhidden_nodes, "==================" )
    scores = KerasCrossVal(A, B, myepos, myreg, myhidden_nodes, myactivators)
    print("The cross validation accuracy is {:0.4f} ± {:0.4f}".format(scores.mean(), scores.std()))

The cross validation accuracy is 0.8598 ± 0.0446
The cross validation accuracy is 0.8587 ± 0.0243


## Automate the process 

In [158]:
def ConfLayout(num_layers, num_nodes, output_nodes, activators):
    layoutconf = {}
    for n_layers in num_layers:
        
        last_layer_nodes = [output_nodes]
        output_activator = 'sigmoid' if output_nodes==1 else 'softmax'
        
        if n_layers == 0: # logistic regression
            layoutconf[n_layer] = np.array(last_layer_nodes.reverse())
            Activator = [output_activator]
            
        else: # at least one layer NN
            for i_layer in range(1,n_layers+1):
                if i_layer == 1:
                    last_layer_nodes = last_layer_nodes * len(num_nodes)
                    last_layer_nodes = [last_layer_nodes] + [num_nodes]
                else:
                    last_layer_nodes = [i*len(num_nodes) for i in last_layer_nodes]
                    curr_layer_nodes = [val for val in num_nodes for _ in range(len(num_nodes)**(i_layer-1))]
                    last_layer_nodes.append(curr_layer_nodes)
            
            Activator = [[i]*n_layers for i in activators]
        
        for i in range(len(Activator)):
            Activator[i].append(output_activator)
            
        last_layer_nodes.reverse()
        
        layoutconf[n_layers] = {'layerconf': np.array(last_layer_nodes),
                               'activatorconf': Activator}
        
    return layoutconf

In [163]:
num_layer_opts = [1, 2, 3]
num_node_opts = [5, 20]
output_nodes = 1
activator_opts = ['relu', 'tanh']

layerconfigall = ConfLayout(num_layer_opts, num_node_opts, output_nodes, activator_opts)

In [164]:
layerconfigall[3]

{'activatorconf': [['relu', 'relu', 'relu', 'sigmoid'],
  ['tanh', 'tanh', 'tanh', 'sigmoid']],
 'layerconf': array([[ 5,  5,  5,  5, 20, 20, 20, 20],
        [ 5,  5, 20, 20,  5,  5, 20, 20],
        [ 5, 20,  5, 20,  5, 20,  5, 20],
        [ 1,  1,  1,  1,  1,  1,  1,  1]])}

In [166]:
layerconfigall[3]['layerconf'][:,4]

array([20,  5,  5,  1])

In [208]:
myepos = 200
lr_reg = [0.001, 0.01, 0.1] 
reg_opts = [l2(i) for i in lr_reg]

### need to be implemented:
# myopt = Adame(lr=lr_opt[i])
# batch_size = 32, ...
# decay

df_result = pd.DataFrame()
for nlayer in num_layer_opts:
    for myactivators in layerconfigall[nlayer]['activatorconf']:
        for i in range(layerconfigall[nlayer]['layerconf'].shape[1]):
            myhidden_nodes = layerconfigall[nlayer]['layerconf'][:,i].tolist()
            for myreglr in lr_reg:  
                myreg = l2(myreglr)
                print('====================== start new evaluation ================')
                scores = KerasCrossVal(A, B, myepos, myreg, myhidden_nodes, myactivators)

                d = {'nlayers':nlayer,'layers':[myhidden_nodes],'activator':[myactivators],
                     'lr':'l2','lr_reg':myreglr,
                     'opt':'Adam','lr_opt':0.1,
                     'batch_size':'default32',
                     'score(mean)': scores.mean(),'score(std)': scores.std()}
                df_result = pd.concat([df_result, pd.DataFrame(d)])
                
                print(d) 

{'layers': [[5, 1]], 'score(mean)': 0.87287376666791749, 'lr_reg': 0.001, 'opt': 'Adam', 'activator': [['relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.050029258346306359, 'nlayers': 1}
{'layers': [[5, 1]], 'score(mean)': 0.78702899561464501, 'lr_reg': 0.01, 'opt': 'Adam', 'activator': [['relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.052673589795010987, 'nlayers': 1}
{'layers': [[5, 1]], 'score(mean)': 0.76064893719210136, 'lr_reg': 0.1, 'opt': 'Adam', 'activator': [['relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0089913082318132508, 'nlayers': 1}
{'layers': [[20, 1]], 'score(mean)': 0.92458641751456772, 'lr_reg': 0.001, 'opt': 'Adam', 'activator': [['relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0054804142171785966, 'nlayers': 1}
{'layers': [[20, 1]], 'score(mean)': 0.8442141259848821, 'lr_reg': 0.01, 'opt': 'Ada

{'layers': [[5, 20, 1]], 'score(mean)': 0.89139700574531222, 'lr_reg': 0.01, 'opt': 'Adam', 'activator': [['tanh', 'tanh', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.018235550867995297, 'nlayers': 2}
{'layers': [[5, 20, 1]], 'score(mean)': 0.76064926648803133, 'lr_reg': 0.1, 'opt': 'Adam', 'activator': [['tanh', 'tanh', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0093888889904995103, 'nlayers': 2}
{'layers': [[20, 5, 1]], 'score(mean)': 0.82184669218610651, 'lr_reg': 0.001, 'opt': 'Adam', 'activator': [['tanh', 'tanh', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.078821077785506191, 'nlayers': 2}
{'layers': [[20, 5, 1]], 'score(mean)': 0.76064794920275802, 'lr_reg': 0.01, 'opt': 'Adam', 'activator': [['tanh', 'tanh', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0079830812055539189, 'nlayers': 2}
{'layers': [[20, 5, 1]], 'score(mean)'

{'layers': [[20, 20, 5, 1]], 'score(mean)': 0.76064901952712893, 'lr_reg': 0.01, 'opt': 'Adam', 'activator': [['relu', 'relu', 'relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0043550285493839311, 'nlayers': 3}
{'layers': [[20, 20, 5, 1]], 'score(mean)': 0.76064923911366678, 'lr_reg': 0.1, 'opt': 'Adam', 'activator': [['relu', 'relu', 'relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0033846834404402354, 'nlayers': 3}
{'layers': [[20, 20, 20, 1]], 'score(mean)': 0.7873896230207571, 'lr_reg': 0.001, 'opt': 'Adam', 'activator': [['relu', 'relu', 'relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.058748628475359166, 'nlayers': 3}
{'layers': [[20, 20, 20, 1]], 'score(mean)': 0.76064877251544027, 'lr_reg': 0.01, 'opt': 'Adam', 'activator': [['relu', 'relu', 'relu', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0043936883027955991, 'n

{'layers': [[20, 20, 20, 1]], 'score(mean)': 0.76064885491006851, 'lr_reg': 0.01, 'opt': 'Adam', 'activator': [['tanh', 'tanh', 'tanh', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0044078518994089943, 'nlayers': 3}
{'layers': [[20, 20, 20, 1]], 'score(mean)': 0.76064959592083603, 'lr_reg': 0.1, 'opt': 'Adam', 'activator': [['tanh', 'tanh', 'tanh', 'sigmoid']], 'lr_opt': 0.1, 'batch_size': 'default32', 'lr': 'l2', 'score(std)': 0.0047189557875356083, 'nlayers': 3}
