In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
import matplotlib.pyplot as plt
from keras.regularizers import l2
from sklearn.metrics import confusion_matrix, roc_auc_score
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.optimizers import SGD, Adam

Using TensorFlow backend.


In [2]:
###GET DATA
# Features and labels are read from two separate CSV files; split_data()
# later asserts that both frames have the same number of rows.
# The commented-out reads appear to be alternative datasets kept for reference.
#df = pd.read_csv('no_show_v2.csv')
features = pd.read_csv('hr_features.csv')
labels = pd.read_csv('hr_labels.csv')
#df = pd.read_csv('scaled.csv')

In [4]:
#Tidy Up Data, unique to data
#labels = df['Outcome'].apply(lambda x: 1 if x == 0.95 else 0)
#df.drop('Outcome', axis = 1, inplace = True)
#features = df.copy()

In [5]:
# Column layout of the search log: activation / dropout / nodes for the input
# layer and for each of the three possible hidden layers, followed by the
# model-level choices and the score obtained by that configuration.
cols = [
    '{}_{}'.format(prefix, field)
    for prefix in ('input', 'layer_1', 'layer_2', 'layer_3')
    for field in ('activation', 'dropout', 'nodes')
] + ['model_layers', 'model_optimizers', 'model_size', 'score']

# Starts empty; random_model_search() adds one row per trained model.
new_df = pd.DataFrame(columns=cols)

In [6]:
def split_data(features, labels):
    """Hold out the trailing 20% of the rows as a test set.

    The split is purely positional (no shuffling): the first rows become the
    training set and the last ``int(.2 * len(labels))`` rows the test set.

    Args:
        features: sliceable collection of samples.
        labels: sliceable collection of targets, same length as ``features``.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` as NumPy arrays.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(features) == len(labels)

    n_rows = len(labels)
    n_test = int(.2 * n_rows)  # truncated, so the test share is rounded down
    cut = n_rows - n_test

    x_train = np.array(features[:cut])
    y_train = np.array(labels[:cut])
    x_test = np.array(features[cut:])
    y_test = np.array(labels[cut:])

    return x_train, x_test, y_train, y_test

# Positional 80/20 hold-out; random_model_search() reads these four arrays as globals.
x_train, x_test, y_train, y_test = split_data(features, labels)

In [7]:
# Candidate values for every tunable hyperparameter.
search_space = dict(
    size=[8, 16, 32, 64, 128],          # passed to fit() as batch_size
    layers=[1, 2, 3],                   # number of hidden layers
    nodes=[32, 64, 128, 512, 1024],
    activation=['relu', 'tanh'],
    optimizers=['sgd', 'adam'],
    dropout=[0, .3, .4, .5, .6, .7],
)

# Initial sampling weights, keyed '<scope>_<param>'; each list lines up
# position-by-position with search_space[<param>].
start_probs = {
    'model_size': [.2] * 5,
    'model_layers': [.33, .34, .33],
    'model_optimizers': [0.5, 0.5],
}

# The input layer and the three hidden layers all start from the same
# near-uniform weights (the third dropout weight is .167 so the six add up
# to 1). list() gives every key its own list object.
start_probs.update({
    '{}_{}'.format(prefix, param): list(weights)
    for prefix in ('input', 'layer_1', 'layer_2', 'layer_3')
    for param, weights in (
        ('nodes', [.2] * 5),
        ('activation', [.5, .5]),
        ('dropout', [.1666, .1666, .167, .1666, .1666, .1666]),
    )
})

#return random parameter
def random_hyper_parameters(param, layer):
    """Draw one value for ``param`` using the current weights of ``layer``.

    Args:
        param: key of ``search_space`` (e.g. ``'nodes'``, ``'dropout'``).
        layer: scope prefix (e.g. ``'input'``, ``'layer_1'``, ``'model'``);
            the weights are read from ``start_probs['<layer>_<param>']``.

    Returns:
        The element picked by ``np.random.choice``.
    """
    candidates = search_space[param]
    weights = start_probs['{}_{}'.format(layer, param)]
    return np.random.choice(candidates, p=weights)

In [9]:
# Smoke test: draw one node count using the 'layer_1_nodes' weights.
random_hyper_parameters('nodes', 'layer_1')

64

In [10]:
def _build_random_model(input_dim):
    """Sample one architecture from the current distributions and build it.

    Args:
        input_dim: number of input features (width of one training row).

    Returns:
        ``(model, params)``: an uncompiled ``Sequential`` ending in a single
        sigmoid unit, and a dict mapping ``new_df`` column names to the
        hyperparameter values that were drawn.
    """
    params = {}
    model = Sequential()

    # The draw order (nodes, activation, dropout) is kept stable so that a
    # seeded run samples the same architectures.
    input_nodes = random_hyper_parameters('nodes', 'input')
    input_activation = random_hyper_parameters('activation', 'input')
    input_dropout = random_hyper_parameters('dropout', 'input')
    params['input_nodes'] = input_nodes
    params['input_activation'] = input_activation
    params['input_dropout'] = input_dropout

    model.add(Dense(input_nodes, input_shape=(input_dim,), activation=input_activation,
                    activity_regularizer=l2(0.001), kernel_initializer='truncated_normal'))
    model.add(Dropout(input_dropout))

    hidden = random_hyper_parameters('layers', 'model')
    params['model_layers'] = hidden

    # np.random.choice returns a NumPy scalar, hence the int() for range().
    for i in range(int(hidden)):
        layer_name = 'layer_{}'.format(i + 1)
        hidden_nodes = random_hyper_parameters('nodes', layer_name)
        hidden_activation = random_hyper_parameters('activation', layer_name)
        model.add(Dense(hidden_nodes, activation=hidden_activation,
                        activity_regularizer=l2(0.001), kernel_initializer='truncated_normal'))
        dropout = random_hyper_parameters('dropout', layer_name)
        model.add(Dropout(dropout))

        params['{}_nodes'.format(layer_name)] = hidden_nodes
        params['{}_activation'.format(layer_name)] = hidden_activation
        params['{}_dropout'.format(layer_name)] = dropout

    model.add(Dense(1, activation='sigmoid'))
    return model, params


def random_model_search(num_models, best_score, num):
    """Train ``num_models`` randomly sampled networks and log their scores.

    Every model is fit on the global ``x_train``/``y_train`` (10% held out
    for validation), restored to its best-``val_loss`` checkpoint and scored
    by ROC AUC on the global ``x_test``/``y_test``. One row per model is
    added to the global ``new_df``. A model whose score is at least
    ``best_score`` is saved to ``'test_model.h5'``.

    Args:
        num_models: how many models to sample and train.
        best_score: best score seen so far; models must reach it to be saved.
        num: round index supplied by the caller. Kept for interface
            compatibility; it is not used (the original applied
            ``.format(num)`` to a string without a placeholder, so the file
            name never depended on it).

    Returns:
        The updated best score.

    NOTE(review): models are ranked on the same test split that is later
    used to report results, so reported scores are optimistically biased.
    """
    global new_df
    # Local import keeps this block self-contained; only clear_session is needed.
    from keras import backend as K

    input_dim = x_train.shape[1]  # loop-invariant

    for _ in range(num_models):
        r_model, model_dict = _build_random_model(input_dim)

        # Checkpoint the best weights, halve the LR on plateaus, stop early.
        callbacks = [
            ModelCheckpoint(filepath='churn_weights_r.hdf5', monitor='val_loss',
                            save_best_only=True, verbose=False),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=20,
                              min_lr=0.00001, verbose=0),
            EarlyStopping(monitor='val_loss', min_delta=0, patience=60),
        ]

        optimizer = random_hyper_parameters('optimizers', 'model')
        r_model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc'])

        batch_size = random_hyper_parameters('size', 'model')

        model_dict['model_optimizers'] = optimizer
        model_dict['model_size'] = batch_size

        r_model.fit(x_train, y_train, epochs=1000, validation_split=0.1,
                    batch_size=batch_size, verbose=0, callbacks=callbacks)

        # Score the best checkpoint rather than the last epoch.
        r_model.load_weights('churn_weights_r.hdf5')

        # ROC AUC on the hold-out set. (Accuracy from
        # r_model.evaluate(x_test, y_test)[1] is the alternative metric.)
        new_preds = r_model.predict(x_test)
        new_score = roc_auc_score(y_test, new_preds)
        model_dict['score'] = new_score

        # Keep the model on disk if it matches or beats the best so far.
        if new_score >= best_score:
            r_model.save('test_model.h5')
            best_score = new_score

        # DataFrame.append was removed in pandas 2.0; concat works everywhere.
        # The log is updated every iteration so an interrupted search keeps
        # the rows gathered so far.
        new_df = pd.concat([new_df, pd.DataFrame([model_dict])], ignore_index=True)

        # Drop the model and its backend graph so memory does not grow with
        # every sampled architecture.
        del r_model
        K.clear_session()

    return best_score

In [11]:
def get_new_dists(top_num=3, lr=.2):
    """Nudge the sampling weights towards the choices of the best models.

    Takes the ``top_num`` highest-scoring rows of the global ``new_df`` and,
    for every hyperparameter column, moves the current weights in
    ``start_probs`` a fraction ``lr`` of the way towards the relative
    frequency of each candidate among those rows.

    Args:
        top_num: number of best-scoring models to learn from.
        lr: step size in [0, 1]; 0 keeps the current weights, 1 replaces
            them with the observed frequencies.

    Returns:
        Dict with the same keys as the non-score columns of ``new_df``; each
        value is a flat list of weights aligned with ``search_space``.
    """
    # Ascending sort, so the best models are the last rows.
    top_models = new_df.sort_values(by='score').tail(top_num)

    dists = {}
    for column in top_models:
        if column == 'score':
            continue

        # 'layer_1_nodes' -> 'nodes', 'model_size' -> 'size', ...
        param = column.split('_')[-1]
        counts = (top_models[column]
                  .value_counts()
                  .reindex(search_space[param])
                  .fillna(0))
        total = counts.sum()
        current = start_probs[column]

        if total != 0:
            observed = counts.values / float(total)
            dists[column] = [orig - ((orig - new) * lr)
                             for orig, new in zip(current, observed)]
        else:
            # None of the top models used this layer (all NaN): keep the
            # current weights. This must be a flat copy; wrapping it in
            # another list makes the next np.random.choice(p=...) fail.
            dists[column] = list(current)

    return dists

In [12]:
def build_model(rounds=3, models_per_round=3):
    """Run the iterative random search.

    Each round trains ``models_per_round`` sampled models with
    ``random_model_search`` and then replaces the global ``start_probs``
    with the distributions returned by ``get_new_dists``, so later rounds
    sample more often from what worked best so far.

    Args:
        rounds: number of search rounds (default 3, as originally hard-coded).
        models_per_round: models trained per round (default 3, likewise).
    """
    global start_probs

    best_score = 0
    for round_idx in range(rounds):
        best_score = random_model_search(models_per_round, best_score, round_idx)
        start_probs = get_new_dists()

In [13]:
# Runs the whole search: 3 rounds of 3 models, each fit for up to 1000 epochs with early stopping.
build_model()

In [7]:
# NOTE(review): random_model_search() saves its best model as 'test_model.h5';
# 'pima_model.h5' is not written by any cell shown here — confirm this is the intended file.
test_model = load_model('pima_model.h5')

In [8]:
# Layer-by-layer architecture and parameter counts of the loaded model.
test_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_168 (Dense)            (None, 1024)              9216      
_________________________________________________________________
dropout_126 (Dropout)        (None, 1024)              0         
_________________________________________________________________
dense_169 (Dense)            (None, 32)                32800     
_________________________________________________________________
dropout_127 (Dropout)        (None, 32)                0         
_________________________________________________________________
dense_170 (Dense)            (None, 1024)              33792     
_________________________________________________________________
dropout_128 (Dropout)        (None, 1024)              0         
_________________________________________________________________
dense_171 (Dense)            (None, 1)                 1025      
Total para

In [11]:
# Evaluate on the hold-out split. score[1] is reported as accuracy further down;
# score[0] is presumably the loss — depends on how the loaded model was compiled.
score = test_model.evaluate(x_test, y_test)
score



[0.45168367028236389, 0.81506849315068497]

In [18]:
#new_df.tail(20)

In [23]:
# Raw predicted scores on the hold-out split; ROC AUC is computed on the
# scores themselves (the commented-out line would binarise them first).
new_pred = test_model.predict(x_test)
#new_pred = (new_pred > 0.29).astype('int')
new_score = roc_auc_score(y_test, new_pred)
print('ACC: {}, AUC: {}' .format(score[1], new_score))

ACC: 0.815068493151, AUC: 0.872340425532


In [14]:
# Search log: one row per trained model with its sampled hyperparameters and score.
new_df

Unnamed: 0,input_activation,input_dropout,input_nodes,layer_1_activation,layer_1_dropout,layer_1_nodes,layer_2_activation,layer_2_dropout,layer_2_nodes,layer_3_activation,layer_3_dropout,layer_3_nodes,model_layers,model_optimizers,model_size,score
0,relu,0.5,128,relu,0.4,1024,,,,,,,1,sgd,8,0.835078
1,tanh,0.4,128,relu,0.6,1024,relu,0.4,64.0,tanh,0.5,128.0,3,sgd,32,0.842583
2,relu,0.5,32,relu,0.6,512,relu,0.4,1024.0,,,,2,sgd,32,0.830816
3,tanh,0.3,128,tanh,0.4,128,relu,0.4,64.0,relu,0.4,64.0,3,adam,128,0.846011
4,relu,0.4,128,relu,0.6,1024,,,,,,,1,sgd,32,0.829426
5,relu,0.7,64,relu,0.4,64,relu,0.3,64.0,tanh,0.4,128.0,3,sgd,16,0.766747
6,tanh,0.0,128,relu,0.6,512,,,,,,,1,sgd,8,0.847401
7,tanh,0.4,32,relu,0.6,64,,,,,,,1,sgd,8,0.849903
8,tanh,0.5,32,tanh,0.4,1024,tanh,0.0,64.0,relu,0.5,64.0,3,sgd,8,0.848884


In [16]:
# Ascending sort by score, so the best models are the last rows shown by tail().
sorted_df= new_df.sort_values(by='score')
sorted_df.tail(10)

Unnamed: 0,input_activation,input_dropout,input_nodes,layer_1_activation,layer_1_dropout,layer_1_nodes,layer_2_activation,layer_2_dropout,layer_2_nodes,layer_3_activation,layer_3_dropout,layer_3_nodes,model_layers,model_optimizers,model_size,score
5,relu,0.7,64,relu,0.4,64,relu,0.3,64.0,tanh,0.4,128.0,3,sgd,16,0.766747
4,relu,0.4,128,relu,0.6,1024,,,,,,,1,sgd,32,0.829426
2,relu,0.5,32,relu,0.6,512,relu,0.4,1024.0,,,,2,sgd,32,0.830816
0,relu,0.5,128,relu,0.4,1024,,,,,,,1,sgd,8,0.835078
1,tanh,0.4,128,relu,0.6,1024,relu,0.4,64.0,tanh,0.5,128.0,3,sgd,32,0.842583
3,tanh,0.3,128,tanh,0.4,128,relu,0.4,64.0,relu,0.4,64.0,3,adam,128,0.846011
6,tanh,0.0,128,relu,0.6,512,,,,,,,1,sgd,8,0.847401
8,tanh,0.5,32,tanh,0.4,1024,tanh,0.0,64.0,relu,0.5,64.0,3,sgd,8,0.848884
7,tanh,0.4,32,relu,0.6,64,,,,,,,1,sgd,8,0.849903


In [15]:
# Sampling weights after the search; build_model() overwrites them after every round.
start_probs

{'input_activation': [0.39466666666666661, 0.60533333333333339],
 'input_dropout': [0.15196586666666667,
  0.13863253333333334,
  0.24817066666666665,
  0.29063253333333333,
  0.085299200000000006,
  0.085299200000000006],
 'input_nodes': [0.27839999999999998,
  0.1024,
  0.41439999999999999,
  0.1024,
  0.1024],
 'layer_1_activation': [0.624, 0.376],
 'layer_1_dropout': [0.085299200000000006,
  0.085299200000000006,
  0.30150399999999999,
  0.085299200000000006,
  0.35729919999999998,
  0.085299200000000006],
 'layer_1_nodes': [0.1024,
  0.16906666666666667,
  0.15573333333333333,
  0.21173333333333333,
  0.36106666666666665],
 'layer_2_activation': [0.54399999999999993, 0.45599999999999996],
 'layer_2_dropout': [0.28529920000000003,
  0.085299200000000006,
  0.373504,
  0.085299200000000006,
  0.085299200000000006,
  0.085299200000000006],
 'layer_2_nodes': [0.1024,
  0.52639999999999998,
  0.1024,
  0.1024,
  0.16640000000000002],
 'layer_3_activation': [0.53600000000000003, 0.46399

In [34]:
#preds = model.predict(x_test)
# Binarise the predicted scores at a fixed cut-off of .15 and tabulate them
# against the true labels.
# NOTE(review): the cut-off is hard-coded and its origin is not shown here — confirm how it was chosen.
evals = [1 if i > .15 else 0 for i in new_pred]
cf = confusion_matrix(y_test, evals)
cf

array([[32, 34],
       [ 4, 40]])