In [1]:
import pandas as pd
import tensorflow as tf
from bayes_opt import BayesianOptimization
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np



You can install Bayesian Optimization by,
```bash
pip install bayesian-optimization
```

In [2]:
df = pd.read_csv('Iris.csv')
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
x_data = df.iloc[:, :-1].values.astype(np.float32)
y_datalabel = df.iloc[:, -1]
y_data = LabelEncoder().fit_transform(df.iloc[:, -1])

onehot = np.zeros((y_data.shape[0], np.unique(y_data).shape[0]))
for i in range(y_data.shape[0]):
    onehot[i, y_data[i]] = 1.0
    
x_train, x_test, y_train, y_test, y_train_label, y_test_label = train_test_split(x_data, onehot, y_data, test_size = 0.2)

```text
Activation function:
0- for sigmoid
1- for tanh
2- for relu

Now the constants are:
1- batch size : 16
2- epoch: 100
3- gradient descent
4- softmax with cross entropy
```

So you can change anything you want

In [4]:
def neural_network(num_hidden, size_layer, learning_rate, dropout_rate, beta, activation, batch_size = 16):
    
    def activate(activation, first_layer, second_layer, bias):
        if activation == 0:
            activation = tf.nn.sigmoid
        elif activation == 1:
            activation = tf.nn.tanh
        else:
            activation = tf.nn.relu
        layer = activation(tf.matmul(first_layer, second_layer) + bias)
        return tf.nn.dropout(layer, dropout_rate)

    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, (None, x_data.shape[1]))
    Y = tf.placeholder(tf.float32, (None, onehot.shape[1]))
    input_layer = tf.Variable(tf.random_normal([x_data.shape[1], size_layer]))
    biased_layer = tf.Variable(tf.random_normal([size_layer], stddev = 0.1))
    output_layer = tf.Variable(tf.random_normal([size_layer, onehot.shape[1]]))
    biased_output = tf.Variable(tf.random_normal([onehot.shape[1]], stddev = 0.1))
    layers, biased = [], []
    for i in range(num_hidden - 1):
        layers.append(tf.Variable(tf.random_normal([size_layer, size_layer])))
        biased.append(tf.Variable(tf.random_normal([size_layer])))
    first_l = activate(activation, X, input_layer, biased_layer)
    next_l = activate(activation, first_l, layers[0], biased[0])
    for i in range(1, num_hidden - 1):
        next_l = activate(activation, next_l, layers[i], biased[i])
    last_l = tf.matmul(next_l, output_layer) + biased_output
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = last_l, labels = Y))
    regularizers = tf.nn.l2_loss(input_layer) + sum(map(lambda x: tf.nn.l2_loss(x), layers)) + tf.nn.l2_loss(output_layer)
    cost = cost + beta * regularizers
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    correct_prediction = tf.equal(tf.argmax(last_l, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    COST, TEST_COST, ACC, TEST_ACC = [], [], [], []
    for i in range(100):
        train_acc, train_loss = 0, 0
        for n in range(0, (x_train.shape[0] // batch_size) * batch_size, batch_size):
            _, loss = sess.run([optimizer, cost], feed_dict = {X: x_train[n: n + batch_size, :], Y: y_train[n: n + batch_size, :]})
            train_acc += sess.run(accuracy, feed_dict = {X: x_train[n: n + batch_size, :], Y: y_train[n: n + batch_size, :]})
            train_loss += loss
        TEST_COST.append(sess.run(cost, feed_dict = {X: x_test, Y: y_test}))
        TEST_ACC.append(sess.run(accuracy, feed_dict = {X: x_test, Y: y_test}))
        train_loss /= (x_train.shape[0] // batch_size)
        train_acc /= (x_train.shape[0] // batch_size)
        ACC.append(train_acc)
        COST.append(train_loss)
    COST = np.array(COST).mean()
    TEST_COST = np.array(TEST_COST).mean()
    ACC = np.array(ACC).mean()
    TEST_ACC = np.array(TEST_ACC).mean()
    return COST, TEST_COST, ACC, TEST_ACC

In [5]:
def generate_nn(num_hidden, size_layer, learning_rate, dropout_rate, beta, activation):
    global accbest
    param = {
        'num_hidden' : int(np.around(num_hidden)),
        'size_layer' : int(np.around(size_layer)),
        'learning_rate' : max(min(learning_rate, 1), 0.0001),
        'dropout_rate' : max(min(dropout_rate, 0.99), 0),
        'beta' : max(min(beta, 0.5), 0.000001),
        'activation': int(np.around(activation))
    }
    print("\nSearch parameters %s" % (param), file = log_file)
    log_file.flush()
    learning_cost, valid_cost, learning_acc, valid_acc = neural_network(**param)
    print("stop after 200 iteration with train cost %f, valid cost %f, train acc %f, valid acc %f" % (learning_cost, valid_cost, learning_acc, valid_acc))
    if (valid_acc > accbest):
        costbest = valid_acc
    return valid_acc

```text
hidden layers (2, 20)
layer size (32, 1024)
learning rate (0.0001, 1)
dropout rate (0.1, 0.99)
beta (0.000001, 0.49)
activation (0, 2)
```

You can set your own minimum and maximum boundaries, just change the value

In [6]:
log_file = open('nn-bayesian.log', 'a')
accbest = 0.0
NN_BAYESIAN = BayesianOptimization(generate_nn, 
                              {'num_hidden': (2, 20),
                               'size_layer': (32, 1024),
                               'learning_rate': (0.0001, 1),
                               'dropout_rate': (0.1, 0.99),
                               'beta': (0.000001, 0.49),
                               'activation': (0, 2)
                              })
NN_BAYESIAN.maximize(init_points = 30, n_iter = 50, acq = 'ei', xi = 0.0)

[31mInitialization[0m
[94m-------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   activation |      beta |   dropout_rate |   learning_rate |   num_hidden |   size_layer | 
stop after 200 iteration with train cost 9.328038, valid cost 6.830622, train acc 0.477589, valid acc 0.344333
    1 | 00m00s | [35m   0.34433[0m | [32m      0.1557[0m | [32m   0.0741[0m | [32m        0.5189[0m | [32m         0.6100[0m | [32m      2.2780[0m | [32m    109.6324[0m | 
stop after 200 iteration with train cost nan, valid cost nan, train acc 0.339286, valid acc 0.333333
    2 | 00m06s |    0.33333 |       1.5493 |    0.2110 |         0.9648 |          0.6746 |       8.4695 |     406.6496 | 
stop after 200 iteration with train cost 353764719888.678528, valid cost 323302490112.000000, train acc 0.407589, valid acc 0.338667
    3 | 00m19s |    0.33867 |       1.2247 |    0.0866 |         0.4

  " state: %s" % convergence_dict)


   32 | 00m15s |    0.33033 |       0.0000 |    0.4900 |         0.1000 |          0.0001 |       2.0000 |     251.8187 | 
stop after 200 iteration with train cost 295507838599718848.000000, valid cost 473933199441920.000000, train acc 0.409196, valid acc 0.334667
   33 | 01m13s |    0.33467 |       0.0000 |    0.4900 |         0.1000 |          1.0000 |      20.0000 |     964.5148 | 
stop after 200 iteration with train cost 516649316.413125, valid cost 528222688.000000, train acc 0.335268, valid acc 0.334667
   34 | 00m28s |    0.33467 |       0.0000 |    0.4900 |         0.1000 |          0.0001 |      20.0000 |     394.5861 | 
stop after 200 iteration with train cost 546875561072.333618, valid cost 550831259648.000000, train acc 0.330357, valid acc 0.318000


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   35 | 01m18s |    0.31800 |       0.0000 |    0.4900 |         0.1000 |          0.0001 |      20.0000 |    1024.0000 | 
stop after 200 iteration with train cost 87570.081942, valid cost 87536.796875, train acc 0.367232, valid acc 0.313667


  " state: %s" % convergence_dict)


   36 | 00m18s |    0.31367 |       0.0000 |    0.4900 |         0.9900 |          0.0001 |      20.0000 |     139.0137 | 
stop after 200 iteration with train cost 144748.068772, valid cost 144693.312500, train acc 0.320268, valid acc 0.328000


  " state: %s" % convergence_dict)


   37 | 00m15s |    0.32800 |       0.0000 |    0.4900 |         0.1000 |          0.0001 |       2.0000 |     778.1937 | 
stop after 200 iteration with train cost 117422.896362, valid cost 117377.101562, train acc 0.332321, valid acc 0.340333
   38 | 00m22s |    0.34033 |       0.0000 |    0.4900 |         0.1000 |          0.0001 |       2.0000 |     698.8917 | 
stop after 200 iteration with train cost nan, valid cost nan, train acc 0.339286, valid acc 0.333333


  " state: %s" % convergence_dict)


   39 | 00m45s |    0.33333 |       2.0000 |    0.0000 |         0.9900 |          1.0000 |      20.0000 |     655.3416 | 
stop after 200 iteration with train cost 129.469567, valid cost 134.233414, train acc 0.387143, valid acc 0.331667
   40 | 00m22s |    0.33167 |       0.0000 |    0.0000 |         0.9900 |          1.0000 |       2.0000 |     924.9424 | 
stop after 200 iteration with train cost 2.360393, valid cost 2.157934, train acc 0.701964, valid acc 0.699667
   41 | 00m06s | [35m   0.69967[0m | [32m      0.0000[0m | [32m   0.0000[0m | [32m        0.9900[0m | [32m         0.0001[0m | [32m      2.0000[0m | [32m    985.9104[0m | 
stop after 200 iteration with train cost 4227.413826, valid cost 5810.387695, train acc 0.793482, valid acc 0.731667


  " state: %s" % convergence_dict)


   42 | 00m05s | [35m   0.73167[0m | [32m      2.0000[0m | [32m   0.0000[0m | [32m        0.9900[0m | [32m         0.0001[0m | [32m      2.0000[0m | [32m    979.2264[0m | 
stop after 200 iteration with train cost 114.748760, valid cost 109.977303, train acc 0.385714, valid acc 0.331667
   43 | 00m14s |    0.33167 |       0.0000 |    0.0000 |         0.9900 |          1.0000 |       2.0000 |     974.0893 | 
stop after 200 iteration with train cost nan, valid cost nan, train acc 0.339286, valid acc 0.333333
   44 | 00m38s |    0.33333 |       2.0000 |    0.0411 |         0.5573 |          0.0001 |       8.9461 |     989.1162 | 
stop after 200 iteration with train cost nan, valid cost nan, train acc 0.339286, valid acc 0.333333
   45 | 01m00s |    0.33333 |       2.0000 |    0.4900 |         0.1000 |          0.0001 |      20.0000 |     898.4754 | 
stop after 200 iteration with train cost 89389.414922, valid cost 91162.671875, train acc 0.782143, valid acc 0.726333
   46 | 0

  " state: %s" % convergence_dict)


   50 | 01m04s |    0.33333 |       2.0000 |    0.0000 |         0.1000 |          0.0001 |      20.0000 |     932.1613 | 
stop after 200 iteration with train cost 1790989561259.659424, valid cost 2872419072.000000, train acc 0.417321, valid acc 0.340000
   51 | 00m30s |    0.34000 |       0.0000 |    0.4900 |         0.1000 |          1.0000 |      20.0000 |     466.5532 | 
stop after 200 iteration with train cost nan, valid cost nan, train acc 0.339286, valid acc 0.333333
   52 | 00m16s |    0.33333 |       2.0000 |    0.4900 |         0.1000 |          1.0000 |      20.0000 |      90.3217 | 
stop after 200 iteration with train cost 23599.251629, valid cost 23225.775391, train acc 0.339196, valid acc 0.327667
   53 | 00m12s |    0.32767 |       2.0000 |    0.4900 |         0.1000 |          0.0001 |       2.0000 |     291.1782 | 
stop after 200 iteration with train cost 10849.599879, valid cost 10846.084961, train acc 0.330982, valid acc 0.333000
   54 | 00m12s |    0.33300 |       0

In [7]:
print('Maximum NN accuracy value: %f' % NN_BAYESIAN.res['max']['max_val'])
print('Best NN parameters: ', NN_BAYESIAN.res['max']['max_params'])

Maximum NN accuracy value: 0.731667
Best NN parameters:  {'dropout_rate': 0.98999999999999999, 'beta': 9.9999999999999995e-07, 'learning_rate': 0.0001, 'size_layer': 979.22638102861038, 'activation': 2.0, 'num_hidden': 2.0}


So that means, best optimized parameters are:
```text
dropout rate: 0.98999999999999999
beta: 9.9999999999999995e-07
learning rate: 0.0001
size layer: 979 wide
activation function: relu
hidden layers: 2
```