### Goal : find hyperparameters
--

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize
import tuxml

In [3]:
features = tuxml.load_dataset()
selection = pd.read_csv("feature_net.csv", index_col = 0, skiprows = 1, names = ['feat'])['feat']
selection[len(selection)] = 'vmlinux'
features = features[selection].replace([2,-1],0)

Training-test split

In [4]:
n = 65000
sizes = np.array(features[0:n]['vmlinux'])
x_train, x_test, y_train, y_test = train_test_split(features.drop('vmlinux', axis=1)[0:n], sizes, test_size = 0.1)

nbCol = len(features.columns)

x_train = np.array(x_train, dtype = np.float32)
x_test =  np.array(x_test, dtype = np.float32)

y_train = np.array(y_train, dtype = np.float32)
y_test = np.array(y_test, dtype = np.float32)

nb_features = x_train.shape[1]

Function to minimize:
- take the data in input
- compute the neural network
- return the test MAPE in output

In [5]:
def compute_net(batch_size = 100, nb_epochs = 30, nb_node_layer1 = 200, nb_node_layer2 = 100, lr1 = 1, lr2 = 0.01):
    
    #     batch_size = int(x[0])
    #     nb_epochs = int(x[1])
    #     nb_node_layer1 = int(x[2])
    #     nb_node_layer2 = int(x[3])
    #     lr1 = x[4]
    #     lr2 = x[5]
    
    nb_batch_train = int(len(x_train)/batch_size)-1
    nb_batch_test = int(len(x_test)/batch_size)
    
    # slice the datasets => feed_dict was very slow, so I choose an iterator solution
    dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
    iterator_train = tf.compat.v1.data.make_initializable_iterator(dataset_train)
    xtr, ytr = iterator_train.get_next()

    dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
    iterator_test = tf.compat.v1.data.make_initializable_iterator(dataset_test)
    xte, yte = iterator_test.get_next()

    with tf.device("/gpu:0"):
        # Layers training
        w_h1_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_features, nb_node_layer1)), name = "w_h1_tr")
        mat_h1_tr = tf.matmul(xtr, w_h1_tr)
        b_h1_tr = tf.Variable(tf.zeros(nb_node_layer1), name="b_h1_tr")
        out_h1_tr = tf.nn.relu(tf.add(mat_h1_tr, b_h1_tr))

        w_h2_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_node_layer1, nb_node_layer2)), name = "w_h2_tr")
        mat_h2_tr = tf.matmul(out_h1_tr, w_h2_tr)
        b_h2_tr = tf.Variable(tf.zeros(nb_node_layer2), name="b_h2_tr")
        out_h2_tr = tf.nn.relu(tf.add(mat_h2_tr, b_h2_tr))

        w_final_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_node_layer2, 1)), name = "w_final_tr")
        outputs_tr = tf.reshape(tf.matmul(out_h2_tr, w_final_tr), shape=[batch_size])
        ytr = tf.reshape(ytr, [batch_size])

        # Layers test
        mat_h1_te = tf.matmul(xte, w_h1_tr)
        out_h1_te = tf.nn.relu(tf.add(mat_h1_te, b_h1_tr))

        mat_h2_te = tf.matmul(out_h1_te, w_h2_tr)
        out_h2_te = tf.nn.relu(tf.add(mat_h2_te, b_h2_tr))

        outputs_te =  tf.reshape(tf.matmul(out_h2_te, w_final_tr), shape=[batch_size])
        yte = tf.reshape(yte, [batch_size])

        # Cost => MAPE
        train_cost = tf.keras.losses.MAPE(ytr, outputs_tr)
        test_cost = tf.keras.losses.MAPE(yte, outputs_te)

        # Convergence function => AdamOptimizer
        train_step = tf.train.AdamOptimizer(learning_rate=lr1).minimize(train_cost)

        # train step with a lower learning rate => gain few % at the end
        tiny_train_step = tf.train.AdamOptimizer(learning_rate=lr2).minimize(train_cost)

        # allocate memory for tensors
        init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for j in range(nb_epochs):
            if j < 20:
                sess.run(iterator_train.initializer)
                for i in range(nb_batch_train):
                    val = sess.run(train_step)
                #print("Cout entrainement epoch n°", j+1, ":",  sess.run(train_cost))
            else:
                sess.run(iterator_train.initializer)
                for i in range(nb_batch_train):
                    val = sess.run(tiny_train_step)
                #print("Cout entrainement epoch n°", j+1, ":",  sess.run(train_cost))
        mape = 0
        sess.run(iterator_train.initializer)
        for i in range(nb_batch_train):
            mape+=sess.run(train_cost)
        mape = 0
        sess.run(iterator_test.initializer)
        for i in range(nb_batch_test):
            mape+=sess.run(test_cost)
    return mape/nb_batch_test

Search for hyperparameters

In [5]:
lr1_values = [0.5, 1, 2, 5, 10]
lr2_values = [0.01, 0.05]
batch_size = [50, 100, 200]
nb_node_layer1 = [200, 300, 400]
nb_node_layer2 = [100, 200, 300]

res = dict()

for lr1 in lr1_values:
    for lr2 in lr2_values:
        for bs in batch_size:
            #for nbl1 in nb_node_layer1:
            #for nbl2 in nb_node_layer2:
            res[lr1, lr2, bs] = compute_net(lr1 = lr1, lr2 = lr2, batch_size = bs)#,nb_node_layer1 = nbl1, nb_node_layer2 = nbl2)
res

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


{(0.5, 0.01, 50): 6.1139626759749195,
 (0.5, 0.01, 100): 6.288037366133469,
 (0.5, 0.01, 200): 6.3636893182992935,
 (0.5, 0.05, 50): 5.927036344088041,
 (0.5, 0.05, 100): 6.302827460949238,
 (0.5, 0.05, 200): 6.8108696937561035,
 (1, 0.01, 50): 6.401567217019888,
 (1, 0.01, 100): 6.15475372167734,
 (1, 0.01, 200): 6.420121535658836,
 (1, 0.05, 50): 6.04258550130404,
 (1, 0.05, 100): 6.142240516956036,
 (1, 0.05, 200): 6.298232853412628,
 (2, 0.01, 50): 6.474702497629019,
 (2, 0.01, 100): 6.676808716700627,
 (2, 0.01, 200): 6.442576393485069,
 (2, 0.05, 50): 6.371084561714759,
 (2, 0.05, 100): 6.497246683560885,
 (2, 0.05, 200): 6.085170641541481,
 (5, 0.01, 50): 40.140974279550406,
 (5, 0.01, 100): 40.13643575815054,
 (5, 0.01, 200): 40.16221237182617,
 (5, 0.05, 50): 40.138973910991965,
 (5, 0.05, 100): 40.13834428053636,
 (5, 0.05, 200): 40.16430842876434,
 (10, 0.01, 50): 40.14627510951116,
 (10, 0.01, 100): 40.136890646127554,
 (10, 0.01, 200): 100.0,
 (10, 0.05, 50): 40.1396022796

In [7]:
lr1_values = [0.25, 0.5, 0.75]
lr2_values = [0.025, 0.05, 0.0075]
batch_size = 50

res = dict()

for lr1 in lr1_values:
    for lr2 in lr2_values:
        res[lr1, lr2, batch_size] = compute_net(lr1 = lr1, lr2 = lr2, batch_size = batch_size)
res

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


{(0.25, 0.025, 50): 6.469745023433979,
 (0.25, 0.05, 50): 6.543176416250375,
 (0.25, 0.0075, 50): 6.792832026114831,
 (0.5, 0.025, 50): 5.791097765702467,
 (0.5, 0.05, 50): 5.865269492222713,
 (0.5, 0.0075, 50): 6.051237762891329,
 (0.75, 0.025, 50): 5.994227405694815,
 (0.75, 0.05, 50): 6.2046061038970945,
 (0.75, 0.0075, 50): 6.284441397740291}

In [10]:
for i in range(5):
    print(compute_net(lr1 = 0.5, lr2 = 0.025, batch_size = 50))

5.944322465016292
6.056905442017776
6.001912755232591
6.062779107460609
5.9570104598999025


I guess there is more optimization to do in the initialization weights than in our hyperparameters.

In [11]:
lr1_values = 0.5
lr2_values = 0.025
batch_size = 50
nb_node_layer1 = [200, 300, 400]
nb_node_layer2 = [100, 200, 300]

res = dict()

for node1 in nb_node_layer1:
    for node2 in nb_node_layer2:
        res[lr1, lr2, batch_size, node1, node2] = compute_net(lr1 = lr1, lr2 = lr2, batch_size = batch_size,
                                                             nb_node_layer1 = node1, nb_node_layer2 = node2)
res

{(0.75, 0.0075, 50, 200, 100): 6.045812485768245,
 (0.75, 0.0075, 50, 200, 200): 6.0127736421731806,
 (0.75, 0.0075, 50, 200, 300): 6.077524845416729,
 (0.75, 0.0075, 50, 300, 100): 6.190631213554969,
 (0.75, 0.0075, 50, 300, 200): 6.088517420108502,
 (0.75, 0.0075, 50, 300, 300): 6.019588349415706,
 (0.75, 0.0075, 50, 400, 100): 6.295459662950956,
 (0.75, 0.0075, 50, 400, 200): 6.401230922112098,
 (0.75, 0.0075, 50, 400, 300): 6.162542775961069}

In [None]:
#to run this night

lr1_values = 0.5
lr2_values = 0.025
batch_size = 50
nb_node_layer1 = [200, 300, 400]
nb_node_layer2 = [100, 200, 300]

res = dict()

for node1 in nb_node_layer1:
    for node2 in nb_node_layer2:
        res[lr1_values, lr2_values, batch_size, node1, node2] = compute_net(lr1 = lr1, lr2 = lr2, batch_size = batch_size,
                                                             nb_node_layer1 = node1, nb_node_layer2 = node2)
res