### Goal: Predict the kernel size with a neural network

--

Objective: Reach the minimum train/test MAPE

### Results => 4% train - 6% test

### TODO:
 - Add dropout to avoid overfitting on the training set
 - Try deeper nets, with more layers?

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

See preprocessing_neural_network.ipynb for the construction of the dataset

In [2]:
# Load the dataset built by preprocessing_neural_network.ipynb; the first CSV column becomes the index.
# NOTE(review): absolute, machine-specific path - must be adapted before running anywhere else.
dataset_path = r'C:\Users\llesoil\Documents\tuxml\burt_yes_important.csv'
features = pd.read_csv(dataset_path, sep = ',', index_col = 0)

Training-test split

In [33]:
# Keep the first n rows only; 'vmlinux' is the regression target, every other column is an input.
n = 65500
rows = features[0:n]
sizes = np.array(rows['vmlinux'])
predictors = rows.drop('vmlinux', axis=1)

nbCol = len(features.columns)

# Hold out 10% of the rows for testing and convert the four pieces to float32 arrays.
# NOTE(review): no random_state is passed, so the split is not reproducible unless
# NumPy's global RNG is seeded elsewhere.
x_train, x_test, y_train, y_test = (
    np.array(part, dtype = np.float32)
    for part in train_test_split(predictors, sizes, test_size = 0.1)
)

Neural network with 3 layers (two hidden ReLU layers and a linear output layer)

Best results with 90% training - 10% test

In [34]:
# Build, train and evaluate a fully connected regressor (TF 1.x graph mode):
#   inputs -> 200 ReLU units -> 100 ReLU units -> 1 linear output (no bias), loss = MAPE.
# Reads x_train / y_train / x_test / y_test from the split cell.

# Start from an empty default graph so that re-running this cell (or running another
# training cell before it) does not keep stacking duplicate ops and variables.
tf.reset_default_graph()

nb_features = x_train.shape[1]
batch_size = 100
nb_epochs = 40
# Epochs trained with the large learning rate before switching to the small one.
nb_epochs_coarse = 20
# Only full batches can be used: outputs and targets are reshaped to [batch_size] below.
# One extra full batch is held back on the training side because the per-epoch cost
# print pulls one more batch from the iterator after the training loop.
nb_batch_train = len(x_train) // batch_size - 1
nb_batch_test = len(x_test) // batch_size
nb_node_layer1 = 200
nb_node_layer2 = 100

# slice the datasets => feed_dict was very slow, so an iterator is used instead
dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
iterator_train = tf.compat.v1.data.make_initializable_iterator(dataset_train)
xtr, ytr = iterator_train.get_next()

dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
iterator_test = tf.compat.v1.data.make_initializable_iterator(dataset_test)
xte, yte = iterator_test.get_next()

with tf.device("/gpu:0"):
    # Layers training
    w_h1_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_features, nb_node_layer1)), name = "w_h1_tr")
    mat_h1_tr = tf.matmul(xtr, w_h1_tr)
    b_h1_tr = tf.Variable(tf.zeros(nb_node_layer1), name="b_h1_tr")
    out_h1_tr = tf.nn.relu(tf.add(mat_h1_tr, b_h1_tr))

    w_h2_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_node_layer1, nb_node_layer2)), name = "w_h2_tr")
    mat_h2_tr = tf.matmul(out_h1_tr, w_h2_tr)
    b_h2_tr = tf.Variable(tf.zeros(nb_node_layer2), name="b_h2_tr")
    out_h2_tr = tf.nn.relu(tf.add(mat_h2_tr, b_h2_tr))

    # Output layer: a single linear unit without bias, flattened to one value per sample
    w_final_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_node_layer2, 1)), name = "w_final_tr")
    outputs_tr =  tf.reshape(tf.matmul(out_h2_tr, w_final_tr), shape=[batch_size])
    ytr = tf.reshape(ytr, [batch_size])

    # Layers test: same weights and biases, applied to the test iterator
    mat_h1_te = tf.matmul(xte, w_h1_tr)
    out_h1_te = tf.nn.relu(tf.add(mat_h1_te, b_h1_tr))

    mat_h2_te = tf.matmul(out_h1_te, w_h2_tr)
    out_h2_te = tf.nn.relu(tf.add(mat_h2_te, b_h2_tr))

    outputs_te =  tf.reshape(tf.matmul(out_h2_te, w_final_tr), shape=[batch_size])
    yte = tf.reshape(yte, [batch_size])

    # Cost => MAPE (one scalar per batch, since both arguments are 1-D)
    train_cost = tf.keras.losses.MAPE(ytr, outputs_tr)
    test_cost = tf.keras.losses.MAPE(yte, outputs_te)

    # Convergence function => AdamOptimizer
    train_step = tf.train.AdamOptimizer(learning_rate=1).minimize(train_cost)

    # train step with a lower learning rate => gains a few % at the end
    # (separate optimizer instance, so it has its own Adam state)
    tiny_train_step = tf.train.AdamOptimizer(learning_rate=0.01).minimize(train_cost)

    # op that initializes every variable of the graph
    init = tf.global_variables_initializer()

# allow_soft_placement lets TensorFlow fall back to the CPU when "/gpu:0" is not
# available instead of failing on the explicit device pin above.
session_config = tf.ConfigProto(allow_soft_placement=True)

with tf.Session(config=session_config) as sess:
    sess.run(init)
    for j in range(nb_epochs):
        # large learning rate first, small learning rate for the remaining epochs
        epoch_step = train_step if j < nb_epochs_coarse else tiny_train_step
        sess.run(iterator_train.initializer)
        for i in range(nb_batch_train):
            sess.run(epoch_step)
        # Evaluating train_cost pulls the next (held-back) batch: the printed value is
        # the MAPE of that single batch, not an average over the epoch.
        print("Cout entrainement epoch n°", j+1, ":",  sess.run(train_cost))

    # Final training MAPE, averaged over nb_batch_train batches
    sess.run(iterator_train.initializer)
    mape = 0
    for i in range(nb_batch_train):
        mape+=sess.run(train_cost)
    print("Cout entrainement final =", mape/nb_batch_train)

    # Final test MAPE, averaged over the full test batches
    mape = 0
    sess.run(iterator_test.initializer)
    for i in range(nb_batch_test):
        mape+=sess.run(test_cost)
    print("Cout test final =", mape/nb_batch_test)

Cout entrainement epoch n° 1 : 15.791492
Cout entrainement epoch n° 2 : 12.765275
Cout entrainement epoch n° 3 : 11.36124
Cout entrainement epoch n° 4 : 10.141923
Cout entrainement epoch n° 5 : 10.176031
Cout entrainement epoch n° 6 : 10.488874
Cout entrainement epoch n° 7 : 9.427525
Cout entrainement epoch n° 8 : 10.93229
Cout entrainement epoch n° 9 : 7.9154744
Cout entrainement epoch n° 10 : 7.85541
Cout entrainement epoch n° 11 : 7.434781
Cout entrainement epoch n° 12 : 6.702325
Cout entrainement epoch n° 13 : 8.32142
Cout entrainement epoch n° 14 : 8.600398
Cout entrainement epoch n° 15 : 6.6145124
Cout entrainement epoch n° 16 : 7.7016144
Cout entrainement epoch n° 17 : 7.352169
Cout entrainement epoch n° 18 : 8.296477
Cout entrainement epoch n° 19 : 7.7162833
Cout entrainement epoch n° 20 : 6.792471
Cout entrainement epoch n° 21 : 6.582334
Cout entrainement epoch n° 22 : 6.5134516
Cout entrainement epoch n° 23 : 6.517364
Cout entrainement epoch n° 24 : 6.5189657
Cout entrainemen

Best results (with 80% training - 20% test; re-run the training-test split cell with `test_size = 0.2` before running this cell)

In [31]:
# Build, train and evaluate a fully connected regressor (TF 1.x graph mode):
#   inputs -> 200 ReLU units -> 100 ReLU units -> 1 linear output (no bias), loss = MAPE.
# Reads x_train / y_train / x_test / y_test from the split cell.
# NOTE(review): the heading of this section quotes an 80/20 split, while the split cell
# uses test_size = 0.1 - the split must be re-run with test_size = 0.2 to match it.

# Start from an empty default graph so that re-running this cell (or running another
# training cell before it) does not keep stacking duplicate ops and variables.
tf.reset_default_graph()

nb_features = x_train.shape[1]
batch_size = 100
nb_epochs = 40
# Epochs trained with the large learning rate before switching to the small one.
nb_epochs_coarse = 20
# Only full batches can be used: outputs and targets are reshaped to [batch_size] below.
# One extra full batch is held back on the training side because the per-epoch cost
# print pulls one more batch from the iterator after the training loop.
nb_batch_train = len(x_train) // batch_size - 1
nb_batch_test = len(x_test) // batch_size
nb_node_layer1 = 200
nb_node_layer2 = 100

# slice the datasets => feed_dict was very slow, so an iterator is used instead
dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
iterator_train = tf.compat.v1.data.make_initializable_iterator(dataset_train)
xtr, ytr = iterator_train.get_next()

dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
iterator_test = tf.compat.v1.data.make_initializable_iterator(dataset_test)
xte, yte = iterator_test.get_next()

with tf.device("/gpu:0"):
    # Layers training
    w_h1_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_features, nb_node_layer1)), name = "w_h1_tr")
    mat_h1_tr = tf.matmul(xtr, w_h1_tr)
    b_h1_tr = tf.Variable(tf.zeros(nb_node_layer1), name="b_h1_tr")
    out_h1_tr = tf.nn.relu(tf.add(mat_h1_tr, b_h1_tr))

    w_h2_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_node_layer1, nb_node_layer2)), name = "w_h2_tr")
    mat_h2_tr = tf.matmul(out_h1_tr, w_h2_tr)
    b_h2_tr = tf.Variable(tf.zeros(nb_node_layer2), name="b_h2_tr")
    out_h2_tr = tf.nn.relu(tf.add(mat_h2_tr, b_h2_tr))

    # Output layer: a single linear unit without bias, flattened to one value per sample
    w_final_tr = tf.Variable(tf.glorot_uniform_initializer()((nb_node_layer2, 1)), name = "w_final_tr")
    outputs_tr =  tf.reshape(tf.matmul(out_h2_tr, w_final_tr), shape=[batch_size])
    ytr = tf.reshape(ytr, [batch_size])

    # Layers test: same weights and biases, applied to the test iterator
    mat_h1_te = tf.matmul(xte, w_h1_tr)
    out_h1_te = tf.nn.relu(tf.add(mat_h1_te, b_h1_tr))

    mat_h2_te = tf.matmul(out_h1_te, w_h2_tr)
    out_h2_te = tf.nn.relu(tf.add(mat_h2_te, b_h2_tr))

    outputs_te =  tf.reshape(tf.matmul(out_h2_te, w_final_tr), shape=[batch_size])
    yte = tf.reshape(yte, [batch_size])

    # Cost => MAPE (one scalar per batch, since both arguments are 1-D)
    train_cost = tf.keras.losses.MAPE(ytr, outputs_tr)
    test_cost = tf.keras.losses.MAPE(yte, outputs_te)

    # Convergence function => AdamOptimizer
    train_step = tf.train.AdamOptimizer(learning_rate=1).minimize(train_cost)

    # train step with a lower learning rate => gains a few % at the end
    # (separate optimizer instance, so it has its own Adam state)
    tiny_train_step = tf.train.AdamOptimizer(learning_rate=0.01).minimize(train_cost)

    # op that initializes every variable of the graph
    init = tf.global_variables_initializer()

# allow_soft_placement lets TensorFlow fall back to the CPU when "/gpu:0" is not
# available instead of failing on the explicit device pin above.
session_config = tf.ConfigProto(allow_soft_placement=True)

with tf.Session(config=session_config) as sess:
    sess.run(init)
    for j in range(nb_epochs):
        # large learning rate first, small learning rate for the remaining epochs
        epoch_step = train_step if j < nb_epochs_coarse else tiny_train_step
        sess.run(iterator_train.initializer)
        for i in range(nb_batch_train):
            sess.run(epoch_step)
        # Evaluating train_cost pulls the next (held-back) batch: the printed value is
        # the MAPE of that single batch, not an average over the epoch.
        print("Cout entrainement epoch n°", j+1, ":",  sess.run(train_cost))

    # Final training MAPE, averaged over nb_batch_train batches
    sess.run(iterator_train.initializer)
    mape = 0
    for i in range(nb_batch_train):
        mape+=sess.run(train_cost)
    print("Cout entrainement final =", mape/nb_batch_train)

    # Final test MAPE, averaged over the full test batches
    mape = 0
    sess.run(iterator_test.initializer)
    for i in range(nb_batch_test):
        mape+=sess.run(test_cost)
    print("Cout test final =", mape/nb_batch_test)

Cout entrainement epoch n° 1 : 13.483752
Cout entrainement epoch n° 2 : 12.989761
Cout entrainement epoch n° 3 : 10.49069
Cout entrainement epoch n° 4 : 10.093693
Cout entrainement epoch n° 5 : 11.985236
Cout entrainement epoch n° 6 : 10.376923
Cout entrainement epoch n° 7 : 12.580645
Cout entrainement epoch n° 8 : 11.428123
Cout entrainement epoch n° 9 : 10.47652
Cout entrainement epoch n° 10 : 8.868928
Cout entrainement epoch n° 11 : 9.333644
Cout entrainement epoch n° 12 : 8.709122
Cout entrainement epoch n° 13 : 8.072023
Cout entrainement epoch n° 14 : 7.910425
Cout entrainement epoch n° 15 : 7.2187185
Cout entrainement epoch n° 16 : 7.421232
Cout entrainement epoch n° 17 : 7.520315
Cout entrainement epoch n° 18 : 8.34701
Cout entrainement epoch n° 19 : 7.2569666
Cout entrainement epoch n° 20 : 7.509277
Cout entrainement epoch n° 21 : 6.5936685
Cout entrainement epoch n° 22 : 6.554046
Cout entrainement epoch n° 23 : 6.55459
Cout entrainement epoch n° 24 : 6.545317
Cout entrainement