In [None]:
# Install or upgrade KerasTuner into the running kernel's environment; it provides the `keras_tuner` module imported below.
%pip install keras-tuner --upgrade

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
import keras_tuner as kt
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import subprocess

In [None]:
# Invoke createDataset.jar through the Java launcher.
# NOTE(review): the flags look like they request a 'RandomConfiguration' sample for the
# feature model ./bcs_v1.xml, written to ./products.csv -- confirm against the tool's usage text.
create_dataset_cmd = [
    'java', '-jar', 'createDataset.jar',
    '-ss', 'RandomConfiguration',
    '-m', '100',
    '-fm', './bcs_v1.xml',
    '-c', './products.csv',
]
# stdout/stderr are captured rather than streamed; the CompletedProcess is the cell's value.
subprocess.run(create_dataset_cmd, capture_output=True)

In [None]:
# Load the dataset and shuffle its rows once, up front.
# A fixed random_state keeps the row order (and everything derived from it)
# identical across kernel restarts.
dataset = pd.read_csv("Data/Apache_AllNumeric.csv")
dataset = dataset.sample(frac=1, random_state=42)
# Separate inputs from the target on a copy, so `dataset` itself keeps the PERF column.
dataset_features = dataset.copy()
dataset_labels = dataset_features.pop('PERF')

In [None]:
# Display the shuffled dataset, including the PERF target column.
dataset

In [None]:
# Normalize dataset: divide every column by its maximum (max scaling; the minimum
# is NOT subtracted, so this equals min-max scaling only for columns whose minimum is 0).
#
# Everything is derived from the untouched `dataset` frame instead of being updated
# in place. That keeps the cell idempotent: re-running it no longer recomputes
# `labels_max` from already-scaled labels (which would make it 1.0 and break the
# later `predictions * labels_max` rescaling).
# NOTE: the maxima are taken over the full dataset, i.e. before the train/test split.
raw_features = dataset.drop(columns='PERF')
raw_labels = dataset['PERF']
features_max = raw_features.max()
labels_max = raw_labels.max()
# A column that is zero everywhere has max 0; divide it by 1 instead so it stays 0 rather than 0/0 = NaN.
dataset_features = raw_features / features_max.replace(0, 1)
dataset_labels = raw_labels / labels_max

In [None]:
# Display the labels after they have been scaled by their maximum.
dataset_labels

In [None]:
# split dataset: train (~2/3) / test (~1/3)
# A fixed random_state keeps the held-out test rows the same on every run, so
# reported test results are comparable between executions.
x, x_test, y, y_test = train_test_split(
    dataset_features,
    dataset_labels,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Display the non-test feature rows (before the validation split is carved out of them).
x

In [None]:
def build_model(hp):
    """Build and compile a dense regression network from tuner hyperparameters.

    Hyperparameters registered on ``hp`` (in this order):
      * ``num_layers``  -- number of hidden Dense layers (Int, 1 to 11).
      * ``units_{i}``   -- width of hidden layer ``i`` (Int, 8 to 256, step 8).
      * ``activation``  -- one of relu / tanh / sigmoid; the same hyperparameter
        name is requested for every hidden layer.
      * ``dropout``     -- whether a Dropout layer follows the hidden stack.
      * ``dr``          -- dropout rate (0.25 or 0.5), only requested when
        ``dropout`` is true.
      * ``lr``          -- Adam learning rate (Float, 1e-4 to 1e-1, log sampling).

    Args:
        hp: hyperparameter container exposing ``Int``, ``Choice``, ``Boolean``
            and ``Float``.

    Returns:
        A compiled ``keras.Sequential`` model with a single-unit output layer,
        mean-squared-error loss and a MeanSquaredError metric.
    """
    net = keras.Sequential()
    net.add(keras.layers.Flatten())

    # Hidden stack: depth and per-layer width are both tunable.
    depth = hp.Int("num_layers", 1, 11)
    for idx in range(depth):
        width = hp.Int(f"units_{idx}", min_value=8, max_value=256, step=8)
        act = hp.Choice("activation", ["relu", "tanh", "sigmoid"])
        net.add(keras.layers.Dense(units=width, activation=act))

    # Optional dropout, placed once after the last hidden layer.
    use_dropout = hp.Boolean("dropout")
    if use_dropout:
        drop_rate = hp.Choice("dr", [0.25, 0.5])
        net.add(keras.layers.Dropout(rate=drop_rate))

    # Single linear output unit for the regression target.
    net.add(keras.layers.Dense(1))

    step_size = hp.Float("lr", min_value=1e-4, max_value=1e-1, sampling="log")
    adam = keras.optimizers.Adam(learning_rate=step_size)
    net.compile(
        optimizer=adam,
        loss="mean_squared_error",
        metrics=[keras.metrics.MeanSquaredError()],
    )
    return net

In [None]:
# Bayesian-optimisation search over the space declared in build_model.
# Trials are ranked by the *validation* MSE: ranking by the training-set metric
# would reward overfitting and ignore the validation data handed to tuner.search.
# The direction is stated explicitly rather than left to name-based inference.
tuner = kt.BayesianOptimization(
    hypermodel=build_model,
    objective=kt.Objective("val_mean_squared_error", direction="min"),
    max_trials=10,
    overwrite=True,  # discard results from any previous run in the same directory
    directory="my_tuner",
    project_name="feature_degradation",
)

In [None]:
# Stop a training run once the validation MSE has not improved for 5 epochs and
# roll back to the best weights seen. Monitoring the validation metric (rather
# than the training metric) makes the stop criterion and the restored weights
# reflect generalisation; it requires validation data to be passed to fit/search.
es = keras.callbacks.EarlyStopping(
    monitor="val_mean_squared_error",
    patience=5,
    restore_best_weights=True
)

In [None]:
# Print a summary of the tuner's hyperparameter search space.
tuner.search_space_summary()

In [None]:
# Hold out 20% of the non-test rows for validation.
# A fixed random_state keeps the train/validation membership stable between runs.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
# Display the feature rows left for training after the validation split.
x_train

In [None]:
# Run the hyperparameter search: each trial trains for up to 10 epochs with the
# early-stopping callback attached and the validation split supplied.
tuner.search(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=[es],
)

In [None]:
# Print a summary of the search results.
tuner.results_summary()

In [27]:
# Fetch up to five of the tuner's best hyperparameter sets, rebuild a fresh model
# from the first one and train it from scratch on the training split.
best_hps = tuner.get_best_hyperparameters(5)
top_hps = best_hps[0]
model = build_model(top_hps)
history = model.fit(
    x_train,
    y_train,
    batch_size=20,
    epochs=20,
    validation_data=(x_val, y_val),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Render the network architecture, with shapes shown, to model.png.
keras.utils.plot_model(
    model,
    to_file="model.png",
    show_shapes=True,
)

In [28]:
# Score the trained model on the held-out test split; `results` keeps the
# value returned by evaluate for later inspection.
print("Evaluate on test data")
results = model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=5,
)

Evaluate on test data


In [29]:
# Predict on the first three test rows and print the outputs multiplied back by
# labels_max, i.e. in the units the labels had before normalisation.
print("Generate predictions for 3 samples\n")
test = x_test[:3]
print(test)
predictions = model.predict(test)
rescaled_predictions = predictions * labels_max
print("predictions:\n", rescaled_predictions)

Generate predictions for 3 samples
     HostnameLookups  KeepAlive  EnableSendfile  FollowSymLinks  AccessLog  \
99               1.0        0.0             1.0             1.0        1.0   
86               1.0        0.0             1.0             0.0        0.0   
160              0.0        0.0             0.0             0.0        1.0   

     ExtendedStatus  InMemory  Handle  
99              0.0       0.0     1.0  
86              0.0       0.0     1.0  
160             1.0       1.0     0.0  
predictions: [[ 973.21765]
 [1062.461  ]
 [1135.5249 ]]


In [25]:
# Put each sample's (normalised) features next to its rescaled prediction:
# the prediction becomes the last column of the displayed array.
predictions_rescaled = predictions * labels_max
np.concatenate((test, predictions_rescaled), axis=1)


array([[1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,
        1.24021741e+03],
       [1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,
        1.10239429e+03],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 0.00000000e+00,
        1.31707520e+03]])

In [26]:
# Print the predictions multiplied by labels_max, undoing the label normalisation.
print(predictions*labels_max)

[[1240.2174]
 [1102.3943]
 [1317.0752]]
