In [14]:
import pandas
data = pandas.read_csv("train.csv")

In [15]:
import optuna

In [16]:
import pickle
import numpy as np

# for visualization
import matplotlib.pyplot as plt
import seaborn as sns

# for neural networks
import tensorflow as tf
from tensorflow import keras
from keras.backend import clear_session
from keras.layers import Dense, Input
from keras.layers import Dropout
from keras.models import Sequential
from keras.optimizers import Adam

# machine learning basics
from sklearn.datasets import load_breast_cancer
# train_test_split was moved from cross_validation to model_selection in 0.18
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

In [17]:
BATCHSIZE = 100
# number of epochs
EPOCHS = 10

# needed to save best model so far
global best_accuracy_so_far

## The objective function for optuna to optimize the hyperparameters

In [18]:
def objective(trial):
    global best_accuracy_so_far

    # Clear clutter from previous Keras session graphs.
    clear_session()

    covertype = data

    # get features X and labels y
    X = data.values[:,1:-1]
    y = data['Cover_Type'].values

    # split dataset into training and validation datasets
    X_train, X_val, y_train, y_val = train_test_split(X, y)

    scaler = StandardScaler()
    # Fit only to the training data
    scaler.fit(X_train)
    # save fitted scaler, because you need it later for the test dataset
    pickle.dump(scaler, open("scaler.p", "wb"))

    # Now apply the transformations to the data:
    X_train = scaler.transform(X_train)
    X_val = scaler.transform(X_val)

    # create neural network
    model = Sequential()

    model.add(Input(shape=54))
    model.add(Dense(16, kernel_initializer='uniform', activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(units=trial.suggest_int("units", 8, 24, step=4), kernel_initializer='uniform',
                    activation=trial.suggest_categorical("activation", ["relu", "linear"])))
    # Adding dropout to prevent overfitting
    model.add(Dropout(rate=trial.suggest_float("rate", 0.0, 0.1, step=0.1)))
    model.add(Dense(10, kernel_initializer='uniform', activation='sigmoid'))

    # We compile our model with a sampled learning rate.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=Adam(learning_rate=learning_rate),
        metrics=["accuracy"],
    )

    # train neural network
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=True,
        batch_size=BATCHSIZE,
        epochs=EPOCHS,
        verbose=False,
    )

    # Evaluate the model accuracy on the validation set.
    score = model.evaluate(X_val, y_val, verbose=0)

    # save best model so far to be able to use the best model later to predict with test data
    if score[1] >= best_accuracy_so_far:
        tf.keras.models.save_model(model, '{0}.mdl'.format(trial.number))
        best_accuracy_so_far = score[1]

    # return accuracy
    return score[1]

# Main Function
Use optuna to do hyperparameter optimization to find optimal neural network architecture

In [19]:
best_accuracy_so_far = -100
study = optuna.create_study(direction="maximize")
# be cautious with the number of trials: Do not use a number larger than 50
# this call starts the hyperparameter optimization process: the above define function "objective" is called with
# n_trials different hyperparameter combinations
study.optimize(objective, n_trials=15, timeout=600)

[32m[I 2022-12-01 12:23:20,575][0m A new study created in memory with name: no-name-6007bb5b-add6-4658-9f16-98c92a0c71f9[0m


INFO:tensorflow:Assets written to: 0.mdl\assets


[32m[I 2022-12-01 12:23:24,392][0m Trial 0 finished with value: 0.29206350445747375 and parameters: {'units': 12, 'activation': 'linear', 'rate': 0.0, 'learning_rate': 2.3957644740303312e-05}. Best is trial 0 with value: 0.29206350445747375.[0m


INFO:tensorflow:Assets written to: 1.mdl\assets


[32m[I 2022-12-01 12:23:28,311][0m Trial 1 finished with value: 0.664814829826355 and parameters: {'units': 12, 'activation': 'relu', 'rate': 0.0, 'learning_rate': 0.0009099653037689419}. Best is trial 1 with value: 0.664814829826355.[0m
[32m[I 2022-12-01 12:23:31,186][0m Trial 2 finished with value: 0.4047619104385376 and parameters: {'units': 20, 'activation': 'linear', 'rate': 0.0, 'learning_rate': 0.00016263633864492386}. Best is trial 1 with value: 0.664814829826355.[0m


INFO:tensorflow:Assets written to: 3.mdl\assets


[32m[I 2022-12-01 12:23:35,059][0m Trial 3 finished with value: 0.6830688118934631 and parameters: {'units': 20, 'activation': 'relu', 'rate': 0.0, 'learning_rate': 0.0008402937552590816}. Best is trial 3 with value: 0.6830688118934631.[0m
[32m[I 2022-12-01 12:23:38,515][0m Trial 4 finished with value: 0.6716931462287903 and parameters: {'units': 12, 'activation': 'linear', 'rate': 0.1, 'learning_rate': 0.0016389309960579475}. Best is trial 3 with value: 0.6830688118934631.[0m


INFO:tensorflow:Assets written to: 5.mdl\assets


[32m[I 2022-12-01 12:23:42,253][0m Trial 5 finished with value: 0.6970899701118469 and parameters: {'units': 16, 'activation': 'relu', 'rate': 0.1, 'learning_rate': 0.009018541301952095}. Best is trial 5 with value: 0.6970899701118469.[0m


INFO:tensorflow:Assets written to: 6.mdl\assets


[32m[I 2022-12-01 12:23:46,112][0m Trial 6 finished with value: 0.7148148417472839 and parameters: {'units': 12, 'activation': 'relu', 'rate': 0.1, 'learning_rate': 0.010365263421036783}. Best is trial 6 with value: 0.7148148417472839.[0m
[32m[I 2022-12-01 12:23:49,272][0m Trial 7 finished with value: 0.6637566089630127 and parameters: {'units': 24, 'activation': 'relu', 'rate': 0.0, 'learning_rate': 0.0006443274441388234}. Best is trial 6 with value: 0.7148148417472839.[0m
[32m[I 2022-12-01 12:23:52,517][0m Trial 8 finished with value: 0.36666667461395264 and parameters: {'units': 24, 'activation': 'linear', 'rate': 0.1, 'learning_rate': 2.8329204188749607e-05}. Best is trial 6 with value: 0.7148148417472839.[0m
[32m[I 2022-12-01 12:23:55,404][0m Trial 9 finished with value: 0.6952381134033203 and parameters: {'units': 20, 'activation': 'relu', 'rate': 0.0, 'learning_rate': 0.001066740553895641}. Best is trial 6 with value: 0.7148148417472839.[0m
[32m[I 2022-12-01 12:23:5

INFO:tensorflow:Assets written to: 11.mdl\assets


[32m[I 2022-12-01 12:24:02,848][0m Trial 11 finished with value: 0.7320106029510498 and parameters: {'units': 16, 'activation': 'relu', 'rate': 0.1, 'learning_rate': 0.017925001870819347}. Best is trial 11 with value: 0.7320106029510498.[0m
[32m[I 2022-12-01 12:24:06,247][0m Trial 12 finished with value: 0.7044973373413086 and parameters: {'units': 16, 'activation': 'relu', 'rate': 0.1, 'learning_rate': 0.02372029775562968}. Best is trial 11 with value: 0.7320106029510498.[0m
[32m[I 2022-12-01 12:24:09,553][0m Trial 13 finished with value: 0.7164021134376526 and parameters: {'units': 8, 'activation': 'relu', 'rate': 0.1, 'learning_rate': 0.010033185436571333}. Best is trial 11 with value: 0.7320106029510498.[0m
[32m[I 2022-12-01 12:24:12,780][0m Trial 14 finished with value: 0.658994734287262 and parameters: {'units': 8, 'activation': 'relu', 'rate': 0.1, 'learning_rate': 0.08752270142893724}. Best is trial 11 with value: 0.7320106029510498.[0m


In [20]:
print("Number of finished trials: {}".format(len(study.trials)))

Number of finished trials: 15


Analyse the best model and use it to predict accuracy on test data

In [21]:
print("Best trial:")
trial = study.best_trial
print(trial)

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
FrozenTrial(number=11, values=[0.7320106029510498], datetime_start=datetime.datetime(2022, 12, 1, 12, 23, 58, 427097), datetime_complete=datetime.datetime(2022, 12, 1, 12, 24, 2, 848088), params={'units': 16, 'activation': 'relu', 'rate': 0.1, 'learning_rate': 0.017925001870819347}, distributions={'units': IntDistribution(high=24, log=False, low=8, step=4), 'activation': CategoricalDistribution(choices=('relu', 'linear')), 'rate': FloatDistribution(high=0.1, log=False, low=0.0, step=0.1), 'learning_rate': FloatDistribution(high=0.1, log=True, low=1e-05, step=None)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=11, state=TrialState.COMPLETE, value=None)
  Value: 0.7320106029510498
  Params: 
    units: 16
    activation: relu
    rate: 0.1
    learning_rate: 0.017925001870819347


Load the best model. This model was saved in the function "objective".

In [22]:
 best_model = tf.keras.models.load_model('{0}.mdl'.format(trial.number))

### Return performance of final model on new data (test data)
TODO: only load test data here, that you get a few days before the deadline.

In [23]:
# cancer = load_breast_cancer()

# X_test = cancer['data']
# y_test = cancer['target']
#
# scaler = pickle.load(open("scaler.p", "rb"))
# # important: preprocessing of test dataset has to be the same as for the training dataset
# X_test = scaler.transform(X_test)

Predicting the Test set results

In [24]:
# y_pred = best_model.predict(X_test)
# print(y_pred)
# # create labels out of predictions
# y_pred_labels = np.argmax(y_pred, axis=1)

Making the Confusion Matrix

In [25]:
# cm = confusion_matrix(y_test, y_pred_labels)
#
# print("Our accuracy is {}%".format(((cm[0][0] + cm[1][1]) / cm.sum()) * 100))

Plot heatmap

In [26]:
# sns.heatmap(cm, annot=True)
# plt.savefig('confmat.png')
#
# print(classification_report(y_test, y_pred_labels))