In [None]:
#import packages
import os 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt
import IPython
import matplotlib.pyplot as plt

# Final validation accuracy of each trained model, appended in training order by later cells
# and plotted as a bar chart at the end of the notebook.
# NOTE(review): re-running an appending cell without re-running this one adds duplicate entries.
accuracies = []

In [None]:
# Read the feature table from CSV into a DataFrame.
# NOTE(review): the models below declare input_shape=(8,), so this file presumably holds 8 numeric feature columns — confirm.
data = pd.read_csv('dummydata.csv')

In [None]:
# Read the category labels as a DataFrame; the string "NaN" is parsed as a missing value.
# NOTE(review): a missing label would reach y as NaN — confirm the label file contains none.
target = pd.read_csv('dummydata_target.csv', na_values="NaN")

In [None]:
#turn both dataframes into numpy arrays; the labels are collapsed into a 1-D vector
X = data.to_numpy()
y = target.to_numpy().flatten()

In [None]:
#split data into train and test sets (scikit-learn's default holds out 25% of the rows for testing)
#train_test_split is already imported in the first cell, so it is not imported again here
#a fixed random_state makes the split, and every accuracy reported below, reproducible across kernel restarts
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
#training pipeline: wrap the arrays in a tf.data.Dataset, repeat it indefinitely,
#shuffle through a 1000-element buffer and emit batches of 32 examples
train = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .repeat()
    .shuffle(1000)
    .batch(32)
)

#evaluation pipeline: the test arrays as a dataset, one example per batch
test = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test = test.batch(1)

In [None]:
#baseline classifier: 8 input features -> three ReLU hidden layers (64, 48, 32 units) -> 3-way softmax output
b_model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(8,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(48, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax'),
])

#configure model: Adam optimizer, integer-label cross-entropy loss, accuracy reported as metric
b_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
#train baseline model: 10 epochs of 150 training batches each, scored against the test dataset
b_model.fit(train, epochs=10, steps_per_epoch=150, validation_data=test)

In [None]:
#read the per-epoch 'val_accuracy' list from the baseline model's training history,
#take its last element and append it to the metric tracking list
#NOTE(review): re-running this cell appends a duplicate entry to `accuracies`
b_model_accuracies = b_model.history.history['val_accuracy']
last_b_model_accuracy = b_model_accuracies[-1]
accuracies.append(last_b_model_accuracy)

In [None]:
#hypermodel builder: 8 input nodes, 3 ReLU hidden layers with tunable number of units, and 3 output nodes
def build_hypermodel(hp):
    """Build and compile a classifier whose hidden-layer widths are drawn from `hp`.

    Args:
        hp: hyperparameter container supplied by the tuner; `hp.Int` is called once
            per hidden layer under the names 'units_1', 'units_2' and 'units_3'.

    Returns:
        A compiled `tf.keras.Sequential` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    #lower bound of the search range per hidden layer; every range is capped at 256 with step 16
    #NOTE(review): layers 2 and 3 start at 1 rather than 16 like layer 1 — confirm this is intended
    lower_bounds = {'units_1': 16, 'units_2': 1, 'units_3': 1}

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(8,)))
    for hp_name, lowest in lower_bounds.items():
        width = hp.Int(name=hp_name, min_value=lowest, max_value=256, step=16)
        model.add(tf.keras.layers.Dense(units=width, activation='relu'))
    model.add(tf.keras.layers.Dense(3, activation='softmax'))

    #configure model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
#random-search tuner for the unit counts: at most 25 trials, with 'val_loss' as the objective
#results are written to a folder named 'u' inside the current working directory
tuner = kt.RandomSearch(
    hypermodel=build_hypermodel,
    objective='val_loss',
    max_trials=25,
    directory=os.path.normpath(os.getcwd()),
    project_name='u',
)

In [None]:
#callback that ends a trial once validation loss has not improved for 3 consecutive epochs
#it watches 'val_loss' so that it tracks the same quantity the tuner uses as its objective;
#watching the training loss would let overfitting trials run for all 20 epochs
#NOTE(review): the test split is also used as validation data for the search, so accuracies
#measured on it afterwards are optimistically biased — consider a separate validation split
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

#search for best hyperparameters (number of units per layer)
tuner.search(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_test, y_test),
    verbose=2,
    callbacks=[es],
)

#get the single best hyperparameter set from the search
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

#print best number of units per layer 
print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units_1')}. The optimal number of units in the second densely-connected
layer is {best_hps.get('units_2')}. The optimal number of units in the third densely-connected
layer is {best_hps.get('units_3')}.
""")

In [None]:
#build a model from the tuner's hypermodel using the best hyperparameters of the unit-count search
u_model = tuner.hypermodel.build(best_hps)

In [None]:
#train the unit-tuned model: 10 epochs of 150 training batches each, scored against the test dataset
u_model.fit(train, epochs=10, steps_per_epoch=150, validation_data=test)

In [None]:
#read the per-epoch 'val_accuracy' list from the unit-tuned model's training history,
#take its last element and append it to the metric tracking list
#NOTE(review): re-running this cell appends a duplicate entry to `accuracies`
u_model_accuracies = u_model.history.history['val_accuracy']
last_u_model_accuracy = u_model_accuracies[-1]
accuracies.append(last_u_model_accuracy)

In [None]:
#hypermodel builder: 8 input nodes, 3 hidden layers with tunable number of units and tunable activation function, and 3 output nodes
#NOTE(review): this redefines build_hypermodel and shadows the earlier units-only builder of the same name
def build_hypermodel(hp):
    """Build and compile a classifier whose hidden-layer widths and activations come from `hp`.

    Args:
        hp: hyperparameter container supplied by the tuner; for each hidden layer
            `hp.Int` ('units_1'..'units_3') is called first, then `hp.Choice`
            ('a_1'..'a_3').

    Returns:
        A compiled `tf.keras.Sequential` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    candidate_activations = ('relu', 'tanh', 'elu', 'selu', 'swish')
    #(units name, activation name, smallest unit count) per hidden layer; ranges end at 256 with step 16
    layer_specs = (
        ('units_1', 'a_1', 16),
        ('units_2', 'a_2', 1),
        ('units_3', 'a_3', 1),
    )

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(8,)))
    for units_name, activation_name, lowest in layer_specs:
        width = hp.Int(name=units_name, min_value=lowest, max_value=256, step=16)
        chosen_activation = hp.Choice(name=activation_name, values=list(candidate_activations))
        model.add(tf.keras.layers.Dense(units=width, activation=chosen_activation))
    model.add(tf.keras.layers.Dense(3, activation='softmax'))

    #configure model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
#random-search tuner for units and activation functions: at most 25 trials, with 'val_loss' as the objective
tuner = kt.RandomSearch(
    hypermodel=build_hypermodel,
    objective='val_loss',
    max_trials=25,
    #save output to current working directory
    directory=os.path.normpath(os.getcwd()),
    #output folder named 'a'
    project_name='a',
)

In [None]:
#clears training output cell once a Keras-Tuner trial is complete
class ClearTrainingOutput(tf.keras.callbacks.Callback):
    """Keras callback that clears the notebook cell output whenever a training run ends."""

    def on_train_end(self, logs=None):
        """Clear the cell output at the end of training.

        Args:
            logs: metrics dictionary passed by Keras; not used.
        """
        #wait=True delays the clearing until new output is available to replace the old one
        IPython.display.clear_output(wait=True)

#search for best hyperparameters (number of units and activation function per layer)
#`es` is the EarlyStopping callback created in the earlier search cell
tuner.search(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_test, y_test),
    verbose=2,
    callbacks=[es, ClearTrainingOutput()],
)

#get the single best hyperparameter set from the search
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

#print best activation function per layer 
print(f"""
The hyperparameter search is complete. The optimal activation function for the first densely-connected
layer is {best_hps.get('a_1')}. The optimal activation function for the second densely-connected
layer is {best_hps.get('a_2')}. The optimal activation function for the third densely-connected
layer is {best_hps.get('a_3')}.
""")

In [None]:
#build a model from the tuner's hypermodel using the best hyperparameters of the units-and-activation search
a_model = tuner.hypermodel.build(best_hps)

In [None]:
#train the activation-tuned model: 10 epochs of 150 training batches each, scored against the test dataset
a_model.fit(train, epochs=10, steps_per_epoch=150, validation_data=test)

In [None]:
#read the per-epoch 'val_accuracy' list from the activation-tuned model's training history,
#take its last element and append it to the metric tracking list
#NOTE(review): re-running this cell appends a duplicate entry to `accuracies`
a_model_accuracies = a_model.history.history['val_accuracy']
last_a_model_accuracy = a_model_accuracies[-1]
accuracies.append(last_a_model_accuracy)

In [None]:
#bar chart of the recorded accuracy values, one green bar per model
labels = ['Baseline', 'Tuned Units', 'Tuned Activation']
x_pos = list(range(len(labels)))
plt.bar(x_pos, accuracies, color='green')
plt.xticks(x_pos, labels)
plt.ylabel("Accuracy")

plt.show()

In [None]:
#category labels, looked up by predicted class index
#NOTE(review): assumes label ids 0/1/2 in the target file map to these names in this order — confirm
target_names = np.array(['exclusively recreational', 'some medical', 'exclusively medical'])

#true labels and values of novel samples for predicting category membership
predict_true_labels = ["exclusively recreational", "some medical", "exclusively medical"]
#explicit 2-D float array (one row per sample, 8 features each): a bare nested Python list
#is not accepted as model input by every Keras version
predict_X = np.array([
    [21, 0, 50, 2, 1, 0.75, 1, 0], #exclusively recreational cannabis user
    [30, 0, 200, 5, 5, 10, 0, 1], #sometimes medical cannabis user
    [62, 1, 360, 6, 13, 20, 0, 1], #exclusively medical cannabis user
], dtype=float)

#b model predicts class probabilities for each sample
predictions = b_model.predict(predict_X)

#for each sample print predicted category label with its probability and the expected category label
for class_probs, expected in zip(predictions, predict_true_labels):
    predicted_index = class_probs.argmax()
    predicted = target_names[predicted_index]
    probability = class_probs.max()
    tick_cross = "✓" if predicted == expected else "✗"
    print(f"{tick_cross} B model prediction is '{predicted}' ({100 * probability:.1f}%), expected '{expected}'")

In [None]:
#category labels, looked up by predicted class index
#NOTE(review): assumes label ids 0/1/2 in the target file map to these names in this order — confirm
target_names = np.array(['exclusively recreational', 'some medical', 'exclusively medical'])

#true labels and values of novel samples for predicting category membership
predict_true_labels = ["exclusively recreational", "some medical", "exclusively medical"]
#explicit 2-D float array (one row per sample, 8 features each): a bare nested Python list
#is not accepted as model input by every Keras version
predict_X = np.array([
    [21, 0, 50, 2, 1, 0.75, 1, 0],
    [30, 0, 200, 5, 5, 10, 0, 1],
    [62, 1, 360, 6, 13, 20, 0, 1],
], dtype=float)

#u model predicts class probabilities for each sample
predictions = u_model.predict(predict_X)

#for each sample print predicted category label with its probability and the expected category label
for class_probs, expected in zip(predictions, predict_true_labels):
    predicted_index = class_probs.argmax()
    predicted = target_names[predicted_index]
    probability = class_probs.max()
    tick_cross = "✓" if predicted == expected else "✗"
    print(f"{tick_cross} U model prediction is '{predicted}' ({100 * probability:.1f}%), expected '{expected}'")

In [None]:
#category labels, looked up by predicted class index
#NOTE(review): assumes label ids 0/1/2 in the target file map to these names in this order — confirm
target_names = np.array(['exclusively recreational', 'some medical', 'exclusively medical'])

#true labels and values of novel samples for predicting category membership
predict_true_labels = ["exclusively recreational", "some medical", "exclusively medical"]
#explicit 2-D float array (one row per sample, 8 features each): a bare nested Python list
#is not accepted as model input by every Keras version
predict_X = np.array([
    [21, 0, 50, 2, 1, 0.75, 1, 0],
    [30, 0, 200, 5, 5, 10, 0, 1],
    [62, 1, 360, 6, 13, 20, 0, 1],
], dtype=float)

#a model predicts class probabilities for each sample
predictions = a_model.predict(predict_X)

#for each sample print predicted category label with its probability and the expected category label
for class_probs, expected in zip(predictions, predict_true_labels):
    predicted_index = class_probs.argmax()
    predicted = target_names[predicted_index]
    probability = class_probs.max()
    tick_cross = "✓" if predicted == expected else "✗"
    print(f"{tick_cross} A model prediction is '{predicted}' ({100 * probability:.1f}%), expected '{expected}'")