In [None]:
# Evaluate `spark` so that its repr is shown as this cell's output.
# NOTE(review): `spark` is not defined anywhere in this notebook; presumably the
# kernel injects the session object on first use — confirm for the target kernel.
spark

In [None]:
from maggy import Searchspace
from hops import featurestore

In [None]:
# Hyperparameter search space; the keyword names must match the parameter names
# of the training function handed to the optimizer.
# INTEGER (like DOUBLE) expects exactly [lower, upper] bounds, while DISCRETE
# enumerates the allowed values. The explicit list of layer counts is therefore
# declared as DISCRETE instead of a four-element INTEGER region.
sp = Searchspace(
    n_layers=('DISCRETE', [2, 3, 4, 5]),
    n_neurons=('INTEGER', [128, 256]),
    l_rate=('DISCRETE', [0.001, 0.09]),
)

In [None]:
def aml_parallel_experiments(n_layers,n_neurons,l_rate):
    """Train and evaluate one feed-forward binary classifier for a single trial.

    The function reads the "embeddings_training_dataset" TFRecords through the
    hops feature store API, builds a Keras model from the given
    hyperparameters, trains it for a few steps and reports the accuracy
    measured on a small hold-out taken from the same dataset.

    Args:
        n_layers: Number of additional hidden Dense layers stacked after the
            first one.
        n_neurons: Number of units in every hidden Dense layer.
        l_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The second value returned by ``model.evaluate`` (the accuracy metric)
        on the hold-out batches; used as the metric to optimize.

    Raises:
        ValueError: If the TFRecord schema has no "label" feature.
    """
    # All dependencies are imported inside the function so that the function is
    # self-contained wherever it gets executed.
    import tensorflow as tf
    from hops import featurestore
    from hops import tensorboard

    batch_size = 32
    num_classes = 1  # single output unit for binary classification
    num_epochs = 3
    steps_per_epoch = 5
    validation_steps = 1
    training_dataset_name = "embeddings_training_dataset"
    label_name = "label"

    def read_decoded_records():
        """Return an unshuffled, unbatched dataset of (features, label) pairs."""
        dataset_dir = featurestore.get_training_dataset_path(
            training_dataset_name,
            featurestore=featurestore.project_featurestore(),
            training_dataset_version=featurestore.get_latest_training_dataset_version(
                training_dataset_name
            ),
        )
        # Sort the part files so that the record order (and thus the hold-out
        # split below) is the same on every read.
        input_files = sorted(
            tf.io.gfile.glob(dataset_dir.replace("hopsfs", "hdfs") + "/part-r-*")
        )
        tf_record_schema = featurestore.get_training_dataset_tf_record_schema(
            training_dataset_name
        )
        if label_name not in tf_record_schema:
            raise ValueError(
                "Feature '{}' not found in the TFRecord schema of '{}'".format(
                    label_name, training_dataset_name
                )
            )
        # The schema is iterated for its feature names; the label and the
        # record id are not model inputs.
        feature_names = [
            name for name in tf_record_schema if name not in (label_name, "id")
        ]

        def decode(example_proto):
            example = tf.io.parse_single_example(example_proto, tf_record_schema)
            x = [example[name] for name in feature_names]
            y = [tf.cast(example[label_name], tf.float32)]
            # A single feature is used as-is; several features are stacked.
            x = x[0] if len(x) == 1 else tf.stack(x)
            return x, y

        return tf.data.TFRecordDataset(input_files).map(decode)

    # NOTE(review): only one training dataset is referenced here, so the first
    # `batch_size * validation_steps` records are held out for validation and
    # the rest is used for training. Swap in a dedicated validation dataset if
    # one exists in the feature store.
    records = read_decoded_records()
    holdout_size = batch_size * validation_steps
    validation_dataset = records.take(holdout_size).batch(batch_size)
    train_dataset = (
        records.skip(holdout_size)
        .shuffle(1000)
        .batch(batch_size)
        .repeat(num_epochs)
    )

    # Define a Keras Model.
    # NOTE(review): input_shape assumes a 128-dimensional feature vector — confirm
    # against the training dataset schema.
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(int(n_neurons), activation='relu', input_shape=(128,)))
    # n_layers is a count, so it has to be wrapped in range() to be iterated.
    for _ in range(int(n_layers)):
        model.add(tf.keras.layers.Dense(int(n_neurons), activation='relu'))
    # Output layer: one sigmoid unit so predictions have the same shape as the
    # label and the 0.5 threshold of the accuracy metric applies to probabilities.
    model.add(tf.keras.layers.Dense(num_classes, activation='sigmoid'))

    # Compile the model. The loss consumes probabilities because the output
    # layer already applies the sigmoid.
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(l_rate),
                  metrics=['accuracy']
                 )

    callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
        tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
    ]

    model.fit(train_dataset,
        verbose=0,
        epochs=num_epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_dataset,
        validation_steps=validation_steps,
        callbacks=callbacks
    )

    score = model.evaluate(validation_dataset, steps=validation_steps)

    # score is [loss, accuracy]; the accuracy is the value being maximized.
    return score[1]

In [None]:
from maggy import experiment

# Launch the hyperparameter search: 10 random-search trials of the training
# function over `sp`, keeping the configuration with the highest returned metric.
result = experiment.lagom(
    aml_parallel_experiments,
    name='anomaly_detection',
    searchspace=sp,
    optimizer='randomsearch',
    direction='max',
    num_trials=10,
    # Heartbeat and early-stopping settings.
    hb_interval=5,
    es_interval=5,
    es_min=5,
)

In [None]:
import json
from hops import hdfs

# Name of the file, below "Resources/", that receives the best hyperparameters.
AML_FF_HYPERPARAMS_FILE = 'aml_ff_best_hp.json'

# Serialize the best hyperparameter combination of the search as JSON and
# write it out through the hops hdfs helper.
hdfs.dump(
    json.dumps(result['best_hp']),
    "Resources/{}".format(AML_FF_HYPERPARAMS_FILE),
)