In [None]:
def wrapper(learning_rate, dropout):
    """Train and evaluate a CNN genre classifier on spectrogram images.

    The function builds a TF Estimator around `conv_net2`, feeds it PNG
    spectrograms found under ``<project>/Spectrograms/<split>/<genre>/`` and
    runs a `tf.contrib.learn.Experiment` (train + periodic evaluation).

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        dropout: Probability of KEEPING a unit in the dropout layer, in
            (0, 1]. It is converted to the drop rate that
            `tf.layers.dropout` expects.

    Raises:
        ValueError: If `dropout` is outside (0, 1], or if a data folder
            contains no usable spectrogram images.
    """
    # Imports stay function-local, as in the original cell.
    # NOTE(review): presumably because the launcher executes this function in
    # another process -- confirm before hoisting them to a top-level cell.
    import tensorflow as tf
    from hops import tensorboard
    from hops import hdfs
    from tensorflow.contrib.data import Dataset
    import random

    # Our music genre labels
    label_dict = {
        'Classical': 0,
        'Techno': 1,
        'Pop': 2,
        'HipHop': 3,
        'Metal': 4,
        'Rock': 5
    }

    # Hyperparameters come from the caller; they must not be overwritten
    # here, otherwise every run trains with the same settings.
    learning_rate = float(learning_rate)
    dropout = float(dropout)
    if not 0.0 < dropout <= 1.0:
        raise ValueError(
            "dropout is the keep probability and must be in (0, 1], got %r" % (dropout,))
    # tf.layers.dropout takes the fraction of units to DROP, so convert the
    # keep probability exactly once and use the rate everywhere below.
    dropout_rate = 1.0 - dropout

    # Fixed parameters
    n_classes = len(label_dict)
    batch_size = 100
    # Integer division: the step count must be an int on Python 3 as well.
    # NOTE(review): 100000 is described as the data set size in the original
    # comment -- confirm it matches the actual number of spectrograms.
    num_steps = 100000 // batch_size

    dataset_path = hdfs.project_path() + "Spectrograms/"

    # CNN methods

    def conv_net(x, n_classes, dropout, reuse, is_training):
        """Two-convolution-layer network (alternative to `conv_net2`, which `model_fn` uses).

        Args:
            x: Input images; reshaped to [batch, 128, 128, 1].
            n_classes: Number of output classes.
            dropout: Fraction of units to drop (the `rate` of `tf.layers.dropout`).
            reuse: Whether to reuse the variables of the 'ConvNet' scope.
            is_training: True builds the training graph (dropout active, raw
                logits returned); False returns softmax probabilities.

        Returns:
            Logits when `is_training` is True, otherwise class probabilities.
        """
        # Define a scope for reusing the variables
        with tf.variable_scope('ConvNet', reuse=reuse):
            # Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
            x = tf.reshape(x, shape=[-1, 128, 128, 1])

            # Convolution layer with 32 filters and a kernel size of 5
            conv1 = tf.layers.conv2d(x, 32, 5, activation=tf.nn.relu)
            # Max pooling (down-sampling) with strides of 2 and kernel size of 2
            conv1 = tf.layers.max_pooling2d(conv1, 2, 2)

            # Convolution layer with 64 filters and a kernel size of 3
            conv2 = tf.layers.conv2d(conv1, 64, 3, activation=tf.nn.relu)
            # Max pooling (down-sampling) with strides of 2 and kernel size of 2
            conv2 = tf.layers.max_pooling2d(conv2, 2, 2)

            # Flatten the data to a 1-D vector for the fully connected layer
            fc1 = tf.contrib.layers.flatten(conv2)

            # Fully connected layer
            fc1 = tf.layers.dense(fc1, 1024)
            # Apply dropout (not applied when is_training is False)
            fc1 = tf.layers.dropout(fc1, rate=dropout, training=is_training)

            # Output layer, class prediction
            out = tf.layers.dense(fc1, n_classes)
            # 'softmax_cross_entropy_with_logits' applies softmax itself, so
            # softmax is only applied to the testing network.
            out = tf.nn.softmax(out) if not is_training else out

        return out

    def conv_net2(x, n_classes, dropout, reuse, is_training):
        """Four-convolution-layer network used by `model_fn`.

        Same contract as `conv_net`: `dropout` is the fraction of units to
        drop; logits are returned for training, softmax probabilities
        otherwise.
        """
        with tf.variable_scope('ConvNet', reuse=reuse):
            x = tf.reshape(x, shape=[-1, 128, 128, 1])
            conv = tf.layers.conv2d(x, 64, 2, activation=tf.nn.relu)
            conv = tf.layers.max_pooling2d(conv, 2, 2)
            conv = tf.layers.conv2d(conv, 128, 2, activation=tf.nn.relu)
            conv = tf.layers.max_pooling2d(conv, 2, 2)
            conv = tf.layers.conv2d(conv, 256, 2, activation=tf.nn.relu)
            conv = tf.layers.max_pooling2d(conv, 2, 2)
            conv = tf.layers.conv2d(conv, 512, 2, activation=tf.nn.relu)
            conv = tf.layers.max_pooling2d(conv, 2, 2)
            conv = tf.contrib.layers.flatten(conv)
            conv = tf.layers.dense(conv, 1024)
            conv = tf.layers.dropout(conv, rate=dropout, training=is_training)
            out = tf.layers.dense(conv, n_classes)
            out = tf.nn.softmax(out) if not is_training else out

        return out

    # Define the model function (following TF Estimator Template)
    def model_fn(features, labels, mode, params):
        """Build the EstimatorSpec for training, evaluation and prediction.

        `labels` are one-hot vectors (see `input_parser`); `params` is
        accepted for the Estimator signature but the hyperparameters are read
        from the enclosing scope.
        """
        # Dropout behaves differently at training and prediction time, so two
        # graphs sharing the same weights are built.
        logits_train = conv_net2(features, n_classes, dropout_rate, reuse=False, is_training=True)
        # The non-training network already returns softmax probabilities.
        probas_test = conv_net2(features, n_classes, dropout_rate, reuse=True, is_training=False)

        # Predictions
        pred_classes = tf.argmax(probas_test, axis=1)

        # If prediction mode, early return
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

        # Define loss and optimizer. Labels are one-hot, hence the dense
        # (non-sparse) cross-entropy: https://datascience.stackexchange.com/a/22458
        loss_op = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits_train, labels=labels))
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())

        # Evaluate the accuracy of the model. The metric compares class ids,
        # so the one-hot labels are reduced to ids to match `pred_classes`.
        true_classes = tf.argmax(labels, axis=1)
        acc_op = tf.metrics.accuracy(labels=true_classes, predictions=pred_classes)

        # TF Estimators require an EstimatorSpec that specifies the different
        # ops for training, evaluating, ...
        estim_specs = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=pred_classes,
            loss=loss_op,
            train_op=train_op,
            eval_metric_ops={'accuracy': acc_op})

        return estim_specs

    def data_input_fn(foldername, batch_size=128, shuffle=False, repeat=None):
        """Create an input function reading spectrograms from `foldername`.

        Args:
            foldername: Sub-folder of the spectrogram data set to read.
            batch_size: Number of examples per batch.
            shuffle: If True, additionally shuffle inside the Dataset
                (file order is always shuffled once when listing the files).
            repeat: Passed to `Dataset.repeat`.

        Returns:
            A zero-argument function returning a `(features, labels)` pair.
        """

        def input_parser(img_path, label_value):
            """Load one image as a float32 single-channel tensor with a one-hot label."""
            label = tf.one_hot(label_value, n_classes)
            img_file = tf.read_file(img_path)
            img_decoded = tf.cast(tf.image.decode_image(img_file, channels=1), tf.float32)
            return img_decoded, label

        def load_and_shuffle_data(data_folder):
            """List the PNG files of every genre folder and shuffle them.

            The label is taken from the file-name prefix before the first
            underscore; files whose prefix is not a known genre are skipped.
            """
            hdfs.log("load and shuffle data " + data_folder)
            data = []
            for genre_name in label_dict:
                filenames_path = dataset_path + data_folder + "/" + genre_name
                for filename_path in tf.gfile.Glob(filenames_path + "/*.png"):
                    filename = filename_path.split("/")[-1]
                    genre = filename.split("_")[0]

                    # If the label cannot be extracted from the file name,
                    # the image must not be trained on.
                    if genre not in label_dict:
                        continue

                    data.append((filename_path, label_dict[genre]))

            random.shuffle(data)
            image_paths = [path for path, _ in data]
            labels = [label for _, label in data]

            return image_paths, labels

        def _input_fn():
            images, labels = load_and_shuffle_data(foldername)
            hdfs.log("Loaded data from folder, size: " + foldername + ", " + str(len(images)))
            # Fail early with a clear message; an empty list would otherwise
            # surface as an obscure dtype error inside the Dataset pipeline.
            if not images:
                raise ValueError("No spectrogram images found for folder: " + foldername)

            data_set = Dataset.from_tensor_slices((images, labels))
            data_set = data_set.map(input_parser)

            if shuffle:
                data_set = data_set.shuffle(buffer_size=128)

            data_set = data_set.batch(batch_size)
            data_set = data_set.repeat(repeat)

            iterator = data_set.make_one_shot_iterator()
            features, labels = iterator.get_next()

            return features, labels

        return _input_fn

    run_config = tf.contrib.learn.RunConfig(
        model_dir=tensorboard.logdir(),
        save_checkpoints_steps=10,
        save_summary_steps=5,
        log_step_count_steps=10)

    # Recorded with the estimator; `model_fn` reads the values from the
    # enclosing scope rather than from `params`.
    hparams = tf.contrib.training.HParams(
        learning_rate=learning_rate, dropout_rate=dropout_rate)

    # NOTE(review): merge_all() is evaluated here, before the Estimator builds
    # the model graph; if no summaries exist in the current default graph it
    # returns None and this hook may record nothing -- confirm this is intended.
    summary_hook = tf.train.SummarySaverHook(
        save_steps=run_config.save_summary_steps,
        scaffold=tf.train.Scaffold(),
        summary_op=tf.summary.merge_all())

    genre_estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params=hparams
        )

    train_input_fn = data_input_fn("training", batch_size=batch_size)
    eval_input_fn = data_input_fn("validation", batch_size=batch_size)

    experiment = tf.contrib.learn.Experiment(
        genre_estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=num_steps,
        min_eval_frequency=5,
        eval_hooks=[summary_hook]
        )

    experiment.train_and_evaluate()


In [None]:
from hops import util

# Hyperparameter values handed to `util.grid_params`, keyed by the
# argument names of `wrapper`
args_dict = dict(learning_rate=[0.005], dropout=[0.75])

# Build the parameter grid that is later passed to the launcher
args_dict_grid = util.grid_params(args_dict)

print(args_dict_grid)

In [None]:
from hops import tflauncher

# Hand `wrapper` and the hyperparameter grid to the Hops TensorFlow launcher.
# NOTE(review): `spark` is not defined in the visible cells; presumably the
# notebook kernel provides the Spark session -- confirm.
# NOTE(review): judging by the variable name, the return value is the HDFS
# TensorBoard log directory -- confirm against the hops documentation.
tensorboard_hdfs_logdir = tflauncher.launch(spark, wrapper, args_dict_grid)