In [4]:
import argparse
import os

from tensorflow.keras.datasets import mnist

import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.integration.keras import TuneReportCallback

In [5]:
def train_mnist(config):
    """Train a small dense MNIST classifier and report accuracy to Ray Tune.

    Args:
        config: Trial configuration dict. Keys read here are ``"hidden"``
            (units of the hidden ``Dense`` layer), ``"lr"`` (SGD learning
            rate) and ``"momentum"`` (SGD momentum).

    Returns:
        None. Progress is surfaced through ``TuneReportCallback``, which
        forwards the Keras ``"accuracy"`` log to Tune as ``"mean_accuracy"``.
    """
    # TensorFlow is imported inside the trainable on purpose, see
    # https://github.com/tensorflow/tensorflow/issues/32159
    import tensorflow as tf

    batch_size = 128
    num_classes = 10
    epochs = 12

    # NOTE(review): every trial calls load_data() on its own; if trials start
    # concurrently with a cold cache they may race on the download — confirm
    # whether a lock around this call is needed in your environment.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Scale pixel values from [0, 255] to [0, 1].
    x_train, x_test = x_train / 255.0, x_test / 255.0
    model = tf.keras.models.Sequential(
        [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(config["hidden"], activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(num_classes, activation="softmax"),
        ]
    )

    model.compile(
        loss="sparse_categorical_crossentropy",
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras releases no longer accept.
        optimizer=tf.keras.optimizers.SGD(
            learning_rate=config["lr"], momentum=config["momentum"]
        ),
        metrics=["accuracy"],
    )

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_test, y_test),
        # "accuracy" is the training-set metric; the validation counterpart
        # logged by Keras would be "val_accuracy".
        callbacks=[TuneReportCallback({"mean_accuracy": "accuracy"})],
    )

In [6]:
def tune_mnist(num_training_iterations):
    """Run a 10-sample Ray Tune search over ``train_mnist`` and print the winner.

    Args:
        num_training_iterations: Value used for the ``"training_iteration"``
            entry of the stop criteria passed to ``tune.run``.

    Returns:
        None. The best configuration (by maximum ``"mean_accuracy"``) is
        printed to stdout.
    """
    # NOTE(review): grace_period=20 is larger than the iteration cap used by
    # the call in this notebook (5), so the scheduler presumably never gets a
    # chance to stop a trial early — confirm this is intended.
    scheduler = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        max_t=400,
        grace_period=20,
    )

    # A trial ends as soon as either criterion is met.
    stop_criteria = {
        "mean_accuracy": 0.99,
        "training_iteration": num_training_iterations,
    }

    # "threads" is a fixed value; the remaining entries are sampled per trial.
    search_space = {
        "threads": 2,
        "lr": tune.uniform(0.001, 0.1),
        "momentum": tune.uniform(0.1, 0.9),
        "hidden": tune.randint(32, 512),
    }

    results = tune.run(
        train_mnist,
        name="exp",
        scheduler=scheduler,
        metric="mean_accuracy",
        mode="max",
        stop=stop_criteria,
        num_samples=10,
        resources_per_trial={"cpu": 2, "gpu": 0},
        config=search_space,
    )
    print("Best hyperparameters found were: ", results.best_config)


In [7]:
# Start a local Ray runtime limited to 8 CPUs. ignore_reinit_error=True keeps
# this cell re-runnable: without it, executing the cell a second time in the
# same kernel raises because Ray is already initialised.
ray.init(num_cpus=8, ignore_reinit_error=True)
# Each trial is stopped after at most 5 reported training iterations.
tune_mnist(num_training_iterations=5)



Trial name,status,loc,hidden,lr,momentum
train_mnist_fbec5_00000,RUNNING,10.99.252.58:14929,490,0.0273234,0.679861
train_mnist_fbec5_00001,PENDING,,472,0.0249893,0.838203
train_mnist_fbec5_00002,PENDING,,273,0.0461685,0.291584
train_mnist_fbec5_00003,PENDING,,226,0.0037293,0.638083
train_mnist_fbec5_00004,PENDING,,135,0.0452124,0.246215
train_mnist_fbec5_00005,PENDING,,397,0.0481884,0.604052
train_mnist_fbec5_00006,PENDING,,106,0.0605166,0.205449
train_mnist_fbec5_00007,PENDING,,49,0.0463247,0.564804
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504


[2m[36m(train_mnist pid=14929)[0m Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
   16384/11490434 [..............................] - ETA: 3s


Trial name,status,loc,hidden,lr,momentum
train_mnist_fbec5_00000,RUNNING,10.99.252.58:14929,490,0.0273234,0.679861
train_mnist_fbec5_00001,RUNNING,10.99.252.58:14932,472,0.0249893,0.838203
train_mnist_fbec5_00002,RUNNING,10.99.252.58:14934,273,0.0461685,0.291584
train_mnist_fbec5_00003,RUNNING,10.99.252.58:14933,226,0.0037293,0.638083
train_mnist_fbec5_00004,PENDING,,135,0.0452124,0.246215
train_mnist_fbec5_00005,PENDING,,397,0.0481884,0.604052
train_mnist_fbec5_00006,PENDING,,106,0.0605166,0.205449
train_mnist_fbec5_00007,PENDING,,49,0.0463247,0.564804
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504


[2m[36m(train_mnist pid=14929)[0m 2022-02-25 17:34:00.081526: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(train_mnist pid=14929)[0m 2022-02-25 17:34:00.081668: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: c2178
[2m[36m(train_mnist pid=14929)[0m 2022-02-25 17:34:00.081687: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: c2178
[2m[36m(train_mnist pid=14929)[0m 2022-02-25 17:34:00.081885: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.82.1
[2m[36m(train_mnist pid=14929)[0m 2022-02-25 17:34:00.081951: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.82.1
[2m[36m(train_mnist pid=14929)[0m 2022-02-25 17:34:00.081967: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 

Trial name,status,loc,hidden,lr,momentum
train_mnist_fbec5_00000,RUNNING,10.99.252.58:14929,490,0.0273234,0.679861
train_mnist_fbec5_00001,RUNNING,10.99.252.58:14932,472,0.0249893,0.838203
train_mnist_fbec5_00002,RUNNING,10.99.252.58:14934,273,0.0461685,0.291584
train_mnist_fbec5_00003,RUNNING,10.99.252.58:14933,226,0.0037293,0.638083
train_mnist_fbec5_00004,PENDING,,135,0.0452124,0.246215
train_mnist_fbec5_00005,PENDING,,397,0.0481884,0.604052
train_mnist_fbec5_00006,PENDING,,106,0.0605166,0.205449
train_mnist_fbec5_00007,PENDING,,49,0.0463247,0.564804
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504


[2m[36m(train_mnist pid=14932)[0m 2022-02-25 17:34:02.969044: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(train_mnist pid=14932)[0m 2022-02-25 17:34:02.969138: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: c2178
[2m[36m(train_mnist pid=14932)[0m 2022-02-25 17:34:02.969156: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: c2178
[2m[36m(train_mnist pid=14932)[0m 2022-02-25 17:34:02.969313: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.82.1
[2m[36m(train_mnist pid=14932)[0m 2022-02-25 17:34:02.969384: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.82.1
[2m[36m(train_mnist pid=14932)[0m 2022-02-25 17:34:02.969399: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 

Result for train_mnist_fbec5_00002:
  date: 2022-02-25_17-34-03
  done: false
  experiment_id: 51fb7d7566b640d8bf3e92ddf523ce7f
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.8469833135604858
  node_ip: 10.99.252.58
  pid: 14934
  time_since_restore: 3.750588893890381
  time_this_iter_s: 3.750588893890381
  time_total_s: 3.750588893890381
  timestamp: 1645828443
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00002
  


[2m[36m(train_mnist pid=14932)[0m 2022-02-25 17:34:03.340239: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
[2m[36m(train_mnist pid=14933)[0m 2022-02-25 17:34:03.375159: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Result for train_mnist_fbec5_00000:
  date: 2022-02-25_17-34-03
  done: false
  experiment_id: 65d9a5a279574f888e1d05341ec388e1
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.8642833232879639
  node_ip: 10.99.252.58
  pid: 14929
  time_since_restore: 7.235527992248535
  time_this_iter_s: 7.235527992248535
  time_total_s: 7.235527992248535
  timestamp: 1645828443
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00000
  
Result for train_mnist_fbec5_00003:
  date: 2022-02-25_17-34-05
  done: false
  experiment_id: d70c8b6f7738449ab5a17b792ced681d
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.7064999938011169
  node_ip: 10.99.252.58
  pid: 14933
  time_since_restore: 5.841726064682007
  time_this_iter_s: 5.841726064682007
  time_total_s: 5.841726064682007
  timestamp: 1645828445
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00003
  
Result for train_mnist_fbec5_00001:
  date: 2022-02-25_17-34-06
  done

Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00000,RUNNING,10.99.252.58:14929,490,0.0273234,0.679861,0.923767,2.0,9.7814
train_mnist_fbec5_00001,RUNNING,10.99.252.58:14932,472,0.0249893,0.838203,0.8839,1.0,6.57837
train_mnist_fbec5_00002,RUNNING,10.99.252.58:14934,273,0.0461685,0.291584,0.928017,3.0,7.23639
train_mnist_fbec5_00003,RUNNING,10.99.252.58:14933,226,0.0037293,0.638083,0.84615,2.0,7.43015
train_mnist_fbec5_00004,PENDING,,135,0.0452124,0.246215,,,
train_mnist_fbec5_00005,PENDING,,397,0.0481884,0.604052,,,
train_mnist_fbec5_00006,PENDING,,106,0.0605166,0.205449,,,
train_mnist_fbec5_00007,PENDING,,49,0.0463247,0.564804,,,
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434,,,
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504,,,


Result for train_mnist_fbec5_00002:
  date: 2022-02-25_17-34-08
  done: false
  experiment_id: 51fb7d7566b640d8bf3e92ddf523ce7f
  hostname: c2178
  iterations_since_restore: 4
  mean_accuracy: 0.9380499720573425
  node_ip: 10.99.252.58
  pid: 14934
  time_since_restore: 9.09532356262207
  time_this_iter_s: 1.8589298725128174
  time_total_s: 9.09532356262207
  timestamp: 1645828448
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: fbec5_00002
  
Result for train_mnist_fbec5_00000:
  date: 2022-02-25_17-34-08
  done: false
  experiment_id: 65d9a5a279574f888e1d05341ec388e1
  hostname: c2178
  iterations_since_restore: 3
  mean_accuracy: 0.9380166530609131
  node_ip: 10.99.252.58
  pid: 14929
  time_since_restore: 12.390630722045898
  time_this_iter_s: 2.609227418899536
  time_total_s: 12.390630722045898
  timestamp: 1645828448
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: fbec5_00000
  
Result for train_mnist_fbec5_00002:
  date: 2022-02-25_17-34-10
  don

Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00001,RUNNING,10.99.252.58:14932,472,0.0249893,0.838203,0.961367,4.0,13.1182
train_mnist_fbec5_00004,PENDING,,135,0.0452124,0.246215,,,
train_mnist_fbec5_00005,PENDING,,397,0.0481884,0.604052,,,
train_mnist_fbec5_00006,PENDING,,106,0.0605166,0.205449,,,
train_mnist_fbec5_00007,PENDING,,49,0.0463247,0.564804,,,
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434,,,
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504,,,
train_mnist_fbec5_00000,TERMINATED,10.99.252.58:14929,490,0.0273234,0.679861,0.95345,5.0,16.3513
train_mnist_fbec5_00002,TERMINATED,10.99.252.58:14934,273,0.0461685,0.291584,0.944883,5.0,10.9532
train_mnist_fbec5_00003,TERMINATED,10.99.252.58:14933,226,0.0037293,0.638083,0.8901,5.0,11.9851


Result for train_mnist_fbec5_00001:
  date: 2022-02-25_17-34-14
  done: true
  experiment_id: 29b822715e674912957346872fff93f2
  hostname: c2178
  iterations_since_restore: 5
  mean_accuracy: 0.9679166674613953
  node_ip: 10.99.252.58
  pid: 14932
  time_since_restore: 14.578320503234863
  time_this_iter_s: 1.4601638317108154
  time_total_s: 14.578320503234863
  timestamp: 1645828454
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: fbec5_00001
  


[2m[36m(train_mnist pid=14930)[0m 2022-02-25 17:34:20.265877: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(train_mnist pid=14930)[0m 2022-02-25 17:34:20.265986: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: c2178
[2m[36m(train_mnist pid=14930)[0m 2022-02-25 17:34:20.266019: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: c2178
[2m[36m(train_mnist pid=14930)[0m 2022-02-25 17:34:20.266220: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.82.1
[2m[36m(train_mnist pid=14930)[0m 2022-02-25 17:34:20.266308: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.82.1
[2m[36m(train_mnist pid=14930)[0m 2022-02-25 17:34:20.266330: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 

Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00004,RUNNING,10.99.252.58:14930,135,0.0452124,0.246215,,,
train_mnist_fbec5_00005,RUNNING,10.99.252.58:14928,397,0.0481884,0.604052,,,
train_mnist_fbec5_00006,RUNNING,10.99.252.58:14935,106,0.0605166,0.205449,,,
train_mnist_fbec5_00007,RUNNING,10.99.252.58:14931,49,0.0463247,0.564804,,,
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434,,,
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504,,,
train_mnist_fbec5_00000,TERMINATED,10.99.252.58:14929,490,0.0273234,0.679861,0.95345,5.0,16.3513
train_mnist_fbec5_00001,TERMINATED,10.99.252.58:14932,472,0.0249893,0.838203,0.967917,5.0,14.5783
train_mnist_fbec5_00002,TERMINATED,10.99.252.58:14934,273,0.0461685,0.291584,0.944883,5.0,10.9532
train_mnist_fbec5_00003,TERMINATED,10.99.252.58:14933,226,0.0037293,0.638083,0.8901,5.0,11.9851


Result for train_mnist_fbec5_00006:
  date: 2022-02-25_17-34-22
  done: false
  experiment_id: 5ea989d861bf4d7593bdd75e9b2485c5
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.835349977016449
  node_ip: 10.99.252.58
  pid: 14935
  time_since_restore: 3.222780466079712
  time_this_iter_s: 3.222780466079712
  time_total_s: 3.222780466079712
  timestamp: 1645828462
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00006
  
Result for train_mnist_fbec5_00004:
  date: 2022-02-25_17-34-22
  done: false
  experiment_id: a214fdc1117749cbb8584601c00f3102
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.8300166726112366
  node_ip: 10.99.252.58
  pid: 14930
  time_since_restore: 5.317613124847412
  time_this_iter_s: 5.317613124847412
  time_total_s: 5.317613124847412
  timestamp: 1645828462
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00004
  
Result for train_mnist_fbec5_00005:
  date: 2022-02-25_17-34-23
  done:

Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00004,RUNNING,10.99.252.58:14930,135,0.0452124,0.246215,0.90295,2.0,6.56239
train_mnist_fbec5_00005,RUNNING,10.99.252.58:14928,397,0.0481884,0.604052,0.876233,1.0,4.56795
train_mnist_fbec5_00006,RUNNING,10.99.252.58:14935,106,0.0605166,0.205449,0.9232,3.0,5.41597
train_mnist_fbec5_00007,RUNNING,10.99.252.58:14931,49,0.0463247,0.564804,,,
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434,,,
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504,,,
train_mnist_fbec5_00000,TERMINATED,10.99.252.58:14929,490,0.0273234,0.679861,0.95345,5.0,16.3513
train_mnist_fbec5_00001,TERMINATED,10.99.252.58:14932,472,0.0249893,0.838203,0.967917,5.0,14.5783
train_mnist_fbec5_00002,TERMINATED,10.99.252.58:14934,273,0.0461685,0.291584,0.944883,5.0,10.9532
train_mnist_fbec5_00003,TERMINATED,10.99.252.58:14933,226,0.0037293,0.638083,0.8901,5.0,11.9851


[2m[36m(train_mnist pid=14931)[0m 2022-02-25 17:34:24.801919: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(train_mnist pid=14931)[0m 2022-02-25 17:34:24.802018: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: c2178
[2m[36m(train_mnist pid=14931)[0m 2022-02-25 17:34:24.802035: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: c2178
[2m[36m(train_mnist pid=14931)[0m 2022-02-25 17:34:24.802195: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.82.1
[2m[36m(train_mnist pid=14931)[0m 2022-02-25 17:34:24.802280: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.82.1
[2m[36m(train_mnist pid=14931)[0m 2022-02-25 17:34:24.802296: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 

Result for train_mnist_fbec5_00006:
  date: 2022-02-25_17-34-26
  done: true
  experiment_id: 5ea989d861bf4d7593bdd75e9b2485c5
  hostname: c2178
  iterations_since_restore: 5
  mean_accuracy: 0.9420999884605408
  node_ip: 10.99.252.58
  pid: 14935
  time_since_restore: 7.599065542221069
  time_this_iter_s: 1.0770421028137207
  time_total_s: 7.599065542221069
  timestamp: 1645828466
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: fbec5_00006
  
Result for train_mnist_fbec5_00007:
  date: 2022-02-25_17-34-26
  done: false
  experiment_id: 2d0975b6a5f549a3863fea439eb61cce
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.8319000005722046
  node_ip: 10.99.252.58
  pid: 14931
  time_since_restore: 5.285701513290405
  time_this_iter_s: 5.285701513290405
  time_total_s: 5.285701513290405
  timestamp: 1645828466
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00007
  
Result for train_mnist_fbec5_00004:
  date: 2022-02-25_17-34-27
  done

Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00005,RUNNING,10.99.252.58:14928,397,0.0481884,0.604052,0.956133,4.0,10.005
train_mnist_fbec5_00008,PENDING,,312,0.00919799,0.811434,,,
train_mnist_fbec5_00009,PENDING,,80,0.0932895,0.112504,,,
train_mnist_fbec5_00000,TERMINATED,10.99.252.58:14929,490,0.0273234,0.679861,0.95345,5.0,16.3513
train_mnist_fbec5_00001,TERMINATED,10.99.252.58:14932,472,0.0249893,0.838203,0.967917,5.0,14.5783
train_mnist_fbec5_00002,TERMINATED,10.99.252.58:14934,273,0.0461685,0.291584,0.944883,5.0,10.9532
train_mnist_fbec5_00003,TERMINATED,10.99.252.58:14933,226,0.0037293,0.638083,0.8901,5.0,11.9851
train_mnist_fbec5_00004,TERMINATED,10.99.252.58:14930,135,0.0452124,0.246215,0.936933,5.0,10.1773
train_mnist_fbec5_00006,TERMINATED,10.99.252.58:14935,106,0.0605166,0.205449,0.9421,5.0,7.59907
train_mnist_fbec5_00007,TERMINATED,10.99.252.58:14931,49,0.0463247,0.564804,0.93375,5.0,8.11769


Result for train_mnist_fbec5_00005:
  date: 2022-02-25_17-34-30
  done: true
  experiment_id: b8c52ae344fe4edf8a2344f2b8719644
  hostname: c2178
  iterations_since_restore: 5
  mean_accuracy: 0.9613333344459534
  node_ip: 10.99.252.58
  pid: 14928
  time_since_restore: 11.437591075897217
  time_this_iter_s: 1.4325687885284424
  time_total_s: 11.437591075897217
  timestamp: 1645828470
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: fbec5_00005
  


Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00008,RUNNING,10.99.252.58:16076,312,0.00919799,0.811434,,,
train_mnist_fbec5_00009,RUNNING,10.99.252.58:16077,80,0.0932895,0.112504,,,
train_mnist_fbec5_00000,TERMINATED,10.99.252.58:14929,490,0.0273234,0.679861,0.95345,5.0,16.3513
train_mnist_fbec5_00001,TERMINATED,10.99.252.58:14932,472,0.0249893,0.838203,0.967917,5.0,14.5783
train_mnist_fbec5_00002,TERMINATED,10.99.252.58:14934,273,0.0461685,0.291584,0.944883,5.0,10.9532
train_mnist_fbec5_00003,TERMINATED,10.99.252.58:14933,226,0.0037293,0.638083,0.8901,5.0,11.9851
train_mnist_fbec5_00004,TERMINATED,10.99.252.58:14930,135,0.0452124,0.246215,0.936933,5.0,10.1773
train_mnist_fbec5_00005,TERMINATED,10.99.252.58:14928,397,0.0481884,0.604052,0.961333,5.0,11.4376
train_mnist_fbec5_00006,TERMINATED,10.99.252.58:14935,106,0.0605166,0.205449,0.9421,5.0,7.59907
train_mnist_fbec5_00007,TERMINATED,10.99.252.58:14931,49,0.0463247,0.564804,0.93375,5.0,8.11769


[2m[36m(train_mnist pid=16076)[0m 2022-02-25 17:34:36.810413: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(train_mnist pid=16076)[0m 2022-02-25 17:34:36.810509: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: c2178
[2m[36m(train_mnist pid=16076)[0m 2022-02-25 17:34:36.810526: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: c2178
[2m[36m(train_mnist pid=16076)[0m 2022-02-25 17:34:36.810669: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.82.1
[2m[36m(train_mnist pid=16076)[0m 2022-02-25 17:34:36.810764: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.82.1
[2m[36m(train_mnist pid=16076)[0m 2022-02-25 17:34:36.810780: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 

Result for train_mnist_fbec5_00009:
  date: 2022-02-25_17-34-38
  done: false
  experiment_id: 619768bd519f43588fd43af80aa3bd41
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.8481000065803528
  node_ip: 10.99.252.58
  pid: 16077
  time_since_restore: 4.9715895652771
  time_this_iter_s: 4.9715895652771
  time_total_s: 4.9715895652771
  timestamp: 1645828478
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00009
  
Result for train_mnist_fbec5_00008:
  date: 2022-02-25_17-34-39
  done: false
  experiment_id: 79170a87696c401c915f21330857da0c
  hostname: c2178
  iterations_since_restore: 1
  mean_accuracy: 0.8312333226203918
  node_ip: 10.99.252.58
  pid: 16076
  time_since_restore: 5.564232110977173
  time_this_iter_s: 5.564232110977173
  time_total_s: 5.564232110977173
  timestamp: 1645828479
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fbec5_00008
  


Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00008,RUNNING,10.99.252.58:16076,312,0.00919799,0.811434,0.831233,1,5.56423
train_mnist_fbec5_00009,RUNNING,10.99.252.58:16077,80,0.0932895,0.112504,0.928983,3,6.56949
train_mnist_fbec5_00000,TERMINATED,10.99.252.58:14929,490,0.0273234,0.679861,0.95345,5,16.3513
train_mnist_fbec5_00001,TERMINATED,10.99.252.58:14932,472,0.0249893,0.838203,0.967917,5,14.5783
train_mnist_fbec5_00002,TERMINATED,10.99.252.58:14934,273,0.0461685,0.291584,0.944883,5,10.9532
train_mnist_fbec5_00003,TERMINATED,10.99.252.58:14933,226,0.0037293,0.638083,0.8901,5,11.9851
train_mnist_fbec5_00004,TERMINATED,10.99.252.58:14930,135,0.0452124,0.246215,0.936933,5,10.1773
train_mnist_fbec5_00005,TERMINATED,10.99.252.58:14928,397,0.0481884,0.604052,0.961333,5,11.4376
train_mnist_fbec5_00006,TERMINATED,10.99.252.58:14935,106,0.0605166,0.205449,0.9421,5,7.59907
train_mnist_fbec5_00007,TERMINATED,10.99.252.58:14931,49,0.0463247,0.564804,0.93375,5,8.11769


Result for train_mnist_fbec5_00009:
  date: 2022-02-25_17-34-41
  done: true
  experiment_id: 619768bd519f43588fd43af80aa3bd41
  hostname: c2178
  iterations_since_restore: 5
  mean_accuracy: 0.9438999891281128
  node_ip: 10.99.252.58
  pid: 16077
  time_since_restore: 8.124834537506104
  time_this_iter_s: 0.7625389099121094
  time_total_s: 8.124834537506104
  timestamp: 1645828481
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: fbec5_00009
  
Result for train_mnist_fbec5_00008:
  date: 2022-02-25_17-34-44
  done: true
  experiment_id: 79170a87696c401c915f21330857da0c
  hostname: c2178
  iterations_since_restore: 5
  mean_accuracy: 0.9379000067710876
  node_ip: 10.99.252.58
  pid: 16076
  time_since_restore: 10.60762643814087
  time_this_iter_s: 1.142256736755371
  time_total_s: 10.60762643814087
  timestamp: 1645828484
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: fbec5_00008
  


Trial name,status,loc,hidden,lr,momentum,acc,iter,total time (s)
train_mnist_fbec5_00000,TERMINATED,10.99.252.58:14929,490,0.0273234,0.679861,0.95345,5,16.3513
train_mnist_fbec5_00001,TERMINATED,10.99.252.58:14932,472,0.0249893,0.838203,0.967917,5,14.5783
train_mnist_fbec5_00002,TERMINATED,10.99.252.58:14934,273,0.0461685,0.291584,0.944883,5,10.9532
train_mnist_fbec5_00003,TERMINATED,10.99.252.58:14933,226,0.0037293,0.638083,0.8901,5,11.9851
train_mnist_fbec5_00004,TERMINATED,10.99.252.58:14930,135,0.0452124,0.246215,0.936933,5,10.1773
train_mnist_fbec5_00005,TERMINATED,10.99.252.58:14928,397,0.0481884,0.604052,0.961333,5,11.4376
train_mnist_fbec5_00006,TERMINATED,10.99.252.58:14935,106,0.0605166,0.205449,0.9421,5,7.59907
train_mnist_fbec5_00007,TERMINATED,10.99.252.58:14931,49,0.0463247,0.564804,0.93375,5,8.11769
train_mnist_fbec5_00008,TERMINATED,10.99.252.58:16076,312,0.00919799,0.811434,0.9379,5,10.6076
train_mnist_fbec5_00009,TERMINATED,10.99.252.58:16077,80,0.0932895,0.112504,0.9439,5,8.12483


2022-02-25 17:34:44,332	INFO tune.py:636 -- Total run time: 62.62 seconds (51.94 seconds for the tuning loop).


Best hyperparameters found were:  {'threads': 2, 'lr': 0.024989308709521076, 'momentum': 0.8382026024150805, 'hidden': 472}
