In [1]:
import os
from ray import tune
from ml_lab.train.train_nn import train_nn
from ray.air.config import RunConfig
from dotenv import load_dotenv
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# ROOT_DIR anchors the Ray results directory used by the tuning cell.
# Fail fast when it is missing or empty: otherwise the f-string path built
# later would silently become "None/notebooks/ray_results" (or
# "/notebooks/ray_results" for an empty value).
root_dir = os.getenv("ROOT_DIR")
if not root_dir:
    raise RuntimeError(
        "ROOT_DIR is not set. Define it in your environment or in a .env file "
        "before running this notebook."
    )

NOTE: The tuning cell below takes roughly 40–60 minutes to run (the run recorded in this notebook took about 59 minutes) and uses a lot of your computer's resources, so it may interfere with other running programs such as Webex.

NOTE: If running the code block below fails with an error that looks like `RuntimeError: b'GCS has started but no raylets have registered yet.'`, run `ray start` in a terminal to get clearer error details. You should see something like:
```
ImportError: Failed to import grpc on Apple Silicon. On Apple Silicon machines, try `pip uninstall grpcio; conda install grpcio`. Check out https://docs.ray.io/en/master/ray-overview/installation.html#m1-mac-apple-silicon-support for more details.
```
After running the two commands suggested in that message (`pip uninstall grpcio`, then `conda install grpcio`), you should be able to run the code block below.

In [2]:
# Search space handed to Ray Tune. Each `tune.grid_search` entry is swept
# exhaustively, so the four 3-value grids give 3 * 3 * 3 * 3 = 81 trials.
# Plain values are passed to every trial unchanged.
param_space = dict(
    tune_session=True,  # presumably tells train_nn it runs under Tune -- confirm in ml_lab
    hidden_dim=tune.grid_search([90, 100, 110]),
    batch_size=tune.grid_search([50, 100, 200]),
    num_epochs=50,
    lr=tune.grid_search([0.1, 0.01, 0.001]),
    weight_decay=tune.grid_search([0.01, 0.001, 0.0001]),
)

# Wrap the training function so that each trial requests one CPU.
trainable = tune.with_resources(
    tune.with_parameters(train_nn),
    resources={"cpu": 1},
)

# Experiment name and the directory where Ray writes trial results.
run_config = RunConfig(
    name="4138_nn_tuning",
    local_dir=f"{root_dir}/notebooks/ray_results",
)

# Trials are compared by their reported "loss"; lower is better.
tune_config = tune.TuneConfig(metric="loss", mode="min")

tuner = tune.Tuner(
    trainable,
    run_config=run_config,
    tune_config=tune_config,
    param_space=param_space,
)

# Run the sweep; `results` is read by the reporting cell further down.
results = tuner.fit()

2023-04-12 12:20:30,166	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-04-12 13:19:17
Running for:,00:58:45.09
Memory:,6.8/8.0 GiB

Trial name,status,loc,batch_size,hidden_dim,lr,weight_decay,iter,total time (s),loss
train_nn_f2451_00000,TERMINATED,127.0.0.1:33399,50,90,0.1,0.01,50,336.048,4699.11
train_nn_f2451_00001,TERMINATED,127.0.0.1:33401,100,90,0.1,0.01,50,329.374,3023.37
train_nn_f2451_00002,TERMINATED,127.0.0.1:33402,200,90,0.1,0.01,50,331.896,2191.27
train_nn_f2451_00003,TERMINATED,127.0.0.1:33403,50,100,0.1,0.01,50,331.779,4826.19
train_nn_f2451_00004,TERMINATED,127.0.0.1:33404,100,100,0.1,0.01,50,332.41,2983.09
train_nn_f2451_00005,TERMINATED,127.0.0.1:33405,200,100,0.1,0.01,50,329.055,2260.55
train_nn_f2451_00006,TERMINATED,127.0.0.1:33406,50,110,0.1,0.01,50,331.875,4694.81
train_nn_f2451_00007,TERMINATED,127.0.0.1:33407,100,110,0.1,0.01,50,330.527,3036.8
train_nn_f2451_00008,TERMINATED,127.0.0.1:33405,200,110,0.1,0.01,50,342.78,2173.41
train_nn_f2451_00009,TERMINATED,127.0.0.1:33401,50,90,0.01,0.01,50,343.94,4814.11


[2m[36m(train_nn pid=33399)[0m 
[2m[36m(train_nn pid=33399)[0m Epoch 1 of 50 elapsed time = 7e-06 s
[2m[36m(train_nn pid=33403)[0m 
[2m[36m(train_nn pid=33403)[0m Epoch 1 of 50 elapsed time = 0.000245 s
[2m[36m(train_nn pid=33402)[0m 
[2m[36m(train_nn pid=33402)[0m Epoch 1 of 50 elapsed time = 5e-06 s
[2m[36m(train_nn pid=33404)[0m 
[2m[36m(train_nn pid=33404)[0m Epoch 1 of 50 elapsed time = 1e-05 s
[2m[36m(train_nn pid=33407)[0m 
[2m[36m(train_nn pid=33407)[0m Epoch 1 of 50 elapsed time = 7e-06 s
[2m[36m(train_nn pid=33406)[0m 
[2m[36m(train_nn pid=33406)[0m Epoch 1 of 50 elapsed time = 4e-06 s
[2m[36m(train_nn pid=33401)[0m 
[2m[36m(train_nn pid=33401)[0m Epoch 1 of 50 elapsed time = 4e-06 s
[2m[36m(train_nn pid=33405)[0m 
[2m[36m(train_nn pid=33405)[0m Epoch 1 of 50 elapsed time = 7e-06 s
[2m[36m(train_nn pid=33399)[0m epoch: 1 train loss: 48.214 train accuracy: 0.297
[2m[36m(train_nn pid=33399)[0m epoch: 1 test loss: 12.493 test

Trial name,accuracy,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_nn_f2451_00000,0.554187,2023-04-12_12-26-10,True,,68f7eda2742c4b64a8c0090ceef35f56,"0_batch_size=50,hidden_dim=90,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,4699.11,127.0.0.1,33399,336.048,6.19456,336.048,1681316770,0,,50,f2451_00000,0.00302625
train_nn_f2451_00001,0.546798,2023-04-12_12-26-08,True,,67ad5d52fb17408ca98c6e6c187f5298,"1_batch_size=100,hidden_dim=90,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,3023.37,127.0.0.1,33401,329.374,6.41663,329.374,1681316768,0,,50,f2451_00001,0.0086472
train_nn_f2451_00002,0.470443,2023-04-12_12-26-10,True,,a09428b2cc1744408f2ffefc870fbc6c,"2_batch_size=200,hidden_dim=90,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,2191.27,127.0.0.1,33402,331.896,6.87865,331.896,1681316770,0,,50,f2451_00002,0.00609493
train_nn_f2451_00003,0.460591,2023-04-12_12-26-10,True,,135dede187b842f9a03137c40d663b82,"3_batch_size=50,hidden_dim=100,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,4826.19,127.0.0.1,33403,331.779,6.46934,331.779,1681316770,0,,50,f2451_00003,0.00807095
train_nn_f2451_00004,0.536946,2023-04-12_12-26-11,True,,f4f57953929246988a1c5a64691d5714,"4_batch_size=100,hidden_dim=100,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,2983.09,127.0.0.1,33404,332.41,6.36659,332.41,1681316771,0,,50,f2451_00004,0.00423408
train_nn_f2451_00005,0.37931,2023-04-12_12-26-08,True,,e3c79888a8d04fabb3b4669579bc3cf5,"5_batch_size=200,hidden_dim=100,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,2260.55,127.0.0.1,33405,329.055,6.50583,329.055,1681316768,0,,50,f2451_00005,0.00359607
train_nn_f2451_00006,0.519704,2023-04-12_12-26-10,True,,451ccda18ed048d48aa7f95cb84d7aa5,"6_batch_size=50,hidden_dim=110,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,4694.81,127.0.0.1,33406,331.875,6.39731,331.875,1681316770,0,,50,f2451_00006,0.00397015
train_nn_f2451_00007,0.463054,2023-04-12_12-26-09,True,,e641c1811ba941c49c27f37b19d09e53,"7_batch_size=100,hidden_dim=110,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,3036.8,127.0.0.1,33407,330.527,6.45185,330.527,1681316769,0,,50,f2451_00007,0.0154738
train_nn_f2451_00008,0.470443,2023-04-12_12-31-51,True,,e3c79888a8d04fabb3b4669579bc3cf5,"8_batch_size=200,hidden_dim=110,lr=0.1000,weight_decay=0.0100",Jills-MBP.fios-router.home,50,2173.41,127.0.0.1,33405,342.78,7.51541,342.78,1681317111,0,,50,f2451_00008,0.00359607
train_nn_f2451_00009,0.480296,2023-04-12_12-31-53,True,,67ad5d52fb17408ca98c6e6c187f5298,"9_batch_size=50,hidden_dim=90,lr=0.0100,weight_decay=0.0100",Jills-MBP.fios-router.home,50,4814.11,127.0.0.1,33401,343.94,8.14067,343.94,1681317113,0,,50,f2451_00009,0.0086472


[2m[36m(train_nn pid=33399)[0m 
[2m[36m(train_nn pid=33399)[0m Epoch 2 of 50 elapsed time = 8.321272 s
[2m[36m(train_nn pid=33405)[0m epoch: 1 train loss: 15.581 train accuracy: 0.098
[2m[36m(train_nn pid=33404)[0m epoch: 1 train loss: 25.978 train accuracy: 0.258
[2m[36m(train_nn pid=33403)[0m epoch: 1 train loss: 47.230 train accuracy: 0.367
[2m[36m(train_nn pid=33407)[0m epoch: 1 train loss: 26.079 train accuracy: 0.242
[2m[36m(train_nn pid=33401)[0m epoch: 1 train loss: 26.388 train accuracy: 0.211
[2m[36m(train_nn pid=33402)[0m epoch: 1 train loss: 15.381 train accuracy: 0.148
[2m[36m(train_nn pid=33406)[0m epoch: 1 train loss: 48.175 train accuracy: 0.298
[2m[36m(train_nn pid=33405)[0m epoch: 1 test loss: 6.115 test accuracy: 0.266
[2m[36m(train_nn pid=33405)[0m 
[2m[36m(train_nn pid=33405)[0m Epoch 2 of 50 elapsed time = 7.108836 s
[2m[36m(train_nn pid=33404)[0m epoch: 1 test loss: 7.639 test accuracy: 0.367
[2m[36m(train_nn pid=33404)[0

2023-04-12 13:19:17,072	INFO tune.py:798 -- Total run time: 3525.21 seconds (3525.08 seconds for the tuning loop).


In [4]:
# Select the trial with the lowest reported "loss" and summarise it.
best_result = results.get_best_result(metric="loss", mode="min")
best_config = best_result.config
best_metrics = best_result.metrics

print(f"Best trial config: {best_config}")
print(f"Best trial final validation loss: {best_metrics['loss']}")
print(f"Best trial final validation accuracy: {best_metrics['accuracy']}")

Best trial config: {'tune_session': True, 'hidden_dim': 110, 'batch_size': 200, 'num_epochs': 50, 'lr': 0.001, 'weight_decay': 0.0001}
Best trial final validation loss: 1758.3514425754547
Best trial final validation accuracy: 0.8620689511299133


You can view the results in the TensorBoard UI by running the following command in a terminal from the project root (the directory that `ROOT_DIR` points to):
```
tensorboard --logdir=notebooks/ray_results/4138_nn_tuning
```

You will see the following output: <br><br>
<img src="../img/tensorboard_01.png" height=100>

When you open the link printed in that output in your browser, you will be able to view the results of all of your tune trials: <br><br>
TIME SERIES Tab: <br><br>
<img src="../img/tensorboard_02.png">
<br><br>
HPARAMS Tab: <br><br>
<img src="../img/tensorboard_03.png">
