In [20]:
import convGRU
import optuna
import coiled
import dask_optuna
import joblib
from optuna.trial import TrialState


In [7]:
# Run once to create the software env we will use for our optimization
coiled.create_software_environment(
    name="capp-ml-env",
    pip=["dask[complete]", "torch", "torchvision", "tqdm", "numpy"],
    account="gondeknp",
)

Creating new software environment
Creating new ecr build
STEP 1: FROM coiled/default:sha-6b4e896
STEP 2: COPY environment.yml environment.yml
--> Using cache 9677e58e7f2aedfe88e064f754c88f2137e92f808d55b0abedb8ab3ea963a6ec
--> 9677e58e7f2
STEP 3: RUN conda env update -n coiled -f environment.yml     && rm environment.yml     && conda clean --all -y     && echo "conda activate coiled" >> ~/.bashrc
--> Using cache 7e8daca8a93c82ff3f763205d1d3ad6e9b180bcb310ea2131333891a611e7ac7
--> 7e8daca8a93
--> Using cache 7c6f3cfcdf3e311364f958c9499d93fd52f1df2adbad7094ccdacfed6e5021a9
STEP 4: ENV PATH /opt/conda/envs/coiled/bin:$PATH
--> 7c6f3cfcdf3
--> 763fb985b6c
STEP 5: SHELL ["conda", "run", "-n", "coiled", "/bin/bash", "-c"]
--> Using cache 763fb985b6c0c44ff61a623086bd7947331702c19b8be66075badeef94865c54
STEP 6: COPY requirements.txt requirements.txt
--> 0f401c2b234
--> Using cache 0f401c2b234d9d5ab934c321e3820fddacfd841a1e2bf239345105a029d19fe7
--> Using cache 98ce6898e71b5d864d42c663346a6f8a7

In [15]:
%%time

# Use coiled to create a Dask cluster on AWS
cluster = coiled.Cluster(
    n_workers=2,
    software="capp-ml-env",
    worker_vm_types=['m5.xlarge'],
    # worker_gpu = 1,
    backend_options = {
        'region':'us-east-2'
    }
)

CPU times: user 25 s, sys: 623 ms, total: 25.6 s
Wall time: 2min 8s


In [16]:
# Connect my local machine to the remote cluster
from dask.distributed import Client

client = Client(cluster)
client.wait_for_workers()

client


+-------------+---------------+----------------+----------------+
| Package     | client        | scheduler      | workers        |
+-------------+---------------+----------------+----------------+
| blosc       | None          | MISSING        | MISSING        |
| cloudpickle | 2.0.0         | 2.1.0          | 2.1.0          |
| dask        | 2022.01.1     | 2022.05.0      | 2022.05.0      |
| distributed | 2022.01.1     | 2022.5.0       | 2022.5.0       |
| msgpack     | 1.0.2         | 1.0.3          | 1.0.3          |
| numpy       | 1.20.2        | 1.22.4         | 1.22.4         |
| pandas      | 1.2.3         | 1.4.2          | 1.4.2          |
| python      | 3.9.2.final.0 | 3.9.12.final.0 | 3.9.12.final.0 |
+-------------+---------------+----------------+----------------+
Notes: 
-  msgpack: Variation is ok, as long as everything is above 0.6


0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://3.145.47.112:8787,

0,1
Dashboard: http://3.145.47.112:8787,Workers: 2
Total threads: 8,Total memory: 30.68 GiB

0,1
Comm: tls://172.18.0.2:8786,Workers: 2
Dashboard: http://172.18.0.2:8787/status,Total threads: 8
Started: 9 minutes ago,Total memory: 30.68 GiB

0,1
Comm: tls://10.0.8.48:45925,Total threads: 4
Dashboard: http://10.0.8.48:39453/status,Memory: 15.34 GiB
Nanny: tls://10.0.8.48:45129,
Local directory: /scratch/dask-worker-space/worker-mfjvp0_q,Local directory: /scratch/dask-worker-space/worker-mfjvp0_q

0,1
Comm: tls://10.0.6.155:46111,Total threads: 4
Dashboard: http://10.0.6.155:42247/status,Memory: 15.34 GiB
Nanny: tls://10.0.6.155:34081,
Local directory: /scratch/dask-worker-space/worker-4_7t6iga,Local directory: /scratch/dask-worker-space/worker-4_7t6iga


In [22]:
# Create an Optuna study using a Dask-compatible Optuna storage class
storage = dask_optuna.DaskStorage("sqlite:////media/nick/Data/GitRepos/land-cover-prediction/output/dask_optuna.db")
# storage = dask_optuna.DaskStorage()

study = optuna.create_study(
    direction="minimize",
    storage=storage,
)

# Run 200 optimizations trial on our cluster
with joblib.parallel_backend("dask"):
    study.optimize(convGRU.objective, n_trials=200, n_jobs=-1)


TypeError: Can't instantiate abstract class DaskStorage with abstract methods get_study_directions, set_study_directions, set_trial_values

In [None]:
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

fig = optuna.visualization.plot_param_importances(study)
fig.show()