In [1]:
import json
import numpy as np

from sklearn.model_selection import ParameterSampler
from scipy.stats import uniform

In [2]:
# Search space for the random hyper-parameter search.
# Plain lists hold discrete candidate values; the uniform(...) entries are
# frozen scipy distributions parameterised by `loc` and `scale`.
parameter_grid = dict(
    # 'epochs': [25, 50, 100, 200] is intentionally not searched -- it doesn't
    # make sense to search epochs when we can use early stopping.
    batch_size=[1 << exponent for exponent in range(5, 11)],  # powers of two
    # NOTE(review): this samples on a linear scale, so most draws land near the
    # upper end of the range -- confirm that is intended.
    # TODO: can this be optimized with a learning rate scheduler?
    learning_rate=uniform(loc=10 ** -5, scale=10 ** -1),
    # TODO: can we sample from a continuous distribution?
    dropout_rate=uniform(loc=0.0, scale=0.2),
    latent_dim=[*range(256, 1025, 128)],
    embedding_dim=[*range(32, 129, 8)],
    vocabulary_size=[*range(2000, 7001, 200)],
    max_input_seq_length=[*range(50, 201, 10)],
    max_output_seq_length=[*range(5, 9)],
    # Disabled: 'learning_rate_distro': lognorm([0.01], loc=-0.8)
    # Disabled: 'bi_lstm': [True, False]  (not supported yet)
)
parameter_grid

{'batch_size': [32, 64, 128, 256, 512, 1024],
 'learning_rate': <scipy.stats._distn_infrastructure.rv_frozen at 0x7f2e19843a20>,
 'dropout_rate': <scipy.stats._distn_infrastructure.rv_frozen at 0x7f2e19843d30>,
 'latent_dim': [256, 384, 512, 640, 768, 896, 1024],
 'embedding_dim': [32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128],
 'vocabulary_size': [2000,
  2200,
  2400,
  2600,
  2800,
  3000,
  3200,
  3400,
  3600,
  3800,
  4000,
  4200,
  4400,
  4600,
  4800,
  5000,
  5200,
  5400,
  5600,
  5800,
  6000,
  6200,
  6400,
  6600,
  6800,
  7000],
 'max_input_seq_length': [50,
  60,
  70,
  80,
  90,
  100,
  110,
  120,
  130,
  140,
  150,
  160,
  170,
  180,
  190,
  200],
 'max_output_seq_length': [5, 6, 7, 8]}

In [3]:
def round_float_values(obj, ndigits=6):
    """Return a copy of ``obj`` with every float value rounded.

    Args:
        obj: Mapping whose values may be floats or anything else.
        ndigits: Number of decimal places kept for float values.
            Defaults to 6.

    Returns:
        A new dict with the same keys. Values that are instances of
        ``float`` are rounded to ``ndigits`` places; all other values are
        passed through unchanged. ``obj`` itself is not modified.
    """
    return {
        key: round(value, ndigits) if isinstance(value, float) else value
        for key, value in obj.items()
    }

In [4]:
# Draw 20 configurations from `parameter_grid` with a fixed seed and key each
# one as 'experiment_<index>', with float values rounded for readability.
experiments = dict(
    (f'experiment_{i}', round_float_values(sampled))
    for i, sampled in enumerate(
        ParameterSampler(parameter_grid, n_iter=20, random_state=0)
    )
)

In [5]:
# Preview the sampled experiments as pretty-printed JSON.
print(json.dumps(obj=experiments, indent=2))

{
  "experiment_0": {
    "batch_size": 512,
    "dropout_rate": 0.118569,
    "embedding_dim": 32,
    "latent_dim": 640,
    "learning_rate": 0.085805,
    "max_input_seq_length": 120,
    "max_output_seq_length": 6,
    "vocabulary_size": 5800
  },
  "experiment_1": {
    "batch_size": 1024,
    "dropout_rate": 0.076876,
    "embedding_dim": 88,
    "latent_dim": 1024,
    "learning_rate": 0.005681,
    "max_input_seq_length": 170,
    "max_output_seq_length": 7,
    "vocabulary_size": 2200
  },
  "experiment_2": {
    "batch_size": 32,
    "dropout_rate": 0.078557,
    "embedding_dim": 104,
    "latent_dim": 896,
    "learning_rate": 0.03375,
    "max_input_seq_length": 90,
    "max_output_seq_length": 8,
    "vocabulary_size": 5200
  },
  "experiment_3": {
    "batch_size": 256,
    "dropout_rate": 0.191431,
    "embedding_dim": 32,
    "latent_dim": 512,
    "learning_rate": 0.047371,
    "max_input_seq_length": 60,
    "max_output_seq_length": 8,
    "vocabulary_size": 5800
  },

In [6]:
# Persist the sampled experiments as indented JSON; the path is relative to
# the notebook's working directory.
with open('../experiments/random_search_0.json', mode='w') as f:
    json.dump(obj=experiments, fp=f, indent=2)