# 환경/정책 준비

In [1]:
from maze_gym_env import Environment
import time
import numpy as np
import random 

class Policy:
    """Tabular epsilon-greedy policy over discrete states and actions.

    One value is tracked per (state, action) pair; actions are chosen either
    at random (exploration) or by taking the highest-valued entry for the
    current state (exploitation).
    """

    def __init__(self, env):
        """Build an all-zero value table sized from ``env``.

        Args:
            env: Environment exposing ``observation_space.n`` and
                ``action_space.n``. Its ``action_space`` is kept so that
                random actions can be drawn with ``sample()``.
        """
        self.state_action_table = [
            [0] * env.action_space.n
            for _ in range(env.observation_space.n)
        ]
        self.action_space = env.action_space

    def get_action(self, state, explore=True, epsilon=0.1):
        """Pick an action for ``state``.

        Args:
            state: Index of the row in ``state_action_table`` to consult.
            explore: When false, the random branch is never taken.
            epsilon: Threshold compared against a uniform draw from [0, 1];
                a draw below it triggers a random action.

        Returns:
            A sample from the action space when exploring, otherwise the
            ``np.argmax`` of the value row for ``state`` (first index wins
            on ties).
        """
        # The draw only happens when exploration is enabled (short-circuit).
        take_random = explore and random.uniform(0, 1) < epsilon
        if take_random:
            return self.action_space.sample()
        greedy_action = np.argmax(self.state_action_table[state])
        return greedy_action


class Simulation:
    """Runs episodes of an environment while following a given policy."""

    def __init__(self, env):
        """Remember the environment that rollouts will be played in."""
        self.env = env

    def rollout(self, policy, render=False, explore=True, epsilon=0.1):
        """Play one episode and record every transition.

        Args:
            policy: Object whose ``get_action(state, explore, epsilon)``
                chooses the next action.
            render: When true, pause 0.05 s and call ``env.render()`` after
                each step.
            explore: Forwarded to ``policy.get_action``.
            epsilon: Forwarded to ``policy.get_action``.

        Returns:
            A list of ``[state, action, reward, next_state]`` entries, one per
            step, in the order they occurred.
        """
        trajectory = []
        current = self.env.reset()
        finished = False
        while not finished:
            chosen = policy.get_action(current, explore, epsilon)
            # The fourth value returned by step() is not used here.
            successor, reward, finished, _ = self.env.step(chosen)
            trajectory.append([current, chosen, reward, successor])
            current = successor
            if render:
                time.sleep(0.05)
                self.env.render()

        return trajectory


def update_policy(policy, experiences, weight=0.1, discount_factor=0.9):
    """Apply one value update per recorded transition, in order.

    Args:
        policy: Object whose ``state_action_table`` is updated in place.
        experiences: Iterable of ``(state, action, reward, next_state)``.
        weight: Blend factor between the old value (``1 - weight``) and the
            new target (``weight``).
        discount_factor: Multiplier applied to the best value available in
            ``next_state``.
    """
    for state, action, reward, next_state in experiences:
        table = policy.state_action_table
        best_next = np.max(table[next_state])
        current = table[state][action]
        # Target: immediate reward plus discounted best follow-up value.
        target = reward + discount_factor * best_next
        table[state][action] = (1 - weight) * current + weight * target


def train_policy(env, num_episodes=10000, weight=0.1, discount_factor=0.9):
    """Learn a policy by alternating rollouts and value updates.

    Args:
        env: Environment used both to size the policy and to simulate in.
        num_episodes: Number of rollout/update rounds to perform.
        weight: Passed through to ``update_policy``.
        discount_factor: Passed through to ``update_policy``.

    Returns:
        The ``Policy`` instance after all rounds have been applied.
    """
    learner = Policy(env)
    simulator = Simulation(env)
    for _ in range(num_episodes):
        # Each round: play one episode with default exploration, then learn
        # from everything that episode recorded.
        update_policy(
            learner,
            simulator.rollout(learner),
            weight,
            discount_factor,
        )

    return learner


def evaluate_policy(env, policy, num_episodes=10):
    """Measure how many steps a policy needs per episode without exploring.

    Args:
        env: Environment to run the evaluation rollouts in.
        policy: Policy to follow; rollouts use ``explore=False``.
        num_episodes: Number of rollouts to average over.

    Returns:
        The mean rollout length, which is also printed.
    """
    simulator = Simulation(env)
    total_steps = sum(
        len(simulator.rollout(policy, render=False, explore=False))
        for _ in range(num_episodes)
    )
    average = total_steps / num_episodes

    print(f"{average} steps on average "
          f"for a total of {num_episodes} episodes.")

    return average

# 튠 준비

### random search 기반

In [5]:
from ray import tune


# Search space for the random-search run: the two values that
# tune_objective reads from its config ("weight" and "discount_factor")
# are each declared with tune.uniform(0, 1).
search_space = {
    "weight": tune.uniform(0,1),
    "discount_factor": tune.uniform(0,1),
}


def tune_objective(config):
    """Train and evaluate one maze policy for a sampled configuration.

    Args:
        config: Mapping providing ``'weight'`` and ``'discount_factor'``,
            both forwarded to ``train_policy``.

    Returns:
        A dict with a single ``"score"`` entry: the average number of steps
        reported by ``evaluate_policy`` for the trained policy.
    """
    environment = Environment()
    trained_policy = train_policy(
        environment,
        weight=config['weight'],
        discount_factor=config['discount_factor'],
    )
    # The same environment instance is reused for evaluation.
    return {"score": evaluate_policy(environment, trained_policy)}

# Launch the tuning run for tune_objective over search_space, then print the
# configuration with the lowest "score" (the average number of steps per
# evaluation episode returned by evaluate_policy).
# NOTE(review): no num_samples is passed and the recorded output lists a
# single trial, so the "best" config is simply that one sample — confirm
# whether more samples were intended.
analysis = tune.run(tune_objective, config=search_space)
print(analysis.get_best_config(metric="score", mode="min"))

2024-03-09 15:20:17,183	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Current time:,2024-03-09 15:20:20
Running for:,00:00:02.06
Memory:,6.0/15.6 GiB

Trial name,status,loc,discount_factor,weight,iter,total time (s),score
tune_objective_197fc_00000,TERMINATED,172.30.1.49:46014,0.941825,0.00258668,1,0.861975,8


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,score,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
tune_objective_197fc_00000,2024-03-09_15-20-20,True,,18d512a46a9d4f58ab93cc2b8cb1e893,"0_discount_factor=0.9418,weight=0.0026",hoondori-ML,1,172.30.1.49,46014,8,0.861975,0.861975,0.861975,1709965220,0,,1,197fc_00000,0.00174618


[2m[36m(tune_objective pid=46014)[0m 8.0 steps on average for a total of 10 episodes.


2024-03-09 15:20:20,955	INFO tune.py:762 -- Total run time: 2.92 seconds (2.06 seconds for the tuning loop).


{'weight': 0.0025866808105523553, 'discount_factor': 0.9418254178861942}


### BayesOpt 기반

In [11]:
# !pip install bayesian-optimization
from ray import tune
from ray.tune.suggest.bayesopt import BayesOptSearch

# Same trainable and search space as the random-search run, but the
# configurations are proposed by a BayesOptSearch instance passed as
# search_alg. Metric and direction ("score", minimised) are given to
# tune.run itself.
# NOTE(review): the recorded run shows one trial with iter=1. Confirm
# whether num_samples should be raised above random_search_steps=4 so the
# search gets past its initial random phase, and whether the
# training_iteration stop criterion has any effect for a trainable that
# returns a single result.
algo = BayesOptSearch(random_search_steps=4)
analysis = tune.run(
    tune_objective, 
    config=search_space,
    metric="score",
    mode="min",
    search_alg=algo,
    stop={"training_iteration": 10}
)
# Print the configuration of the trial with the lowest score.
print(analysis.get_best_config(metric="score", mode="min"))

0,1
Current time:,2024-03-09 15:23:06
Running for:,00:00:02.01
Memory:,5.9/15.6 GiB

Trial name,status,loc,discount_factor,weight,iter,total time (s),score
tune_objective_10b6c7d2,TERMINATED,172.30.1.49:54323,0.37454,0.950714,1,0.826744,8


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,score,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
tune_objective_10b6c7d2,2024-03-09_15-23-06,True,,a9d559a4db9544e7a6a16ee201217bb0,"1_discount_factor=0.3745,weight=0.9507",hoondori-ML,1,172.30.1.49,54323,8,0.826744,0.826744,0.826744,1709965386,0,,1,10b6c7d2,0.00205493


2024-03-09 15:23:06,735	INFO tune.py:762 -- Total run time: 2.12 seconds (2.01 seconds for the tuning loop).


[2m[36m(tune_objective pid=54323)[0m 8.0 steps on average for a total of 10 episodes.
{'weight': 0.9507143064099162, 'discount_factor': 0.3745401188473625}


### Scheduler 기반

In [14]:
# Report a score at every step so that HyperBand can pick out the promising
# trials early.

def objective(config):
    """Report a synthetic score for each of 30 steps.

    The score at a given step is ``weight * sqrt(step) + bias``, with both
    coefficients taken from ``config``.
    """
    weight = config['weight']
    bias = config['bias']
    for step in range(30):
        tune.report(score=weight * (step**0.5) + bias)
# Search space for the synthetic objective: "weight" and "bias" are both
# declared with tune.uniform(0, 1).
search_space = {"weight": tune.uniform(0,1), "bias": tune.uniform(0,1)}

from ray.tune.schedulers import HyperBandScheduler
# The scheduler is told which metric to look at and that lower is better.
scheduler = HyperBandScheduler(metric="score", mode="min")
# Run six sampled configurations of `objective` under the scheduler.
analysis = tune.run(
    objective,
    config=search_space,
    scheduler=scheduler,
    num_samples=6,
)
# Print the configuration of the trial with the lowest score.
print(analysis.get_best_config(metric="score", mode="min"))

0,1
Current time:,2024-03-09 15:31:20
Running for:,00:00:02.61
Memory:,5.6/15.6 GiB

Trial name,status,loc,bias,weight,iter,total time (s),score
objective_a2f02_00000,TERMINATED,172.30.1.49:77469,0.417569,0.792994,30,0.0593717,4.68797
objective_a2f02_00001,TERMINATED,172.30.1.49:77543,0.1488,0.66715,30,0.138234,3.74151
objective_a2f02_00002,TERMINATED,172.30.1.49:77545,0.622139,0.965383,30,0.111496,5.82089
objective_a2f02_00003,TERMINATED,172.30.1.49:77547,0.932602,0.380265,30,0.116657,2.98039
objective_a2f02_00004,TERMINATED,172.30.1.49:77549,0.523046,0.216066,30,0.134591,1.6866
objective_a2f02_00005,TERMINATED,172.30.1.49:77551,0.398056,0.405861,30,0.135211,2.58368


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,score,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_a2f02_00000,2024-03-09_15-31-19,True,,417b5f7e6ecb4768a47c7b68ed5e7a46,"0_bias=0.4176,weight=0.7930",hoondori-ML,30,172.30.1.49,77469,4.68797,0.0593717,0.00127816,0.0593717,1709965879,0,,30,a2f02_00000,0.00204921
objective_a2f02_00001,2024-03-09_15-31-20,True,,2586cfb4c1e14f5caa204765df60ff8e,"1_bias=0.1488,weight=0.6672",hoondori-ML,30,172.30.1.49,77543,3.74151,0.138234,0.000913382,0.138234,1709965880,0,,30,a2f02_00001,0.00179505
objective_a2f02_00002,2024-03-09_15-31-20,True,,d1f1f64733e64791a2462ee5c7b3a175,"2_bias=0.6221,weight=0.9654",hoondori-ML,30,172.30.1.49,77545,5.82089,0.111496,0.00297379,0.111496,1709965880,0,,30,a2f02_00002,0.0021646
objective_a2f02_00003,2024-03-09_15-31-20,True,,366551c8343042eaac1832aa7ab3d137,"3_bias=0.9326,weight=0.3803",hoondori-ML,30,172.30.1.49,77547,2.98039,0.116657,0.00206447,0.116657,1709965880,0,,30,a2f02_00003,0.00335288
objective_a2f02_00004,2024-03-09_15-31-20,True,,257ea5573b364571839e211fc5878b0a,"4_bias=0.5230,weight=0.2161",hoondori-ML,30,172.30.1.49,77549,1.6866,0.134591,0.00574493,0.134591,1709965880,0,,30,a2f02_00004,0.00156546
objective_a2f02_00005,2024-03-09_15-31-20,True,,765babd34ba141149247f46c8101b4bd,"5_bias=0.3981,weight=0.4059",hoondori-ML,30,172.30.1.49,77551,2.58368,0.135211,0.00127244,0.135211,1709965880,0,,30,a2f02_00005,0.00168085


2024-03-09 15:31:20,839	INFO tune.py:762 -- Total run time: 2.72 seconds (2.61 seconds for the tuning loop).


{'weight': 0.21606602085422344, 'bias': 0.5230459641532366}


# 튠의 구성과 실행

-  리소스 지정
-  콜백과 지표 

In [25]:
from ray import tune
from ray.tune import Callback
from ray.tune.logger import pretty_print


class PrintResultCallback(Callback):
    """Tune callback that prints the ``score`` of every reported result."""

    def on_trial_result(self, iteration, trials, trial, result, **info):
        """Print the trial, the iteration number and ``result['score']``.

        ``trials`` and ``info`` are accepted but not used.
        """
        message = (f"Trial {trial} in iteration {iteration}, "
                   f"got result: {result['score']}")
        print(message)


def objective(config):
    """Report a synthetic score, the step index and an empty extras dict.

    For each of 30 steps the score is ``weight * sqrt(step) + bias``, with
    both coefficients read from ``config``.
    """
    weight, bias = config["weight"], config["bias"]
    for step in range(30):
        tune.report(
            score=weight * (step ** 0.5) + bias,
            step=step,
            more_metrics={},
        )



# Search space for the synthetic objective: "weight" and "bias" are both
# declared with tune.uniform(0, 1).
search_space = {"weight": tune.uniform(0, 1), "bias": tune.uniform(0, 1)}

# Run `objective`, minimising "score", with PrintResultCallback attached so
# every reported result is printed as it arrives.
analysis = tune.run(
    objective,
    config=search_space,
    mode="min",
    metric="score",
    callbacks=[PrintResultCallback()])

# Show the last reported result of the best trial in readable form.
best = analysis.best_trial
print(pretty_print(best.last_result))


0,1
Current time:,2024-03-09 16:29:45
Running for:,00:00:01.16
Memory:,6.0/15.6 GiB

Trial name,status,loc,bias,weight,iter,total time (s),score,step
objective_cceb7_00000,TERMINATED,172.30.1.49:238368,0.184593,0.959199,30,0.0343657,5.35004,29


Trial objective_cceb7_00000 in iteration 1, got result: 0.1845929847711698


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,more_metrics,node_ip,pid,score,step,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_cceb7_00000,2024-03-09_16-29-45,True,,95b320faa1e04c6d858a206a17e1aa1f,"0_bias=0.1846,weight=0.9592",hoondori-ML,30,{},172.30.1.49,238368,5.35004,29,0.0343657,0.00105476,0.0343657,1709969385,0,,30,cceb7_00000,0.00169587


Trial objective_cceb7_00000 in iteration 2, got result: 1.1437915527876317
Trial objective_cceb7_00000 in iteration 3, got result: 1.541104608668902
Trial objective_cceb7_00000 in iteration 4, got result: 1.845973639122993
Trial objective_cceb7_00000 in iteration 5, got result: 2.1029901208040935
Trial objective_cceb7_00000 in iteration 6, got result: 2.329426186776434
Trial objective_cceb7_00000 in iteration 7, got result: 2.5341400384198054
Trial objective_cceb7_00000 in iteration 8, got result: 2.722393853672002
Trial objective_cceb7_00000 in iteration 9, got result: 2.897616232566634
Trial objective_cceb7_00000 in iteration 10, got result: 3.062188688820555
Trial objective_cceb7_00000 in iteration 11, got result: 3.217845188075127
Trial objective_cceb7_00000 in iteration 12, got result: 3.3658947343279673
Trial objective_cceb7_00000 in iteration 13, got result: 3.5073542934748163
Trial objective_cceb7_00000 in iteration 14, got result: 3.6430326051061557
Trial objective_cceb7_00000

2024-03-09 16:29:45,796	INFO tune.py:762 -- Total run time: 1.27 seconds (1.16 seconds for the tuning loop).


date: 2024-03-09_16-29-45
done: true
experiment_id: 95b320faa1e04c6d858a206a17e1aa1f
experiment_tag: 0_bias=0.1846,weight=0.9592
hostname: hoondori-ML
iterations_since_restore: 30
more_metrics: {}
node_ip: 172.30.1.49
pid: 238368
score: 5.350035356307232
step: 29
time_since_restore: 0.03436565399169922
time_this_iter_s: 0.0010547637939453125
time_total_s: 0.03436565399169922
timestamp: 1709969385
timesteps_since_restore: 0
training_iteration: 30
trial_id: cceb7_00000
warmup_time: 0.001695871353149414



# 체크포인트, 중단 및 재개

In [26]:
def stopper(trial_id, result):
    """Stop condition: true once the reported ``score`` is above 2.

    Args:
        trial_id: Identifier of the trial (not used in the decision).
        result: Mapping holding the latest reported metrics; only
            ``"score"`` is read.

    Returns:
        ``True`` when ``result["score"]`` is strictly greater than 2.
    """
    score_limit = 2
    current_score = result["score"]
    return current_score > score_limit
    
# Same run as before (objective, search_space, minimise "score", printing
# callback), but with `stopper` supplied as the stop condition. In the
# recorded run the trial ends at iteration 4, the first result whose score
# is above 2.
analysis = tune.run(
    objective,
    config=search_space,
    mode="min",
    metric="score",
    callbacks=[PrintResultCallback()],
    stop=stopper
)

# Show the last reported result of the best trial in readable form.
best = analysis.best_trial
print(pretty_print(best.last_result))

0,1
Current time:,2024-03-09 16:30:01
Running for:,00:00:01.17
Memory:,5.9/15.6 GiB

Trial name,status,loc,bias,weight,iter,total time (s),score,step
objective_d6572_00000,TERMINATED,172.30.1.49:239048,0.895364,0.663923,4,0.00780439,2.04531,3


Trial objective_d6572_00000 in iteration 1, got result: 0.8953639892131926


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,more_metrics,node_ip,pid,score,step,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_d6572_00000,2024-03-09_16-30-01,True,,22c6aac4cbc648149f470c073acea549,hoondori-ML,4,{},172.30.1.49,239048,2.04531,3,0.00780439,0.00119162,0.00780439,1709969401,0,,4,d6572_00000,0.0021131


Trial objective_d6572_00000 in iteration 2, got result: 1.559287298758421
Trial objective_d6572_00000 in iteration 3, got result: 1.8342933379476851
Trial objective_d6572_00000 in iteration 4, got result: 2.045312893674807


2024-03-09 16:30:01,608	INFO tune.py:762 -- Total run time: 1.28 seconds (1.17 seconds for the tuning loop).


date: 2024-03-09_16-30-01
done: true
experiment_id: 22c6aac4cbc648149f470c073acea549
experiment_tag: 0_bias=0.8954,weight=0.6639
hostname: hoondori-ML
iterations_since_restore: 4
more_metrics: {}
node_ip: 172.30.1.49
pid: 239048
score: 2.045312893674807
step: 3
time_since_restore: 0.007804393768310547
time_this_iter_s: 0.0011916160583496094
time_total_s: 0.007804393768310547
timestamp: 1709969401
timesteps_since_restore: 0
training_iteration: 4
trial_id: d6572_00000
warmup_time: 0.0021131038665771484



# 튠을 활용한 머신러닝

### rllib case
- !pip install tensorflow_probability pygame

In [3]:
from ray import tune
from ray.tune import Callback
from ray.tune.logger import pretty_print

class PrintResultCallback(Callback):
    """Tune callback that prints ``episode_reward_mean`` of every result."""

    def on_trial_result(self, iteration, trials, trial, result, **info):
        """Print the trial, the iteration and the mean episode reward.

        ``trials`` and ``info`` are accepted but not used.
        """
        message = (f"Trial {trial} in iteration {iteration}, "
                   f"got result: {result['episode_reward_mean']}")
        print(message)

# Tune the "DQN" trainable on CartPole-v1, maximising episode_reward_mean.
# The learning rate is declared with tune.uniform and the train batch size
# with tune.choice; a trial is stopped once training_iteration reaches 10.
analysis = tune.run(
    "DQN",
    metric="episode_reward_mean",
    mode="max",
    config={
        "env": "CartPole-v1",
        "lr": tune.uniform(1e-5, 1e-4),
        "train_batch_size": tune.choice([10000, 20000, 40000])
    },
    stop={"training_iteration": 10}
    # Per-iteration printing is disabled. To enable it, add a trailing comma
    # to the `stop` line above and uncomment the next line:
    #callbacks=[PrintResultCallback()]
)

# Show the last reported result of the best trial in readable form.
best = analysis.best_trial
print(pretty_print(best.last_result))

0,1
Current time:,2024-03-09 16:45:12
Running for:,00:00:25.79
Memory:,6.9/15.6 GiB

Trial name,status,loc,lr,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_CartPole-v1_e6809_00000,RUNNING,172.30.1.49:302171,7.94681e-05,40000,1,1.2717,1000,24.675,85,8,24.675


2024-03-09 16:44:46,444	INFO algorithm_config.py:2503 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2024-03-09 16:44:46,446	INFO algorithm_config.py:2503 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(pid=302171)[0m   from pkg_resources import packaging
[2m[36m(pid=302171)[0m   if (distutils.version.LooseVersion(tf.__version__) <
[2m[36m(DQN pid=302171)[0m 2024-03-09 16:44:49,888	INFO algorithm_config.py:2503 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to

Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
DQN_CartPole-v1_e6809_00000,1000,"{'num_env_steps_sampled': 1000, 'num_env_steps_trained': 0, 'num_agent_steps_sampled': 1000, 'num_agent_steps_trained': 0}",{},2024-03-09_16-44-52,False,24.675,{},85,24.675,8,40,40,a01a54f8ca354892bb39b1b830cb0ac0,hoondori-ML,"{'learner': {}, 'num_env_steps_sampled': 1000, 'num_env_steps_trained': 0, 'num_agent_steps_sampled': 1000, 'num_agent_steps_trained': 0}",1,172.30.1.49,1000,0,1000,1000,0,0,0,0,0,0,0,"{'cpu_util_percent': 10.3, 'ram_util_percent': 44.0}",302171,{},{},{},"{'mean_raw_obs_processing_ms': 0.4377398457560506, 'mean_inference_ms': 0.422367682823768, 'mean_action_processing_ms': 0.04112184583604872, 'mean_env_wait_ms': 0.033814709384243684, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 85.0, 'episode_reward_min': 8.0, 'episode_reward_mean': 24.675, 'episode_len_mean': 24.675, 'episode_media': {}, 'episodes_this_iter': 40, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [9.0, 16.0, 26.0, 27.0, 24.0, 9.0, 11.0, 29.0, 49.0, 76.0, 12.0, 85.0, 76.0, 11.0, 20.0, 12.0, 21.0, 29.0, 26.0, 32.0, 23.0, 15.0, 31.0, 15.0, 27.0, 37.0, 14.0, 35.0, 8.0, 18.0, 12.0, 16.0, 13.0, 19.0, 17.0, 12.0, 19.0, 15.0, 14.0, 27.0], 'episode_lengths': [9, 16, 26, 27, 24, 9, 11, 29, 49, 76, 12, 85, 76, 11, 20, 12, 21, 29, 26, 32, 23, 15, 31, 15, 27, 37, 14, 35, 8, 18, 12, 16, 13, 19, 17, 12, 19, 15, 14, 27]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.4377398457560506, 'mean_inference_ms': 0.422367682823768, 'mean_action_processing_ms': 0.04112184583604872, 'mean_env_wait_ms': 0.033814709384243684, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",1.2717,1.2717,1.2717,{'training_iteration_time_ms': 1.192},1709970292,0,1000,1,e6809_00000,0.825593


2024-03-09 16:45:12,442	ERROR tune.py:758 -- Trials did not complete: [DQN_CartPole-v1_e6809_00000]
2024-03-09 16:45:12,443	INFO tune.py:762 -- Total run time: 26.00 seconds (25.79 seconds for the tuning loop).


agent_timesteps_total: 1000
counters:
  num_agent_steps_sampled: 1000
  num_agent_steps_trained: 0
  num_env_steps_sampled: 1000
  num_env_steps_trained: 0
custom_metrics: {}
date: 2024-03-09_16-44-52
done: false
episode_len_mean: 24.675
episode_media: {}
episode_reward_max: 85.0
episode_reward_mean: 24.675
episode_reward_min: 8.0
episodes_this_iter: 40
episodes_total: 40
experiment_id: a01a54f8ca354892bb39b1b830cb0ac0
experiment_tag: 0_lr=0.0001,train_batch_size=40000
hostname: hoondori-ML
info:
  learner: {}
  num_agent_steps_sampled: 1000
  num_agent_steps_trained: 0
  num_env_steps_sampled: 1000
  num_env_steps_trained: 0
iterations_since_restore: 1
node_ip: 172.30.1.49
num_agent_steps_sampled: 1000
num_agent_steps_trained: 0
num_env_steps_sampled: 1000
num_env_steps_sampled_this_iter: 1000
num_env_steps_trained: 0
num_env_steps_trained_this_iter: 0
num_faulty_episodes: 0
num_healthy_workers: 0
num_in_flight_async_reqs: 0
num_remote_worker_restarts: 0
num_steps_trained_this_iter: 0

### MNIST hpo
- !pip install pyarrow hyperopt

In [1]:

from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical


def load_data():
    """Load MNIST and prepare it for a 10-class classifier.

    Images from both splits are divided by 255.0 and labels are converted
    with ``to_categorical`` using 10 classes.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``.
    """
    num_classes = 10
    train_split, test_split = mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    train_set = (
        train_images / 255.0,
        to_categorical(train_labels, num_classes),
    )
    test_set = (
        test_images / 255.0,
        to_categorical(test_labels, num_classes),
    )
    return train_set, test_set


#load_data()

2024-03-09 16:47:57.404644: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-09 16:47:57.404664: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-09 16:47:57.406634: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-09 16:47:57.413136: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from ray.tune.integration.keras import TuneReportCallback


def objective(config):
    """Train a small MNIST classifier and report its accuracy to Tune.

    The network is Flatten -> Dense(hidden, activation) -> Dropout(rate) ->
    Dense(10, softmax), trained for 10 epochs with batch size 128.

    Args:
        config: Mapping providing ``"hidden"`` (units of the hidden layer),
            ``"activation"`` (its activation) and ``"rate"`` (dropout rate).
    """
    (x_train, y_train), (x_test, y_test) = load_data()
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    model.add(Dense(config["hidden"], activation=config["activation"]))
    model.add(Dropout(config["rate"]))
    model.add(Dense(10, activation="softmax"))

    model.compile(loss="categorical_crossentropy", metrics=["accuracy"])
    # Report the held-out accuracy, not the training accuracy: the training
    # figure is measured with dropout active, so tuning on it would favour
    # low dropout rates and overfitting. The name reported to Tune stays
    # "mean_accuracy".
    model.fit(x_train, y_train, batch_size=128, epochs=10,
              validation_data=(x_test, y_test),
              callbacks=[TuneReportCallback({"mean_accuracy": "val_accuracy"})])

In [None]:
from ray import tune
from ray.tune.suggest.hyperopt import HyperOptSearch

# One hand-picked configuration handed to the searcher through
# points_to_evaluate (presumably tried before sampled ones — confirm against
# the HyperOptSearch documentation).
initial_params = [{"rate": 0.2, "hidden": 128, "activation": "relu"}]
algo = HyperOptSearch(points_to_evaluate=initial_params)

# Search space for the Keras objective: dropout rate, hidden-layer width and
# hidden-layer activation — the three keys that `objective` reads.
search_space = {
    "rate": tune.uniform(0.1, 0.5),
    "hidden": tune.randint(32, 512),
    "activation": tune.choice(["relu", "tanh"])
}


# Run 10 samples of `objective` under the name "keras_hyperopt_exp",
# maximising the reported "mean_accuracy"; a stop criterion of 0.99 on that
# metric is supplied.
analysis = tune.run(
    objective,
    name="keras_hyperopt_exp",
    search_alg=algo,
    metric="mean_accuracy",
    mode="max",
    stop={"mean_accuracy": 0.99},
    num_samples=10,
    config=search_space,
)
print("Best hyperparameters found were: ", analysis.best_config)