In [1]:
import copy

import gym
#import ray.rllib.agents.ppo as ppo
import ray.rllib.agents.sac as sac
from ray import tune
from ray.rllib.models import ModelCatalog
from ray.tune.logger import pretty_print
from ray.tune.schedulers import AsyncHyperBandScheduler

In [2]:
# Build the SAC trial configuration / search space on a private copy of the
# RLlib defaults.
#
# A deep copy is required here: `dict.copy()` is shallow, so writing into the
# nested "replay_buffer_config" and "optimization" dicts would also modify the
# shared module-level `sac.DEFAULT_CONFIG`. Re-running this cell (or building
# another config in the same kernel) would then start from already-modified
# defaults. `dict(...)` first normalises the defaults object to a plain dict.
config = copy.deepcopy(dict(sac.DEFAULT_CONFIG))

# Fixed settings shared by every trial.
config["env"] = "SpaceInvaders-v4"
config["num_gpus"] = 1
config["log_level"] = "INFO"
config["framework"] = "tf2"
config["eager_tracing"] = True
config["replay_buffer_config"]["capacity"] = int(1e4)

# Tuned settings: each learning rate gets its own uniform search range
# over [1e-4, 4e-4].
config["optimization"]["actor_learning_rate"] = tune.uniform(1e-4, 4e-4)
config["optimization"]["critic_learning_rate"] = tune.uniform(1e-4, 4e-4)
config["optimization"]["entropy_learning_rate"] = tune.uniform(1e-4, 4e-4)

In [3]:
# ASHA scheduler used by the Tune run: trials are compared on
# `episode_reward_mean` (higher is better), with progress measured in
# training iterations.
asha_scheduler = AsyncHyperBandScheduler(
    metric="episode_reward_mean",
    mode="max",
    time_attr="training_iteration",
    grace_period=10,      # lower bound of the scheduler's time budget per trial
    max_t=100,            # upper bound of the scheduler's time budget per trial
    reduction_factor=3,
    brackets=1,
)

In [None]:
# Run the SAC hyperparameter search and keep the resulting analysis object
# for the checkpoint / best-config lookups below.
analysis = tune.run(
    "SAC",
    # What to run and how to search.
    config=config,
    num_samples=5,
    scheduler=asha_scheduler,
    stop={"episode_reward_mean": 200},
    # Where results go and how an interrupted experiment is picked up again.
    name="atari_sac_test_1",
    local_dir="atari_test_sac_results",
    resume="AUTO",
    max_failures=5,
    # Checkpointing: only a final checkpoint per trial is requested.
    checkpoint_at_end=True,
    # Options that were tried and left disabled:
    # max_concurrent_trials=4,
    # checkpoint_freq=10,
    # reuse_actors=True,
)

In [5]:
# Look up the trial with the highest `episode_reward_mean`, then list the
# checkpoints recorded for that trial.
best_trial = analysis.get_best_trial(metric="episode_reward_mean", mode="max")
checkpoints = analysis.get_trial_checkpoints_paths(trial=best_trial)

In [6]:
# Show the first field of the first checkpoint entry (the checkpoint path,
# as the printed output confirms).
first_checkpoint_entry = checkpoints[0]
print(first_checkpoint_entry[0])

/home/lupus/atari_testing/atari_test_sac_results/atari_sac_test_1/SAC_SpaceInvaders-v4_7be6c_00004_4_actor_learning_rate=0.0003,critic_learning_rate=0.0004,entropy_learning_rate=0.0004_2022-08-02_14-25-10/checkpoint_000005/checkpoint-5


In [7]:
# Configuration of the best trial, selected with the same metric and mode
# as the checkpoint lookup.
best_config = analysis.get_best_config(
    metric="episode_reward_mean",
    mode="max",
)

In [None]:
# Rebuild a SAC trainer from the best trial's configuration so that its
# checkpoint can be loaded into it.
agent = sac.SACTrainer(
    config=best_config,
    env="SpaceInvaders-v4",
)

In [None]:
# Load the trainer state from the first checkpoint listed for the best trial.
checkpoint_path = checkpoints[0][0]
agent.restore(checkpoint_path)

In [10]:
# Fetch the policy object from the restored trainer.
policy = agent.get_policy()

In [14]:
# Drill down to the Keras model behind the policy's action network; this is
# the object summarised and plotted in the following cells.
action_model = policy.model.action_model
model = action_model.base_model

In [15]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 observations (InputLayer)      [(None, 84, 84, 4)]  0           []                               
                                                                                                  
 conv1 (Conv2D)                 (None, 21, 21, 16)   4112        ['observations[0][0]']           
                                                                                                  
 conv2 (Conv2D)                 (None, 11, 11, 32)   8224        ['conv1[0][0]']                  
                                                                                                  
 conv3 (Conv2D)                 (None, 1, 1, 256)    991488      ['conv2[0][0]']                  
                                                                                              

In [16]:
import tensorflow as tf

In [17]:
# Render the model graph to an image file, including tensor shapes.
# NOTE: needs `pydot` and graphviz installed; without them Keras only prints
# an installation hint (as seen in this cell's recorded output).
tf.keras.utils.plot_model(
    model,
    to_file="my_model.png",
    show_shapes=True,
)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.
