In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%cd ..

/home/ubuntu/sustaingym


In [2]:
from typing import Callable, Optional, Union

import gymnasium as gym
import ray
from ray import tune
from ray.rllib.algorithms import ppo, AlgorithmConfig
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env

from sustaingym.envs.evcharging import EVChargingEnv, RealTraceGenerator, GMMsTraceGenerator, DiscreteActionWrapper
from sustaingym.envs.evcharging.event_generation import AbstractTraceGenerator
from sustaingym.envs.evcharging.utils import \
    DATE_FORMAT, DEFAULT_PERIOD_TO_RANGE, DATE_FORMAT, SiteStr

from gymnasium.wrappers import TimeLimit


###
NUM_SUBPROCESSES = 4
TIMESTEPS = 250_000
EVAL_FREQ = 10_000
SAMPLE_EVAL_PERIODS = {
    'Summer 2019':   ('2019-07-01', '2019-07-14'),
    'Fall 2019':     ('2019-11-04', '2019-11-17'),
    'Spring 2020':   ('2020-04-06', '2020-04-19'),
    'Summer 2021':   ('2021-07-05', '2021-07-18'),
}

def get_env(full: bool, real_trace: bool, dp: str, site: SiteStr, discrete: bool = False, seed: int=None) -> Callable:
    """Return environment.

    Args:
        full: if True, use full season; otherwise, use sample 2 weeks
        real_trace: choice of generator
        dp: 'Summer 2019', 'Fall 2019', 'Spring 2020', 'Summer 2021'
        site: 'caltech' or 'jpl'
        discrete: whether to wrap environment in discrete action wrapper
        seed: seed for GMMs generator
    
    Returns:
        Callable of environment
    """
    date_period = DEFAULT_PERIOD_TO_RANGE[dp] if full else SAMPLE_EVAL_PERIODS[dp]

    def _get_env() -> EVChargingEnv:
        if real_trace:
            gen: AbstractTraceGenerator = RealTraceGenerator(site, date_period)
        else:
            gen = GMMsTraceGenerator(site, date_period, seed=seed)
        
        if discrete:
            return TimeLimit(DiscreteActionWrapper(EVChargingEnv(gen)), max_episode_steps=288)
        else:
            return TimeLimit(EVChargingEnv(gen), max_episode_steps=288)
    return _get_env

In [3]:
from ray import tune
from ray.air import session
from ray.rllib.algorithms import ppo, AlgorithmConfig

# ray.init(num_cpus=3)
register_env("my_env", lambda config: get_env(**config)())

train_config = (
    ppo.PPOConfig()
    .environment("my_env", env_config={
        "full": True,
        "real_trace": False,
        "dp": "Summer 2019",
        "site": "caltech",
        "discrete": False,
        "seed": 123
    })
    .framework("tf2")
)
algo = train_config.build(env="my_env")

2023-04-06 04:43:52,816	INFO worker.py:1553 -- Started a local Ray instance.
2023-04-06 04:44:01,653	INFO trainable.py:172 -- Trainable.setup took 10.792 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [4]:
from sustaingym.algorithms.evcharging.baselines import RLLibAlgorithm

env = get_env(full=False, real_trace=True, dp='Summer 2019', site='caltech', discrete=False, seed=True)()
rllib_algo = RLLibAlgorithm(env, algo)
reward_breakdown = rllib_algo.run(14).to_dict('list')

env observation space:  (146,)


100%|██████████| 14/14 [01:46<00:00,  7.63s/it]


In [2]:


###

from ray import tune
from ray.air import session


def trainable(config: dict):
    checkpoint_dir = tune.get_trial_dir()
    print(checkpoint_dir)
    print(config)

def trainable(config: dict):
    checkpoint_dir = tune.get_trial_dir()
    print(checkpoint_dir)
    print(config)

    train_config = (
        ppo.PPOConfig()
        .environment("my_env", env_config={
            "full": True,
            "real_trace": False,
            "dp": "Summer 2019",
            "site": "caltech",
            "discrete": False,
            "seed": 123
        })
        .framework("tf2")
    )
    algo = train_config.build(env="my_env")

    for i in range(2):
        train_results = algo.train()

        algo.

def experiment(config):

    algo = train_config.build(env="my_env")
    print("algo built")
    for i in range(1):
        print("begin training algo")
        train_results = algo.train()
        print("done training algo")
        print(train_results['agent_timesteps_total'])
        print(train_results['custom_metrics'])
        print(train_results['episode_reward_max'])
        print(train_results['episode_reward_mean'])
        print(train_results['episode_reward_min'])

        # print(pretty_print(train_results))
        algo.save(checkpoint_dir)
        print(f"Checkpoint saved in directory {checkpoint_dir}")
        tune.report({**train_results, "a": 2})
    algo.stop()

    # Manual Eval
    eval_config = (
        ppo.PPOConfig()
        .environment("my_env", env_config={
            "full": False,
            "real_trace": True,
            "dp": "Summer 2019",
            "site": "caltech",
            "discrete": False,
            "seed": 123
        })
    )
    eval_algo = eval_config.build(env="my_env")
    eval_algo.load_checkpoint(checkpoint_dir)
    env = eval_algo.workers.local_worker().env

    obs, info = env.reset()
    done = False
    eval_results = {"eval_reward": 0, "eval_eps_length": 0}
    while not done:
        action = eval_algo.compute_single_action(obs)
        next_obs, reward, done, truncated, info = env.step(action)
        eval_results["eval_reward"] += reward
        eval_results["eval_eps_length"] += 1
    eval_algo.stop()
    results = {**train_results, **eval_results}
    print(results)
    tune.report({**results, "a": 3})

import os

ray.init(num_cpus=3)
register_env("my_env", lambda config: get_env(**config)())

resources = ppo.PPO.default_resource_request(ppo.PPOConfig())

tuner = tune.Tuner(
    # experiment
    tune.with_resources(experiment, resources=resources),
    param_space={}
)
final_results = tuner.fit()

print(final_results)

2023-04-05 22:28:37,713	INFO worker.py:1553 -- Started a local Ray instance.
2023-04-05 22:28:38,690	INFO algorithm_config.py:2899 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2023-04-05 22:28:38,691	INFO algorithm_config.py:2899 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.


0,1
Current time:,2023-04-05 22:30:45
Running for:,00:02:06.52
Memory:,2.7/15.3 GiB

Trial name,# failures,error file
experiment_3628c_00000,1,/home/ubuntu/ray_results/experiment_2023-04-05_22-28-38/experiment_3628c_00000_0_2023-04-05_22-28-38/error.txt

Trial name,status,loc,iter,total time (s)
experiment_3628c_00000,ERROR,172.31.19.228:31245,1,113.219


[2m[36m(experiment pid=31245)[0m 2023-04-05 22:28:44,113	INFO algorithm_config.py:2888 -- Executing eagerly (framework='tf2'), with eager_tracing=tf2. For production workloads, make sure to set eager_tracing=True  in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.
[2m[36m(experiment pid=31245)[0m 2023-04-05 22:28:44,280	INFO algorithm.py:506 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(experiment pid=31245)[0m /home/ubuntu/ray_results/experiment_2023-04-05_22-28-38/experiment_3628c_00000_0_2023-04-05_22-28-38/
[2m[36m(experiment pid=31245)[0m {}




[2m[36m(experiment pid=31245)[0m algo built
[2m[36m(experiment pid=31245)[0m begin training algo




Trial name,_metric,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
experiment_3628c_00000,"{'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 4.999999873689376e-05, 'total_loss': 0.17219092, 'policy_loss': -0.07289077, 'vf_loss': 0.24229585, 'vf_explained_var': 0.49599043, 'kl': 0.013929113, 'entropy': 76.49437, 'entropy_coeff': 0.0}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 480.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}}, 'num_env_steps_sampled': 4000, 'num_env_steps_trained': 4000, 'num_agent_steps_sampled': 4000, 'num_agent_steps_trained': 4000}, 'sampler_results': {'episode_reward_max': 3.93224650215404, 'episode_reward_min': 0.7552686244731401, 'episode_reward_mean': 2.4892613019999303, 'episode_len_mean': 288.0, 'episode_media': {}, 'episodes_this_iter': 12, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [3.0030362793230005, 3.93224650215404, 3.4619184923529542, 2.742651249146179, 2.4252262847728034, 0.7807668284229972, 1.9450977753402676, 2.5155639745967204, 0.7552686244731401, 2.5493733043664486, 2.1435935163188797, 3.6163927927317348], 'episode_lengths': [288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.5527151518616302, 'mean_inference_ms': 7.320524215221642, 'mean_action_processing_ms': 0.2390582939197516, 'mean_env_wait_ms': 22.970346556134007, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {'ObsPreprocessorConnector_ms': 0.010246038436889648, 'StateBufferConnector_ms': 0.007273753484090169, 'ViewRequirementAgentConnector_ms': 0.15441377957661948}}, 'episode_reward_max': 3.93224650215404, 'episode_reward_min': 0.7552686244731401, 'episode_reward_mean': 2.4892613019999303, 'episode_len_mean': 288.0, 'episodes_this_iter': 12, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'hist_stats': {'episode_reward': [3.0030362793230005, 3.93224650215404, 3.4619184923529542, 2.742651249146179, 2.4252262847728034, 0.7807668284229972, 1.9450977753402676, 2.5155639745967204, 0.7552686244731401, 2.5493733043664486, 2.1435935163188797, 3.6163927927317348], 'episode_lengths': [288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.5527151518616302, 'mean_inference_ms': 7.320524215221642, 'mean_action_processing_ms': 0.2390582939197516, 'mean_env_wait_ms': 22.970346556134007, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {'ObsPreprocessorConnector_ms': 0.010246038436889648, 'StateBufferConnector_ms': 0.007273753484090169, 'ViewRequirementAgentConnector_ms': 0.15441377957661948}, 'num_healthy_workers': 2, 'num_in_flight_async_reqs': 0, 'num_remote_worker_restarts': 0, 'num_agent_steps_sampled': 4000, 'num_agent_steps_trained': 4000, 'num_env_steps_sampled': 4000, 'num_env_steps_trained': 4000, 'num_env_steps_sampled_this_iter': 4000, 'num_env_steps_trained_this_iter': 4000, 'timesteps_total': 4000, 'num_steps_trained_this_iter': 4000, 'agent_timesteps_total': 4000, 'timers': {'training_iteration_time_ms': 107066.006, 'learn_time_ms': 44783.599, 'learn_throughput': 89.318, 'synch_weights_time_ms': 4.836}, 'counters': {'num_env_steps_sampled': 4000, 'num_env_steps_trained': 4000, 'num_agent_steps_sampled': 4000, 'num_agent_steps_trained': 4000}, 'done': False, 'episodes_total': 12, 'training_iteration': 1, 'trial_id': 'default', 'experiment_id': '256cbc30557243c0a9dcb4c1fc699e5d', 'date': '2023-04-05_22-30-37', 'timestamp': 1680733837, 'time_this_iter_s': 107.0723135471344, 'time_total_s': 107.0723135471344, 'pid': 31245, 'hostname': 'ip-172-31-19-228', 'node_ip': '172.31.19.228', 'config': {'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'num_gpus': 0, 'num_cpus_per_worker': 1, 'num_gpus_per_worker': 0, '_fake_gpus': False, 'num_trainer_workers': 0, 'num_gpus_per_trainer_worker': 0, 'num_cpus_per_trainer_worker': 1, 'custom_resources_per_worker': {}, 'placement_strategy': 'PACK', 'eager_tracing': False, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'env': 'my_env', 'env_config': {'full': True, 'real_trace': False, 'dp': 'Summer 2019', 'site': 'caltech', 'discrete': False, 'seed': 123}, 'observation_space': None, 'action_space': None, 'env_task_fn': None, 'render_env': False, 'clip_rewards': None, 'normalize_actions': True, 'clip_actions': False, 'disable_env_checking': False, 'is_atari': False, 'auto_wrap_old_gym_envs': True, 'num_envs_per_worker': 1, 'sample_collector': <class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>, 'sample_async': False, 'enable_connectors': True, 'rollout_fragment_length': 'auto', 'batch_mode': 'truncate_episodes', 'remote_worker_envs': False, 'remote_env_batch_wait_ms': 0, 'validate_workers_after_construction': True, 'ignore_worker_failures': False, 'recreate_failed_workers': False, 'restart_failed_sub_environments': False, 'num_consecutive_worker_failures_tolerance': 100, 'preprocessor_pref': 'deepmind', 'observation_filter': 'NoFilter', 'synchronize_filters': True, 'compress_observations': False, 'enable_tf1_exec_eagerly': False, 'sampler_perf_stats_ema_coef': None, 'worker_health_probe_timeout_s': 60, 'worker_restore_timeout_s': 1800, 'gamma': 0.99, 'lr': 5e-05, 'train_batch_size': 4000, 'model': {'_disable_preprocessor_api': False, '_disable_action_flattening': False, 'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': None, 'conv_activation': 'relu', 'post_fcnet_hiddens': [], 'post_fcnet_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action': False, 'lstm_use_prev_reward': False, '_time_major': False, 'use_attention': False, 'attention_num_transformer_units': 1, 'attention_dim': 64, 'attention_num_heads': 1, 'attention_head_dim': 32, 'attention_memory_inference': 50, 'attention_memory_training': 50, 'attention_position_wise_mlp_dim': 32, 'attention_init_gru_gate_bias': 2.0, 'attention_use_n_prev_actions': 0, 'attention_use_n_prev_rewards': 0, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None, 'lstm_use_prev_action_reward': -1, '_use_default_native_models': -1}, 'optimizer': {}, 'max_requests_in_flight_per_sampler_worker': 2, 'rl_trainer_class': None, '_enable_rl_trainer_api': False, '_rl_trainer_hps': RLTrainerHPs(), 'explore': True, 'exploration_config': {'type': 'StochasticSampling'}, 'policies': {'default_policy': <ray.rllib.policy.policy.PolicySpec object at 0x7fb873d96310>}, 'policy_states_are_swappable': False, 'input_config': {}, 'actions_in_input_normalized': False, 'postprocess_inputs': False, 'shuffle_buffer_size': 0, 'output': None, 'output_config': {}, 'output_compress_columns': ['obs', 'new_obs'], 'output_max_file_size': 67108864, 'offline_sampling': False, 'evaluation_interval': None, 'evaluation_duration': 10, 'evaluation_duration_unit': 'episodes', 'evaluation_sample_timeout_s': 180.0, 'evaluation_parallel_to_training': False, 'evaluation_config': None, 'off_policy_estimation_methods': {}, 'ope_split_batch_by_episode': True, 'evaluation_num_workers': 0, 'always_attach_evaluation_results': False, 'enable_async_evaluation': False, 'in_evaluation': False, 'sync_filters_on_rollout_workers_timeout_s': 60.0, 'keep_per_episode_custom_metrics': False, 'metrics_episode_collection_timeout_s': 60.0, 'metrics_num_episodes_for_smoothing': 100, 'min_time_s_per_iteration': None, 'min_train_timesteps_per_iteration': 0, 'min_sample_timesteps_per_iteration': 0, 'export_native_model_files': False, 'checkpoint_trainable_policies_only': False, 'logger_creator': None, 'logger_config': None, 'log_level': 'WARN', 'log_sys_usage': True, 'fake_sampler': False, 'seed': None, 'worker_cls': None, 'rl_module_class': None, '_enable_rl_module_api': False, '_tf_policy_handles_more_than_one_loss': False, '_disable_preprocessor_api': False, '_disable_action_flattening': False, '_disable_execution_plan_api': True, 'simple_optimizer': True, 'replay_sequence_length': None, 'horizon': -1, 'soft_horizon': -1, 'no_done_at_end': -1, 'lr_schedule': None, 'use_critic': True, 'use_gae': True, 'kl_coeff': 0.2, 'sgd_minibatch_size': 128, 'num_sgd_iter': 30, 'shuffle_sequences': True, 'vf_loss_coeff': 1.0, 'entropy_coeff': 0.0, 'entropy_coeff_schedule': None, 'clip_param': 0.3, 'vf_clip_param': 10.0, 'grad_clip': None, 'kl_target': 0.01, 'vf_share_layers': -1, 'lambda': 1.0, 'input': 'sampler', 'multiagent': {'policies': {'default_policy': (None, None, None, None)}, 'policy_mapping_fn': <function AlgorithmConfig.__init__.<locals>.<lambda> at 0x7fb873efc160>, 'policies_to_train': None, 'policy_map_capacity': 100, 'policy_map_cache': -1, 'count_steps_by': 'env_steps', 'observation_fn': None}, 'callbacks': <class 'ray.rllib.algorithms.callbacks.DefaultCallbacks'>, 'create_env_on_driver': False, 'custom_eval_function': None, 'framework': 'tf2', 'num_cpus_for_driver': 1, 'num_workers': 2}, 'time_since_restore': 107.0723135471344, 'timesteps_since_restore': 0, 'iterations_since_restore': 1, 'warmup_time': 5.951690912246704, 'perf': {'cpu_util_percent': 38.81527777777777, 'ram_util_percent': 22.171527777777776, 'gpu_util_percent0': 0.0, 'vram_util_percent0': 0.00013020833333333333}, 'a': 2}",2023-04-05_22-30-37,False,,f1c1cd3291d043e886975cd22677bdeb,ip-172-31-19-228,1,172.31.19.228,31245,113.219,113.219,113.219,1680733837,0,,1,3628c_00000,0.00292063




[2m[36m(experiment pid=31245)[0m done training algo
[2m[36m(experiment pid=31245)[0m 4000
[2m[36m(experiment pid=31245)[0m {}
[2m[36m(experiment pid=31245)[0m 3.93224650215404
[2m[36m(experiment pid=31245)[0m 2.4892613019999303
[2m[36m(experiment pid=31245)[0m 0.7552686244731401
[2m[36m(experiment pid=31245)[0m Checkpoint saved in directory /home/ubuntu/ray_results/experiment_2023-04-05_22-28-38/experiment_3628c_00000_0_2023-04-05_22-28-38/


2023-04-05 22:30:45,247	ERROR tune.py:794 -- Trials did not complete: [experiment_3628c_00000]
2023-04-05 22:30:45,249	INFO tune.py:798 -- Total run time: 126.55 seconds (126.48 seconds for the tuning loop).


<ray.tune.result_grid.ResultGrid object at 0x7fb7ff34a760>


In [5]:
{result.log_dir: result.metrics_dataframe['a'] for result in final_results}


KeyError: 'a'

In [11]:
from ray.tune.logger import pretty_print

final_results.get_best_result().metrics['hist_stats']['episode_reward']

[3.0652830457648164,
 0.644091722092978,
 1.0565137843837575,
 3.6352404391060613,
 2.53774750717975,
 3.2504868899700825,
 1.9874198440814987,
 0.7707253657578359,
 2.797307384827306,
 2.5483773451139538,
 3.9048278693570166,
 0.5053711386258443]

In [17]:
final_results.get_best_result()

Result(metrics={'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 4.999999873689376e-05, 'total_loss': 0.118393734, 'policy_loss': -0.073018864, 'vf_loss': 0.18794881, 'vf_explained_var': 0.49560168, 'kl': 0.017318841, 'entropy': 76.6001, 'entropy_coeff': 0.0}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 480.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}}, 'num_env_steps_sampled': 4000, 'num_env_steps_trained': 4000, 'num_agent_steps_sampled': 4000, 'num_agent_steps_trained': 4000}, 'sampler_results': {'episode_reward_max': 3.9048278693570166, 'episode_reward_min': 0.5053711386258443, 'episode_reward_mean': 2.2252826946884086, 'episode_len_mean': 288.0, 'episode_media': {}, 'episodes_this_iter': 12, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [3.065283045764