In [1]:
# Load the autoreload extension and reload all modules before each cell runs,
# so edits to the project's .py files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Move the working directory up one level (the recorded output shows
# /home/ubuntu/sustaingym). NOTE: the path is relative, so re-running this
# cell without restarting the kernel moves up one more directory each time.
%cd ..

/home/ubuntu/sustaingym


In [2]:
from typing import Callable, Optional, Union

import gymnasium as gym
import ray
from ray import tune
from ray.rllib.algorithms import ppo, AlgorithmConfig
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env

from sustaingym.envs.evcharging import EVChargingEnv, RealTraceGenerator, GMMsTraceGenerator, DiscreteActionWrapper
from sustaingym.envs.evcharging.event_generation import AbstractTraceGenerator
from sustaingym.envs.evcharging.utils import \
    DATE_FORMAT, DEFAULT_PERIOD_TO_RANGE, DATE_FORMAT, SiteStr

from gymnasium.wrappers import TimeLimit


###
# Run-level settings. None of these three values is referenced in the visible
# cells -- presumably used by other experiments; verify before removing.
NUM_SUBPROCESSES = 4
TIMESTEPS = 250_000
EVAL_FREQ = 10_000

# Two-week (start date, end date) windows, keyed by period name.
# get_env() reads this table when it is called with full=False.
SAMPLE_EVAL_PERIODS = {
    'Summer 2019': ('2019-07-01', '2019-07-14'),
    'Fall 2019': ('2019-11-04', '2019-11-17'),
    'Spring 2020': ('2020-04-06', '2020-04-19'),
    'Summer 2021': ('2021-07-05', '2021-07-18'),
}

def get_env(full: bool, real_trace: bool, dp: str, site: SiteStr, discrete: bool = False, seed: Optional[int] = None) -> Callable[[], gym.Env]:
    """Return a zero-argument factory that builds the EV charging environment.

    Args:
        full: if True, use full season; otherwise, use sample 2 weeks
        real_trace: if True, generate events with RealTraceGenerator;
            otherwise with GMMsTraceGenerator
        dp: 'Summer 2019', 'Fall 2019', 'Spring 2020', 'Summer 2021'
        site: 'caltech' or 'jpl'
        discrete: whether to wrap environment in discrete action wrapper
        seed: seed for GMMs generator; not read when real_trace is True

    Returns:
        Callable taking no arguments that returns a new environment wrapped
        in a TimeLimit of 288 steps.

    Raises:
        KeyError: if dp is not a key of the selected period table.
    """
    # Resolve the date range eagerly so an unknown period fails at call time,
    # not later when the factory is invoked (e.g. inside a remote worker).
    date_period = DEFAULT_PERIOD_TO_RANGE[dp] if full else SAMPLE_EVAL_PERIODS[dp]

    def _get_env() -> gym.Env:
        gen: AbstractTraceGenerator
        if real_trace:
            gen = RealTraceGenerator(site, date_period)
        else:
            gen = GMMsTraceGenerator(site, date_period, seed=seed)

        env: gym.Env = EVChargingEnv(gen, vectorize_obs=False)
        if discrete:
            env = DiscreteActionWrapper(env)
        # Both variants share the same episode cap; the TimeLimit wrapper is
        # always the outermost layer.
        return TimeLimit(env, max_episode_steps=288)

    return _get_env

In [3]:
from ray import tune
from ray.air import session
from ray.rllib.algorithms import ppo, AlgorithmConfig

def _create_registered_env(env_config):
    """Env creator for RLlib: expand env_config into get_env kwargs and build the env."""
    make_env = get_env(**env_config)
    return make_env()


register_env("my_env", _create_registered_env)

# Keyword arguments that the registered creator forwards to get_env().
_train_env_kwargs = {
    "full": True,
    "real_trace": False,
    "dp": "Summer 2019",
    "site": "caltech",
    "discrete": False,
    "seed": 123,
}

# PPO on the registered environment with a 10k-step train batch.
# The framework is left at its default (a .framework("tf2") call was disabled).
train_config = (
    ppo.PPOConfig()
    .environment("my_env", env_config=_train_env_kwargs)
    .training(train_batch_size=10_000)
)
algo = train_config.build(env="my_env")

2023-04-14 02:07:16,117	INFO worker.py:1553 -- Started a local Ray instance.
2023-04-14 02:07:25,820	INFO trainable.py:172 -- Trainable.setup took 11.636 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [None]:
# Single call to train() on the algorithm built above. As the last expression
# in the cell, its return value is displayed as the cell output when executed.
algo.train()

In [5]:
from sustaingym.algorithms.evcharging.baselines import RLLibAlgorithm

# Evaluation environment: two-week sample of real traces for Summer 2019 at
# the Caltech site. `seed` is only passed to the GMMs generator inside
# get_env(), so it is left at its default here instead of being given a bool.
env = get_env(full=False, real_trace=True, dp='Summer 2019', site='caltech', discrete=False)()

# Wrap the trained RLlib algorithm in the baseline interface and run it twice
# (the recorded output yields two entries per metric); the result is converted
# to a {column name: list of values} dict.
rllib_algo = RLLibAlgorithm(env, algo)
reward_breakdown = rllib_algo.run(2).to_dict('list')

env observation space:  None


100%|██████████| 2/2 [00:11<00:00,  5.98s/it]


In [5]:
# Display the per-run metrics dict (reward, profit, carbon_cost,
# excess_charge, max_profit in the recorded output).
reward_breakdown

{'reward': [7.190068854778987, 8.184220770725473],
 'profit': [9.174460603153184, 10.526924921587453],
 'carbon_cost': [1.9843570817075287, 2.3426348175286504],
 'excess_charge': [3.4666666666666665e-05, 6.933333333333357e-05],
 'max_profit': [11.101619999999999, 12.785959999999998]}

In [5]:


###

from ray import tune
from ray.air import session


# def trainable(config: dict):
#     checkpoint_dir = tune.get_trial_dir()
#     print(checkpoint_dir)
#     print(config)

# def trainable(config: dict):
#     checkpoint_dir = tune.get_trial_dir()
#     print(checkpoint_dir)
#     print(config)

#     train_config = (
#         ppo.PPOConfig()
#         .environment("my_env", env_config={
#             "full": True,
#             "real_trace": False,
#             "dp": "Summer 2019",
#             "site": "caltech",
#             "discrete": False,
#             "seed": 123
#         })
#         .framework("tf2")
#     )
#     algo = train_config.build(env="my_env")

#     for i in range(2):
#         train_results = algo.train()

#         algo.


def experiment(config):
    """Tune function trainable: train PPO, checkpoint it, then evaluate one episode.

    Builds an algorithm from the notebook-level ``train_config``, runs one
    training iteration, saves a checkpoint under the trial directory and
    reports the training metrics. It then restores that checkpoint into a
    freshly built PPO algorithm, rolls out a single episode by hand on the
    two-week real-trace sample, and reports the training and evaluation
    metrics together.

    Args:
        config: trial configuration supplied by Tune; not read by this function.
    """
    # Checkpoints are written under this trial's own directory.
    checkpoint_dir = tune.get_trial_dir()

    algo = train_config.build(env="my_env")
    print("algo built")
    for _ in range(1):
        print("begin training algo")
        train_results = algo.train()
        print("done training algo")
        print(train_results['agent_timesteps_total'])
        print(train_results['custom_metrics'])
        print(train_results['episode_reward_max'])
        print(train_results['episode_reward_mean'])
        print(train_results['episode_reward_min'])

        # print(pretty_print(train_results))
        # save() returns the location of the checkpoint it actually wrote,
        # which may be a sub-directory of checkpoint_dir; keep it for restore.
        checkpoint_path = algo.save(checkpoint_dir)
        print(f"Checkpoint saved in directory {checkpoint_path}")
        # session.report takes the metrics as a single dict.
        session.report({**train_results, "a": 2})
    algo.stop()

    # Manual Eval
    eval_env_config = {
        "full": False,
        "real_trace": True,
        "dp": "Summer 2019",
        "site": "caltech",
        "discrete": False,
        "seed": 123
    }
    eval_config = (
        ppo.PPOConfig()
        .environment("my_env", env_config=eval_env_config)
    )
    eval_algo = eval_config.build(env="my_env")
    eval_algo.restore(checkpoint_path)

    # Build the evaluation env directly, the same way the registered "my_env"
    # creator does. The algorithm's local worker is not guaranteed to hold an
    # env instance when remote rollout workers are in use.
    env = get_env(**eval_env_config)()

    obs, info = env.reset()
    done = False
    eval_results = {"eval_reward": 0, "eval_eps_length": 0}
    while not done:
        action = eval_algo.compute_single_action(obs)
        # Feed the new observation back into the policy on the next step.
        obs, reward, terminated, truncated, info = env.step(action)
        eval_results["eval_reward"] += reward
        eval_results["eval_eps_length"] += 1
        # The episode is over on termination or on truncation (the env
        # returned by get_env is wrapped in TimeLimit).
        done = terminated or truncated
    eval_algo.stop()

    results = {**train_results, **eval_results}
    print(results)
    session.report({**results, "a": 3})

import os

# ray.init(num_cpus=3)
# register_env("my_env", lambda config: get_env(**config)())

# Resource request of a PPO algorithm with default settings; the experiment
# trial is scheduled with this request.
resources = ppo.PPO.default_resource_request(ppo.PPOConfig())
experiment_with_resources = tune.with_resources(experiment, resources=resources)

# Single trial with an empty search space.
# NOTE(review): the recorded output shows the trial stuck in PENDING until the
# run was aborted -- possibly the `algo` built earlier in the notebook was
# still holding the CPUs this trial needs; confirm.
tuner = tune.Tuner(experiment_with_resources, param_space={})
final_results = tuner.fit()

print(final_results)

0,1
Current time:,2023-04-12 07:34:41
Running for:,00:16:41.48
Memory:,8.2/15.3 GiB

Trial name,status,loc
experiment_27971_00000,PENDING,


2023-04-12 07:34:46,079	ERROR tune.py:794 -- Trials did not complete: [experiment_27971_00000]
2023-04-12 07:34:46,079	INFO tune.py:798 -- Total run time: 1006.51 seconds (1006.50 seconds for the tuning loop).


<ray.tune.result_grid.ResultGrid object at 0x7f81c7f87040>
