In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%cd ..

/home/ubuntu/sustaingym


In [2]:
from typing import Callable, Optional, Union

import gymnasium as gym
import ray
from ray import tune
from ray.rllib.algorithms import ppo, AlgorithmConfig
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env

from sustaingym.envs.evcharging import EVChargingEnv, RealTraceGenerator, GMMsTraceGenerator, DiscreteActionWrapper, MultiAgentEVChargingEnv
from sustaingym.envs.evcharging.event_generation import AbstractTraceGenerator
from sustaingym.envs.evcharging.utils import \
    DATE_FORMAT, DEFAULT_PERIOD_TO_RANGE, DATE_FORMAT, SiteStr

from gymnasium.wrappers import TimeLimit


###
NUM_SUBPROCESSES = 4
TIMESTEPS = 250_000
EVAL_FREQ = 10_000
SAMPLE_EVAL_PERIODS = {
    'Summer 2019':   ('2019-07-01', '2019-07-14'),
    'Fall 2019':     ('2019-11-04', '2019-11-17'),
    'Spring 2020':   ('2020-04-06', '2020-04-19'),
    'Summer 2021':   ('2021-07-05', '2021-07-18'),
}

def get_env(full: bool, real_trace: bool, dp: str, site: SiteStr, discrete: bool = False,
            multiagent: bool = True, periods_delay: int = 0, seed: int= None) -> Callable:
    """Return environment.

    Args:
        full: if True, use full season; otherwise, use sample 2 weeks
        real_trace: choice of generator
        dp: 'Summer 2019', 'Fall 2019', 'Spring 2020', 'Summer 2021'
        site: 'caltech' or 'jpl'
        discrete: whether to wrap environment in discrete action wrapper
        seed: seed for GMMs generator
        multiagent: if True, return multi-agent environment. Note
            discrete = True and multiagent = True is currently not
            supported.
        periods_delay: number of timesteps for communication delay in
            the multiagent setting, ignored if multiagent = False
    
    Returns:
        Callable of environment
    """
    date_period = DEFAULT_PERIOD_TO_RANGE[dp] if full else SAMPLE_EVAL_PERIODS[dp]

    def _get_env() -> EVChargingEnv:
        if real_trace:
            gen: AbstractTraceGenerator = RealTraceGenerator(site, date_period)
        else:
            gen = GMMsTraceGenerator(site, date_period, seed=seed)
        
        if discrete:
            if multiagent:
                raise ValueError("discrete = True and multiagent = True currently not supported")
            else:
                return DiscreteActionWrapper(EVChargingEnv(gen))
        else:
            if multiagent:
                return MultiAgentEVChargingEnv(gen)
            else:
                return EVChargingEnv(gen)
    return _get_env

In [3]:
register_env("multiagent_evcharging", lambda config: get_env(**config)())
env = get_env(True, True, 'Summer 2019', 'caltech', multiagent=True)()

In [4]:
from ray.rllib.algorithms.ppo import PPOConfig

# full: bool, real_trace: bool, dp: str, site: SiteStr, discrete: bool = False,
#             multiagent: bool = True, periods_delay: int = 0, seed: int= None

# policies = {"policy_{}".format(i): gen_policy(i) for i in range(54)}

# def policy_mapping_fn(agent_id, episode, worker, **kwargs):
#     pol_id = random.choice(policy_ids)
#     return pol_id

config = (
    PPOConfig()
    .environment("multiagent_evcharging", env_config={
        "full": True,
        "real_trace": True,
        "dp": 'Summer 2019',
        "site": 'caltech',
        "discrete": False,
        "multiagent": True,
        "periods_delay": 0,
    })
    # .framework('tf2')
    # .training(num_sgd_iter=10)
    # .multi_agent(
    #     policies=env.agents,
    #     policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),)
    # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
    # .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
)

In [5]:
algo = config.build(env="multiagent_evcharging")

2023-04-14 04:47:31,341	INFO worker.py:1553 -- Started a local Ray instance.
2023-04-14 04:47:44,501	INFO trainable.py:172 -- Trainable.setup took 15.364 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [10]:
# algo.train()
obs, info = env.reset()

action = {}
for k in obs:
    action[k] = algo.compute_single_action(obs[k])

env.step(action)
# done = False
# eval_results = {"eval_reward": 0, "eval_eps_length": 0}
# while not done:
#     action = eval_algo.compute_single_action(obs)
#     next_obs, reward, done, truncated, info = env.step(action)
#     eval_results["eval_reward"] += reward
#     eval_results["eval_eps_length"] += 1
# eval_algo.stop()
# results = {**train_results, **eval_results}
# print(results)
# tune.report({**results, "a": 3})

({'CA-308': array([0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0. 

E0414 04:54:30.865046896  815217 completion_queue.cc:738]              Kick failed: UNKNOWN:Bad file descriptor {created_time:"2023-04-14T04:54:30.865027927+00:00", errno:9, os_error:"Bad file descriptor", syscall:"eventfd_write"}
E0414 04:55:26.580806687  822026 completion_queue.cc:738]              Kick failed: UNKNOWN:Bad file descriptor {syscall:"eventfd_write", os_error:"Bad file descriptor", errno:9, created_time:"2023-04-14T04:55:26.580779928+00:00"}


In [14]:
import numpy as np
print(algo.compute_single_action(np.zeros((146,))))
print(algo.compute_single_action(np.ones((146,))))
print(algo.compute_single_action(0.4 * np.ones((146,))))

[0.48645762]
[0.49255377]
[0.]


E0414 04:27:08.625068471  633569 completion_queue.cc:738]              Kick failed: UNKNOWN:Bad file descriptor {created_time:"2023-04-14T04:27:08.625044286+00:00", errno:9, os_error:"Bad file descriptor", syscall:"eventfd_write"}


In [None]:
ray.rllib.utils.check_env(env)

In [9]:
# env = MultiAgentEVChargingEnv()
x, _ = env.reset()
env.step()
# for agent in env.agent_iter():
#     observation, reward, termination, truncation, info = env.last()
#     action = policy(observation, agent)
#     env.step(action)

TypeError: step() missing 1 required positional argument: 'action'

In [22]:
from ray.rllib.env.multi_agent_env import make_multi_agent
env = make_multi_agent("CartPole-v0")()
ray.rllib.utils.check_env(env)



In [23]:
env.reset()

({0: array([ 0.04897395,  0.04067178, -0.01104074, -0.02334764], dtype=float32)},
 {0: {}})

In [24]:
import numpy as np

env.step({0: 1})

({0: array([ 0.04978739,  0.23595032, -0.01150769, -0.3194935 ], dtype=float32)},
 {0: 1.0},
 {0: False, '__all__': False},
 {0: False, '__all__': False},
 {0: {}})

In [25]:
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [None]:
config.build()

In [None]:
env.observation_space.sample()

In [None]:
env = get_env(True, True, 'Summer 2019', 'caltech', multiagent=True)()
ray.rllib.utils.check_env(env)

In [18]:
from ray.rllib.env.multi_agent_env import make_multi_agent
# By gym string:
ma_cartpole_cls = make_multi_agent("CartPole-v1") 
# Create a 2 agent multi-agent cartpole.
ma_cartpole = ma_cartpole_cls({"num_agents": 2}) 
obs, _ = ma_cartpole.reset()

ray.rllib.utils.check_env(ma_cartpole)

In [20]:
obs

{0: array([-0.03126814, -0.04437572,  0.00936309,  0.00013794], dtype=float32),
 1: array([ 0.01120375, -0.01701763,  0.02676088,  0.03567448], dtype=float32)}

In [17]:
ma_cartpole.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [None]:
import gymnasium as gym 
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.algorithms.ppo import PPOConfig

class MultiMyEnv(MultiAgentEnv):
    def __init__(self, env_config):
        self.envs={id_: gym.make('CartPole-v0') for id_ in env_config["policies"]}
        self.ep_dones=set()
        self._spaces_in_preferred_format = True
        self.action_space=gym.spaces.Dict({id_: self.envs[id_].action_space for id_ in env_config["policies"]})
        self.observation_space=gym.spaces.Dict({id_: self.envs[id_].observation_space for id_ in env_config["policies"]})
        self._agent_ids=set(self.envs.keys())
        super().__init__()

    def reset(self):
        self.ep_dones=set()
        return {env_id: env.reset() for env_id, env in self.envs.items()}

    def action_space_contains(self, x: dict) -> bool:
        for env, action in x.items():
            if not self.envs[env].action_space.contains(action):
                return False
        return True

    def step(self, action_dict):
        new_state, reward, done, info={}, {}, {}, {}
        for env, action in action_dict.items():
            new_state[env], reward[env], done[env], info[env]=self.envs[env].step(
                action)
            if done[env]:
                self.ep_dones.add(env)
        done["__all__"]=self.ep_dones == self._agent_ids
        return new_state, reward, done, info

policies={"2","1", "3"}
config = PPOConfig().environment(env=MultiMyEnv, env_config = {'policies': policies}, 
).framework(framework="torch").rollouts(num_rollout_workers=0).multi_agent(policies=policies, policy_mapping_fn=lambda env_id, ep, *args, **kwargs: env_id)
algo=config.build()

algo.train()