In [1]:
import os, sys

import gym, ray
from ray import tune
from ray.rllib.agents import ppo, ddpg
from ray.tune import register_env

from src.environments.create_env import create_env
from src.environments.gym_power_voltage_env import GymPowerVoltageEnv
from src.samplers.load_samplers import load_samplers

In [2]:
# Environment / data configuration consumed by load_samplers() and create_env().
# Key order is kept as-is; each comment describes the key directly below it.
config = {
    # Location of the input data
    "path_to_data": "data/",
    # When the episode starts (default value 6AM)
    "t0_hr": 6.0,
    # Timestep size (the `_min` suffix suggests minutes -- confirm)
    "dt_min": 30,
    # Timestep size for EV arrivals
    "ev_dt_min": 60,
    # How EV sessions are sampled from the data
    "ev_sampling_dt_min": 60,
    # Make data noisy
    "apply_gaussian_noise": False,
    # Mean value of the utility coefficient for the EVs
    "ev_utility_coef_mean": 1,
    # STD of the utility coefficient for the EVs
    "ev_utility_coef_scale": 0.13,
    # Days per month for training
    "days_per_month_train": 20,
    # Months to sample EV sessions for training
    "ev_session_months_train": [
        "01", "02", "03", "04", "06",
        "07", "08", "09", "10", "11",
    ],
    # What grid topology to use. Now supports only IEEE16.
    "grid_to_use": "ieee16",
    # Months to sample EV sessions for test
    "ev_session_months_test": ["05", "12"],
    # Amount of solar panels that use PecanStreet data
    "n_ps_pvs": 4,
    # Amount of solar panels that use canopy data
    "n_canopy_pvs": 0,
    # Rated power of the canopy panels
    "canopy_pv_rated_power": 250,
    # Amount of inflexible loads
    "n_loads": 0,
    # Amount of feeders
    "n_feeders": 1,
    # Amount of EV chargers
    "n_ev_chargers": 4,
    # Rated power of the PecanStreet panels
    "ps_pvs_rated_power": 4,
    # Scaling of the EV arrival rate
    "avg_evs_per_day": 3.5,
    # Capacity of the feeders
    "feeder_p_min": -5,
    # Conductance of each line
    "g": 4,
    # Capacity of each line
    "i_max": 25,
}


In [3]:
# Per-process cache of the tuple returned by load_samplers(), keyed by the
# repr of the config it was loaded with. Re-running this cell resets it.
_SAMPLERS_CACHE = {}


def env_creator(a):
    """Create one environment instance from the notebook-level ``config``.

    This is the factory passed to ``register_env``. The samplers are loaded
    at most once per process (and per distinct ``config`` contents) and then
    reused, so creating several environments in the same process does not
    re-read the data each time.

    Args:
        a: Argument supplied by the caller of the factory (RLlib passes the
            env config here). It is currently ignored; the environment is
            always built from the module-level ``config``.

    Returns:
        Whatever ``create_env`` returns for the given config and samplers.
    """
    # repr() is used as the key because the config dict itself is unhashable;
    # a changed config therefore triggers a fresh load instead of a stale hit.
    cache_key = repr(config)
    if cache_key not in _SAMPLERS_CACHE:
        _SAMPLERS_CACHE[cache_key] = load_samplers(config)

    # NOTE(review): this shares the same sampler objects between all envs
    # created in one process, as the original "preload" comment intended --
    # confirm the samplers are safe to share.
    (ps_samplers_dict, ps_metadata, canopy_sampler, canopy_metadata,
     price_sampler, price_metadata, ev_sampler, elaadnl_metadata) = _SAMPLERS_CACHE[cache_key]

    return create_env(
        config,
        ps_samplers_dict,
        ps_metadata,
        canopy_sampler,
        canopy_metadata,
        price_sampler,
        price_metadata,
        ev_sampler,
        elaadnl_metadata
    )  # return an env instance


# How to register and run custom environments with RLlib:
# https://docs.ray.io/en/latest/rllib/rllib-env.html

# ignore_reinit_error=True makes this cell safe to re-run in a live kernel;
# a bare ray.init() raises RuntimeError when Ray is already initialised.
ray.init(ignore_reinit_error=True)

# Expose env_creator to RLlib under the name used in the trainer config.
register_env("my_env", env_creator)

In [6]:

# Build a DDPG trainer on the registered environment. Only the keys listed
# below are overridden; every other option keeps the default DDPG config:
# https://docs.ray.io/en/latest/rllib/rllib-algorithms.html#ddpg
trainer = ddpg.DDPGTrainer(
    config=dict(
        env="my_env",
        framework="torch",
        num_gpus=0,
        num_workers=1,
    ),
    env="my_env",
)
# Options left disabled from earlier experiments:
#   stop={"episode_reward_mean":200}   (stopping condition)
#   checkpoint_freq=1

# Run a single training iteration; as the cell's final expression, the
# returned result dict is what the notebook displays.
trainer.train()

# trainer = ppo.PPOTrainer(env="my_env", config={
#     "env_config": config,  # config to pass to env class
#     "framework": "torch",
# })

# while True:
#     print(trainer.train())


2022-05-16 19:51:03,175	INFO simple_q.py:161 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
2022-05-16 19:51:03,178	INFO trainer.py:864 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=5928)[0m {'path_to_data': 'data/', 't0_hr': 6.0, 'dt_min': 30, 'ev_dt_min': 60, 'ev_sampling_dt_min': 60, 'apply_gaussian_noise': False, 'ev_utility_coef_mean': 1, 'ev_utility_coef_scale': 0.13, 'days_per_month_train': 20, 'ev_session_months_train': ['01', '02', '03', '04', '06', '07', '08', '09', '10', '11'], 'grid_to_use': 'ieee16', 'ev_session_months_test': ['05', '12'], 'n_ps_pvs': 4, 'n_canopy_pvs': 0, 'canopy_pv_rated_power': 250, 'n_loads': 0, 'n_feeders': 1, 'n_ev_chargers': 4, 'ps_pvs_rated_power': 4, 'avg_evs_per_day': 3.5, 'feeder_p_min': -5, 'g': 4, 'i_max': 25}


  torch.from_numpy(self.action_space.low).float()
2022-05-16 19:53:41,624	INFO trainable.py:152 -- Trainable.setup took 158.455 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'episode_reward_max': 451.1024208964704,
 'episode_reward_min': 140.17882372613917,
 'episode_reward_mean': 299.4716468125054,
 'episode_len_mean': 47.0,
 'episode_media': {},
 'episodes_this_iter': 31,
 'policy_reward_min': {},
 'policy_reward_max': {},
 'policy_reward_mean': {},
 'custom_metrics': {},
 'hist_stats': {'episode_reward': [434.62222078957916,
   344.64107935347465,
   404.55860787608924,
   451.1024208964704,
   232.61883043846652,
   330.28486646341975,
   349.8367106543632,
   297.51965905303155,
   263.71013849729894,
   287.5411051444022,
   302.7718862200873,
   399.50292471997136,
   339.32341093025195,
   319.8965938093133,
   279.26307283532,
   272.0093919674605,
   224.60744414973527,
   247.29895706889977,
   379.10225512802987,
   305.25093242293445,
   192.7317190537429,
   353.1363935910019,
   263.22432648117405,
   246.30434060199119,
   372.5339313909495,
   307.82443542966735,
   285.9229746659906,
   213.26370113366164,
   140.17882372613917,
   276.9

Box([ -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5.
  -5.  -5.  -5.  -5.  -5.  -5.  -5.  -5. 300. 300. 300. 300. 300. 300.
 300. 300. 300. 300. 300. 300. 300. 300. 300. 300. 300. 300. 300. 300.
 300. 300.], [ 10.  10.  10.  10.  10.  10.  10.  10.  10.  10.  10.  10.  10.  10.
  10.  10.  10.  10.  10.  10.  10.  10. 400. 400. 400. 400. 400. 400.
 400. 400. 400. 400. 400. 400. 400. 400. 400. 400. 400. 400. 400. 400.
 400. 400.], (44,), float64)
