In [1]:
import os, sys

import gym, ray
from ray import tune
from ray.rllib.agents import ppo, ddpg
from ray.tune import register_env

from src.environments.create_env import create_env
from src.environments.gym_power_voltage_env import GymPowerVoltageEnv
from src.samplers.load_samplers import load_samplers

import torch as th

In [6]:
# Experiment settings; `env_creator` hands this dict to both `load_samplers`
# and `create_env`. Key order is kept as-is.
config = {
    "path_to_data": "data/",

    # When the episode starts (default value 6AM)
    "t0_hr": 6.0,
    # Timestep size
    "dt_min": 30,
    # Timestep size for EV arrivals
    "ev_dt_min": 60,
    # How EV sessions are sampled from the data
    "ev_sampling_dt_min": 60,
    # Make data noisy
    "apply_gaussian_noise": False,
    # Mean value of the utility coefficient for the EVs
    "ev_utility_coef_mean": 1,
    # STD of the utility coefficient for the EVs
    "ev_utility_coef_scale": 0.13,
    # Days per month for training
    "days_per_month_train": 20,
    # Months to sample EV sessions for training
    "ev_session_months_train": [
        "01", "02", "03", "04", "06", "07", "08", "09", "10", "11",
    ],
    # What grid topology to use. Now supports only IEEE16.
    "grid_to_use": "ieee16",
    # Months to sample EV sessions for test
    "ev_session_months_test": ["05", "12"],

    # Number of solar panels that use PecanStreet data
    "n_ps_pvs": 4,
    # Number of solar panels that use canopy data
    "n_canopy_pvs": 0,
    # Rated power of the canopy panels
    "canopy_pv_rated_power": 250,
    # Number of inflexible loads
    "n_loads": 0,
    # Number of feeders
    "n_feeders": 1,
    # Number of EV chargers
    "n_ev_chargers": 4,

    # Rated power of the PecanStreet panels
    "ps_pvs_rated_power": 4,
    # Scaling of the EV arrival rate
    "avg_evs_per_day": 3.5,
    # Capacity of the feeders
    "feeder_p_min": -5,
    # Conductance of each line
    "g": 4,
    # Capacity of each line
    "i_max": 25,

    "environment_type": "gym",

    "dataset_max_size": 1,
    "split_train_test": False,
}


In [3]:
def env_creator(a):
    """Build one environment instance for RLlib.

    Args:
        a: The env config RLlib hands to a registered env creator (the
            trainer's ``env_config``). When it is empty or ``None`` the
            notebook-level ``config`` dict is used instead, so trainers that
            do not set ``env_config`` behave exactly as before.

    Returns:
        The environment object returned by ``create_env``.
    """
    # Honour a per-trainer `env_config`; fall back to the global settings.
    env_settings = a if a else config

    # NOTE: the samplers are loaded inside the creator, i.e. the data is read
    # again every time an environment is created.
    (ps_samplers_dict, ps_metadata, canopy_sampler, canopy_metadata,
     price_sampler, price_metadata, ev_sampler, elaadnl_metadata) = load_samplers(env_settings)

    return create_env(
        env_settings,
        ps_samplers_dict,
        ps_metadata,
        canopy_sampler,
        canopy_metadata,
        price_sampler,
        price_metadata,
        ev_sampler,
        elaadnl_metadata
    )  # return an env instance


# How to run a custom environment with RLlib:
# https://docs.ray.io/en/latest/rllib/rllib-env.html

# `ignore_reinit_error=True` keeps this cell re-runnable: a second bare
# `ray.init()` in the same kernel raises because Ray is already initialised.
ray.init(ignore_reinit_error=True)

# Make `env_creator` available to trainers under the name "my_env".
register_env("my_env", env_creator)

In [3]:
# Report whether torch can see a CUDA device.
cuda_visible = th.cuda.is_available()
print(cuda_visible)

True


In [7]:
# Build a DDPG trainer on the registered environment.
# Every option not listed in `config` keeps its DDPG default; the available
# options are documented here:
# https://docs.ray.io/en/latest/rllib/rllib-algorithms.html#ddpg
#
# NOTE: `stop={"episode_reward_mean": 200}` and `checkpoint_freq=1` are
# `tune.run` options, not trainer-constructor arguments, so they are not
# passed here.
trainer = ddpg.DDPGTrainer(
    env="my_env",
    config={
        "env": "my_env",
        "framework": "torch",
        # Request a GPU only when CUDA is actually visible, so the cell also
        # runs on CPU-only machines instead of failing on a hard-coded 1.
        "num_gpus": int(th.cuda.is_available()),
        "num_workers": 1,
    },
)

# Run a single training iteration.
trainer.train()

# PPO alternative:
# trainer = ppo.PPOTrainer(env="my_env", config={
#     "env_config": config,  # config to pass to env class
#     "framework": "torch",
# })

# while True:
#     print(trainer.train())


[2m[36m(RolloutWorker pid=22196)[0m {'path_to_data': 'data/', 't0_hr': 6.0, 'dt_min': 30, 'ev_dt_min': 60, 'ev_sampling_dt_min': 60, 'apply_gaussian_noise': False, 'ev_utility_coef_mean': 1, 'ev_utility_coef_scale': 0.13, 'days_per_month_train': 20, 'ev_session_months_train': ['01', '02', '03', '04', '06', '07', '08', '09', '10', '11'], 'grid_to_use': 'ieee16', 'ev_session_months_test': ['05', '12'], 'n_ps_pvs': 4, 'n_canopy_pvs': 0, 'canopy_pv_rated_power': 250, 'n_loads': 0, 'n_feeders': 1, 'n_ev_chargers': 4, 'ps_pvs_rated_power': 4, 'avg_evs_per_day': 3.5, 'feeder_p_min': -5, 'g': 4, 'i_max': 25, 'environment_type': 'gym', 'dataset_max_size': 1, 'split_train_test': False}
[2m[36m(RolloutWorker pid=22196)[0m loading pecanstreet
[2m[36m(RolloutWorker pid=22196)[0m loading pvdata
[2m[36m(RolloutWorker pid=22196)[0m loading elaadnl
[2m[36m(RolloutWorker pid=22196)[0m loading newyork_price


2022-05-25 17:42:49,401	INFO trainable.py:152 -- Trainable.setup took 158.181 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0
[2m[36m(RolloutWorker pid=22196)[0m Reseting episode to index: 0




TypeError: train() got an unexpected keyword argument 'reuse_actors'

In [11]:
from ray.tune.logger import pretty_print

NUM_TRAIN_ITERATIONS = 100  # number of `trainer.train()` calls
CHECKPOINT_EVERY = 10       # save a checkpoint every this many iterations

for i in range(NUM_TRAIN_ITERATIONS):
    result = trainer.train()
    # print(pretty_print(result))

    print(i)
    print(f"episode_reward_max: {result['episode_reward_max']}")
    print(f"episode_reward_min: {result['episode_reward_min']}")
    print(f"episode_reward_mean: {result['episode_reward_mean']}")

    # Checkpoint periodically, and always after the last iteration: with the
    # modulo test alone the final save happens at i == 90 and the remaining
    # iterations are never written to disk.
    if i % CHECKPOINT_EVERY == 0 or i == NUM_TRAIN_ITERATIONS - 1:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)

StopIteration: 

In [9]:
# Reward summary of the most recent training result, one metric per line.
print(
    f"episode_reward_max: {result['episode_reward_max']}",
    f"episode_reward_min: {result['episode_reward_min']}",
    f"episode_reward_mean: {result['episode_reward_mean']}",
    sep="\n",
)

episode_reward_max: -7050.0
episode_reward_min: -7050.0
episode_reward_mean: -7050.0
