In [1]:
from stable_baselines3.sac.policies import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import SAC
import logging
import gym
import numpy as np
import os

# Modelica libraries to load, each given as the path to its package.mo file.
# The paths are relative to this file.
libs = ["../../OpenIPSL-1.5.0/OpenIPSL/package.mo"]

# Warn early about any package.mo that is not on disk. Per the original note,
# DymolaInterface() performs the same check; this message is simply easier to spot.
for lib in libs:
    if os.path.isfile(lib):
        continue
    print(f"Cannot find the library {lib}")

# name of the Modelica model, in Library.Model notation
mo_name = "OpenIPSL.Examples.IEEE9.IEEE_9_Base_Case_OL"
# Python package location of the RL environment class
env_entry_point = 'examples:IEEE9Env'

time_step = 1  # time delta in seconds
positive_reward = 1
negative_reward = -100  # penalty given to the RL agent on is_done
log_level = logging.DEBUG
default_action = [0]

# Settings handed to the model-specific environment class;
# mo_name and libs are passed on to the DymolaBaseEnv class.
config = dict(
    mo_name=mo_name,
    libs=libs,
    time_step=time_step,
    positive_reward=positive_reward,
    negative_reward=negative_reward,
    log_level=log_level,
    default_action=default_action,
)

In [2]:
# Register the model-specific class as an OpenAI gym environment so it can be
# created by id.
from gym.envs.registration import register

env_name = "MicrogridEnv-v0"

# `config` supplies the keyword arguments for the class at `env_entry_point`
register(id=env_name, entry_point=env_entry_point, kwargs=config)

In [3]:
# Create the environment from the id registered above. Per the original note,
# this runs an initial simulation step that must return [True, [...]];
# anything else means something is broken.
# TODO: create error handling/warnings if simulations don't work (i.e. returns [False], [...])
env = gym.make(env_name)



In [4]:
# Display the text returned by Dymola's getLastErrorLog(), one list entry per line
env.dymola.getLastErrorLog().splitlines()

['Translation of <a href="Modelica://OpenIPSL.Examples.IEEE9.IEEE_9_Base_Case_OL">OpenIPSL.Examples.IEEE9.IEEE_9_Base_Case_OL</a>:',
 'The DAE has 1330 scalar unknowns and 1330 scalar equations.',
 'The translated model contains large nonlinear systems of equations. The simulation may run faster in DAE mode. Enable DAE mode by setting the flag Advanced.Define.DAEsolver and use one of the solvers Dassl, Radau IIa, Esdirk*, or Sdirk34hw.',
 '',
 'Statistics',
 '',
 'Original Model',
 '  Number of components: 540',
 '  Variables: 3444',
 '  Parameters: 2114 (2249 scalars)',
 '  Unknowns: 1330 (1330 scalars)',
 '  Differentiated variables: 116 scalars',
 '  Equations: 1106',
 '  Nontrivial: 857',
 'Translated Model',
 '  Constants: 418 scalars',
 '  Free parameters: 579 scalars',
 '  Parameter depending: 1270 scalars',
 '  Continuous time states: 116 scalars',
 '  Time-varying variables: 541 scalars',
 '  Alias variables: 771 scalars',
 '  Number of mixed real/discrete systems of equations

In [6]:
# Baseline rollout: apply the fixed default action for 10 steps and record
# every reward. The min / max / mean gathered here are stored on the
# environment for normalizing the reward later (to improve training); this
# part can be safely ignored for now.
reward_list = []
obs = env.reset()

for _ in range(10):
    action = default_action  # fixed action from the config cell, no policy involved
    obs, reward, done, info = env.step(action)
    reward_list.append(reward)
    if done:
        # keep `obs` pointing at the first observation of the new episode
        obs = env.reset()

# Store the statistics on the underlying environment. gym.make() may return a
# gym.Wrapper, and attributes assigned on a wrapper stay on the wrapper instead
# of reaching the wrapped env. `unwrapped` is the env itself when no wrapper is
# present, and wrappers still forward reads of these names to it.
# NOTE(review): presumably the environment class reads these for normalization - confirm.
env.unwrapped.min_reward = min(reward_list)
env.unwrapped.max_reward = max(reward_list)
env.unwrapped.avg_reward = sum(reward_list) / len(reward_list)

In [7]:
# Build a SAC agent with the MLP policy on the environment and run a very
# short learn() call of 10 timesteps.
# NOTE(review): stable-baselines3's SAC appears to default to learning_starts=100,
# so 10 timesteps would only collect experience without any gradient update -
# confirm, and raise total_timesteps for real training.
model = SAC(policy=MlpPolicy, env=env)
model.learn(total_timesteps=10)

<stable_baselines3.sac.sac.SAC at 0x199456d83a0>

In [8]:
# Roll out the SAC model from the previous cell for 10 steps and accumulate
# the reward it collects.
obs = env.reset()
sum_rewards = 0
for _ in range(10):
    # model.predict returns an (action, state) pair, which is why the raw
    # return value has len() == 2; only the action is passed to env.step
    action, _state = model.predict(obs)
    obs, reward, done, info = env.step(action)
    sum_rewards += reward
    if done:
        # start the next prediction from the new episode's first observation,
        # not from the terminal observation of the finished one
        obs = env.reset()

In [9]:
# total reward accumulated over the 10 rollout steps of the SAC model
sum_rewards

0.5925356113818473