In [1]:
from stable_baselines3.sac.policies import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import SAC
import logging
import gym
import numpy as np
import os

# Modelica reference libraries, given relative to the location of this file.
libs = [
    "../../OpenIPSL-1.5.0/OpenIPSL/package.mo",
    # KundurSMIB modified to have voltage control
    "../../OpenIPSL-1.5.0/ApplicationExamples/KundurSMIB/package.mo",
]

# Warn early about any package.mo that is missing on disk.
# DymolaInterface() also checks this, but an explicit warning here has proven helpful.
for lib in libs:
    if os.path.isfile(lib):
        continue
    print(f"Cannot find the library {lib}")

# Modelica model (Library.Model format) and the Python location of the RL environment class
mo_name = "KundurSMIB.SMIB_vref"
env_entry_point = 'examples:DymSMIBEnv'

# tunable settings
v_ref = 1
time_step = 1  # time delta in seconds
positive_reward = 1
negative_reward = -100  # penalize RL agent for is_done
log_level = logging.DEBUG

# Keyword arguments handed to the model specific environment class;
# mo_name and libs are passed on to the DymolaBaseEnv class.
config = dict(
    mo_name=mo_name,
    libs=libs,
    v_ref=v_ref,
    time_step=time_step,
    positive_reward=positive_reward,
    negative_reward=negative_reward,
    log_level=log_level,
)

# Make the model specific class available as an OpenAI gym environment.
# The id chosen here is the name later handed to gym.make().
from gym.envs.registration import register

env_name = "MicrogridEnv-v0"
register(id=env_name, entry_point=env_entry_point, kwargs=config)

In [2]:
# Instantiate the environment that was registered under env_name.
# Per the original author's note: creation runs an initial simulation step whose result
# must be [True, [...]]; anything else means something is broken.
# TODO: create error handling/warnings if simulations don't work (i.e. returns [False], [...])
env = gym.make(env_name)

Removing old files...




In [4]:
# Reward statistics collected for normalizing the reward later (to improve training);
# can be safely ignored for now.
n_steps = 10
min_reward = np.inf
max_reward = -np.inf
avg_reward = 0
obs = env.reset()

# show performance over n_steps steps in the constant-input (do-nothing) case,
# with the control voltage held at 2.4 pu
for _ in range(n_steps):
    action = [2.4] # control voltage = 2.4 pu
    obs, reward, done, info = env.step(action)
    if done:
        # keep obs in sync with the freshly reset environment
        obs = env.reset()

    # a continuation of the reward normalizing piece (can be ignored for now)
    # each reward is weighted by 1/n_steps so avg_reward is the mean over the loop
    avg_reward += reward / n_steps
    min_reward = min(min_reward, reward)
    max_reward = max(max_reward, reward)

Removing old files...
[True, [1.004036435712441, 2.4]]
[True, [0.9434768685264913, 2.4]]
[True, [0.8764119088027409, 2.4]]
[True, [0.8364366439252497, 2.4]]
[True, [0.8408108452652046, 2.4]]
[True, [0.879102182918432, 2.4]]
[True, [0.9147671483845703, 2.4]]
[True, [0.9284701799284032, 2.4]]
[True, [0.9185757247925377, 2.4]]
[True, [0.8953437650988731, 2.4]]


In [8]:
# Call the Dymola interface held by the environment directly, simulating ten
# consecutive intervals of length dt with a constant v_ref input.
dt = 1
start, stop = 0, 1
v_ref = 2.4
p_gen = []
for i in range(10):
    # every interval after the first imports its initial values from the
    # previous result file at the new start time
    if i:
        env.dymola.importInitialResult('dsres.mat', atTime=start)
    res = env.dymola.simulateExtendedModel(
        'KundurSMIB.SMIB_vref',
        startTime=start,
        stopTime=stop,
        initialNames=['v_ref'],
        initialValues=[v_ref],
        finalNames=['G1.machine.P', 'v_ref'],
    )
    print(res)

    # slide the simulation window forward by one interval
    start, stop = start + dt, stop + dt

    # first requested final value, i.e. the entry listed first in finalNames
    p_gen.append(res[1][0])

[True, [1.004036435712441, 2.4]]
[True, [0.9434775763840382, 2.4]]
[True, [0.8764145590134519, 2.4]]
[True, [0.8364397731572241, 2.4]]
[True, [0.8408130279189785, 2.4]]
[True, [0.8791020200035254, 2.4]]
[True, [0.9147662176596749, 2.4]]
[True, [0.9284693990952451, 2.4]]
[True, [0.9185764052902812, 2.4]]
[True, [0.895343659328478, 2.4]]


In [None]:
# Display the kernel's current working directory; relative paths such as
# 'temp_dir' are resolved against it.
os.getcwd()

In [7]:
# Delete every entry in temp_dir, echoing each name as it is removed.
temp_path = os.path.join(os.getcwd(), 'temp_dir')
for file in os.listdir('temp_dir'):
    print(file)
    os.remove(os.path.join(temp_path, file))

# List whatever is left afterwards (prints nothing when the directory is empty).
for file in os.listdir('temp_dir'):
    print(file)

buildlog.txt
dsfinal.txt
dsin.txt
dslog.txt
dsmodel.c
dsres.mat
dymosim.exe


In [None]:
# Reward statistics collected for normalizing the reward later (to improve training);
# can be safely ignored for now.
n_steps = 10
min_reward = np.inf
max_reward = -np.inf
avg_reward = 0
obs = env.reset()

# show performance over n_steps steps in the constant-input (do-nothing) case,
# with the control voltage held at 2.4 pu
for _ in range(n_steps):
    action = [2.4] # control voltage = 2.4 pu
    obs, reward, done, info = env.step(action)
    if done:
        # keep obs in sync with the freshly reset environment
        obs = env.reset()

    # a continuation of the reward normalizing piece (can be ignored for now)
    # each reward is weighted by 1/n_steps so avg_reward is the mean over the loop
    avg_reward += reward / n_steps
    min_reward = min(min_reward, reward)
    max_reward = max(max_reward, reward)

In [None]:
# start from a fresh episode
obs = env.reset()

# Sanity check with a randomized agent, verifying:
#    (1) that the simulation runs when we are controlling and changing an input value
#    (2) that the simulation outputs different results than the do-nothing or rule-based controller
action_low, action_high = 1.0, 2.0
for _ in range(10):
    # one uniformly sampled control value per step
    action = [np.random.uniform(action_low, action_high)]
    step_result = env.step(action)
    obs, reward, done, info = step_result
    if done:
        env.reset()

In [None]:
# create a stable-baselines Soft Actor Critic agent
model = SAC(MlpPolicy, env, verbose=1, tensorboard_log="tensorboard_logs")

# run a short training period to verify that the syntax is ok
print("Training the model...")
obs = env.reset()
model.learn(total_timesteps=20, tb_log_name="microgrid")

# run a short test period to verify that the syntax is ok
print("Testing the model...")
obs = env.reset()
rl_reward = 0  # sum of the rewards collected during the test rollout
for _ in range(10):
    action, _state = model.predict(obs)
    obs, reward, done, info = env.step(action)
    if done:
        # take the observation returned by reset(); otherwise the next
        # predict() call would run on the stale end-of-episode observation
        obs = env.reset()
    rl_reward += reward