In [1]:
from stable_baselines3.sac.policies import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import SAC
import logging
import gym
import numpy as np
import os

# Modelica libraries to load, given as package.mo paths relative to this file.
libs = ["../../OpenIPSL-1.5.0/OpenIPSL/package.mo"]

# Early, non-fatal sanity check: print a warning for every package.mo that is
# missing on disk. (Per the original note, DymolaInterface() checks this as well;
# this warning just surfaces the problem sooner.)
for lib in libs:
    if os.path.isfile(lib):
        continue
    print(f"Cannot find the library {lib}")

# Modelica model to simulate, in Library.Model format.
mo_name = "OpenIPSL.Examples.IEEE9.IEEE_9_Base_Case_OL"
# Python "module:Class" location of the RL environment.
env_entry_point = 'examples:IEEE9Env'

time_step = 1              # time delta in seconds
positive_reward = 1
negative_reward = -100     # penalize RL agent for is_done
log_level = logging.DEBUG
default_action = [0, 0, 0]

# Values handed to the model-specific environment class; mo_name and libs are
# passed on to the DymolaBaseEnv class. ('actions' and 'states' entries are
# currently not supplied.)
config = dict(
    mo_name=mo_name,
    libs=libs,
    time_step=time_step,
    positive_reward=positive_reward,
    negative_reward=negative_reward,
    log_level=log_level,
    default_action=default_action,
)

# Register the model-specific class with OpenAI gym under `env_name`, so that it
# can later be built by id; `config` is supplied as the environment's kwargs.
from gym.envs.registration import register

env_name = "MicrogridEnv-v0"
register(id=env_name, entry_point=env_entry_point, kwargs=config)

In [2]:
# Instantiate the environment registered under `env_name`.
# Per the original author's note, creation runs an initial simulation step whose
# result must look like [True, [...]]; a [False, [...]] result means something is broken.
# TODO: create error handling/warnings if simulations don't work (i.e. returns [False], [...])
env = gym.make(env_name)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2520915064553761, 1.2520914545994895, 1.2520915214272599]




In [3]:
# Build the Soft Actor-Critic agent with an MLP policy on top of `env`.
# TensorBoard event files go under the "logs" directory.
model = SAC(
    MlpPolicy,
    env,
    learning_rate=10**-6,
    learning_starts=64,
    tensorboard_log="logs",
    verbose=1,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [25]:
import time

# Train in 10 rounds of 500 timesteps each (5,000 in total). The model is saved
# to "IEEE9_5k" after every round, and each round's duration is printed.
for i in range(10):
    # perf_counter() is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() can.
    tic = time.perf_counter()

    # NOTE(review): resetting the wrapped env here, outside of learn(), may leave
    # the observation cached by the algorithm out of sync with the env when
    # reset_num_timesteps=False -- confirm this manual reset is really needed.
    model.env.reset()
    model.learn(500, reset_num_timesteps=False, tb_log_name="first")
    model.save("IEEE9_5k")

    toc = time.perf_counter()
    print(toc-tic)
    print(f"successful round {i}")
    

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2520915064553761, 1.2520914545994895, 1.2520915214272599]
Logging to logs\first_0
CALLING RESET
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2520915064553761, 1.2520914545994895, 1.2520915214272599]


PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\Aisling\\Documents\\modelicagym\\examples\\temp_dir\\dslog.txt'

In [27]:
# Inspect the actor network's current parameters as a single flat vector.
model.actor.parameters_to_vector()

array([ 0.08098724, -0.00318718,  0.10452362, ..., -0.00433198,
       -0.03372954, -0.02442409], dtype=float32)

In [None]:
# Simulate three back-to-back windows of `delta` seconds directly through Dymola.
# For each window, print the raw result and the change in the model's
# 'integrator' final value divided by the window length.
delta = 2
start, stop = 0, delta
cumulated_voltages = np.zeros(9)
env.reset()
for i in range(3):
    # Every window after the first calls importInitial() before simulating
    # (presumably to continue from the previous window's final state -- TODO confirm).
    if i != 0:
        env.dymola.importInitial()
    res = env.dymola.simulateExtendedModel(
        env.model_name,
        startTime=start,
        stopTime=stop,
        finalNames=['integrator'],
    )
    print(res)

    # res[1][0] is the final value reported for 'integrator' in this window
    latest = res[1][0]
    average_voltages = np.subtract(latest, cumulated_voltages) / delta
    cumulated_voltages = latest
    print(average_voltages)

    # slide the window forward by one window length
    start, stop = start + delta, stop + delta

In [42]:
# Do-nothing baseline: reset the environment, then apply the all-zero action for
# 10 steps (per the original note: 10 seconds, control voltage set at 1.0 pu).
# Each step's reward is printed and the total is accumulated in `sum_rewards`.
obs = env.reset()
sum_rewards = 0
for i in range(10):
    # A literal action is used instead of model.predict(obs); the original note
    # reports that the raw predict() output appeared to have len() == 2.
    action = [0,0,0]
    obs, reward, done, info = env.step(action)
    print(reward)
    # `done` is not checked here, so the env is not reset explicitly mid-loop.
    sum_rewards = sum_rewards + reward

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2520915064553761, 1.2520914545994895, 1.2520915214272599]
-3154.7146811374255
-17.867592517724162
-17.919291149103753
-18.173446984722787
-18.38409359862728
-18.189229510229435
-18.05220232074157
-18.171127147252573
-18.310549707404128
-18.108674394477553


In [43]:
# Policy rollout: starting from the current `obs` (there is no reset in this
# cell), let the SAC model choose the action for 100 steps. Each step's reward
# is printed and the total is accumulated in `sum_rewards`.
sum_rewards = 0
for i in range(100):
    # Only element [0] of the predict() result is used as the action
    # (forces change in p_mech reference for dispatch, per the original note).
    prediction = model.predict(obs)
    action = prediction[0]
    obs, reward, done, info = env.step(action)
    print(reward)
    # `done` is not checked here, so the env is not reset explicitly mid-loop.
    sum_rewards = sum_rewards + reward

-19.223683838267334
-17.911497065589764
-18.074285309141235
-17.74555553801506
-18.25989544295942
-17.371006339173217
-18.80227881077744
-17.828808334262888
-18.367477817354214
-18.15180457962792
-17.932744334594656
-18.268801938581145
-17.307404560480833
-17.6208992706187
-18.721075909322856
-19.130283486467064
-16.308434052781603
-18.132663785550463
-17.710291723767817
-18.169158505029767
-17.18307288725943
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2520915064553761, 1.2520914545994895, 1.2520915214272599]
-34.91511186332437
-946.2623922439831
-17.752654114631913
-18.018508698783922
-18.33772868205209
-18.2557631512863
-17.299071855584906
-18.556525786850923
-18.365771426113476
-17.684014631072912
-17.544818088599587
-17.960781912514896
-17.866611138789693
-18.937776981973602
-18.172325199114848
-17.918037752977177
-19.579876763659335
-18.782995815460296
-18.407908775064474
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2520915064553761, 1.2520914545994895, 1.2520915214272599]
-34.91511186332437

In [50]:
# Show the first nine entries of the environment's current state
# (presumably the nine bus voltage magnitudes in pu -- TODO confirm).
env.state[:9]


[0.9501526800553677,
 0.9467588990221429,
 0.9626947175978273,
 0.9387281673957091,
 0.9208572296010331,
 0.9169480882484464,
 0.9446659206238337,
 0.9319016099918542,
 0.9558467639648995]

In [47]:
# Print two cumulative-reward totals side by side.
# NOTE(review): `rl_rew` is not assigned in any visible cell; it must be defined
# (presumably as the RL rollout's total reward) before this cell can run.
print(rl_rew, sum_rewards)

-4949.147957992667 -3326.1113975409958


In [36]:
# Advance the environment by one step using whatever `action` currently holds.
# NOTE(review): the saved output of this cell is a TypeError ('NoneType' * 'float');
# looks like a debugging leftover -- confirm whether it should be kept.
obs, reward, done, info = env.step(action)

TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'

In [None]:

# Point Dymola's working directory at the Python process's current directory.
import os
env.dymola.cd(os.getcwd())

In [None]:
# Manually load the OpenIPSL package in Dymola and print openModel's return value.
# The path is the same one listed in `libs`.
print(env.dymola.openModel("../../OpenIPSL-1.5.0/OpenIPSL/package.mo"))

In [11]:
# Simulate the IEEE 9 base case directly through the Dymola interface rather than
# via env.step(); the model name is the same string as `mo_name`.
env.dymola.simulateModel('OpenIPSL.Examples.IEEE9.IEEE_9_Base_Case_OL')

True

In [None]:
# Display Dymola's most recent error message (debugging aid).
env.dymola.getLastError()

In [None]:
# Close the environment's current Dymola session and attach a brand-new one.
# The new interface is created with no further setup in this cell (no libraries
# are opened and no working directory is set here).
env.dymola.close()
from dymola.dymola_interface import DymolaInterface
env.dymola = DymolaInterface()

In [29]:
from dymola.dymola_interface import DymolaInterface

# Attach a fresh Dymola session to the environment and enable the DAE solver flag.
env.dymola = DymolaInterface()
env.dymola.ExecuteCommand("Advanced.Define.DAEsolver = true")

# load libraries (all paths relative to the cwd) and keep each openModel() result
loaded = [env.dymola.openModel(lib, changeDirectory=False) for lib in libs]

# Surface load failures instead of silently discarding the results.
if False in loaded:
    print("Dymola could not find all models.")

# Create the scratch directory only if it is missing, but ALWAYS record it on the
# env and move Dymola into it. Previously these two steps were nested under the
# "directory does not exist" branch and were skipped whenever temp_dir was
# already present, leaving the new session without its working directory.
os.makedirs('temp_dir', exist_ok=True)
env.temp_dir = os.path.join(os.getcwd(), "temp_dir")
env.dymola.cd('temp_dir')

In [30]:
# Record the absolute path of the scratch directory on the env, then switch
# Dymola's working directory to it (the relative path is resolved by Dymola).
env.temp_dir = os.path.join(os.getcwd(), "temp_dir")
env.dymola.cd('temp_dir')

True