In [1]:
import gymnasium as gym
import numpy as np
import math
import os
import configparser
from sb3_contrib.common.maskable.policies import MaskableActorCriticPolicy
from sb3_contrib.common.wrappers import ActionMasker
from sb3_contrib.ppo_mask import MaskablePPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
import matplotlib.pyplot as plt
from sb3_contrib.common.maskable.utils import get_action_masks



In [2]:
from src.hpc_env import HPCenv
from src.validation import Validation
from src.baseline import MedianBaseline
from src.utils import mask_fn
from src.carbon_intensity import CarbonIntensity

In [3]:
WORKLOAD_PATH = "data/workloads/lublin_256.swf"

# Load the run configuration from <cwd>/config_file/config.ini.
config = configparser.ConfigParser()
config_path = os.path.join(os.getcwd(), 'config_file', 'config.ini')
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed. Fail loudly here (e.g. when the notebook is started from
# the wrong working directory) instead of with a confusing NoSectionError below.
if not config.read(config_path):
    raise FileNotFoundError(f"Could not read config file: {config_path}")

# Typed lookups: a malformed value raises ValueError here rather than deep inside training.
GAE_LAMBDA = config.getfloat('training', 'gae_lambda')
GAMMA = config.getfloat('training', 'gamma')
EPISODE_LENGTH = config.getint('training', 'episode_length')

## Model training

In [6]:
# Build the training environment: the HPC environment wrapped first with
# action masking (mask_fn) and then with the Monitor wrapper.
env = Monitor(
    ActionMasker(
        HPCenv(workload_path=WORKLOAD_PATH, config=config),
        mask_fn,
    )
)

# Separate hidden-layer stacks for the policy (pi) and value-function (vf) networks.
policy_kwargs = {"net_arch": {"pi": [512, 512], "vf": [512, 512]}}

model = MaskablePPO(
    "MlpPolicy",
    env,
    n_steps=100_000,
    batch_size=10_240,
    n_epochs=20,
    gamma=GAMMA,
    gae_lambda=GAE_LAMBDA,
    policy_kwargs=policy_kwargs,
    seed=43,
    verbose=1,
)
model.learn(total_timesteps=10_000_000)

run time mean:  22.101
run time std:  44.0978298672395
Max Allocated Processors: 256 ;max node: 256 ;max procs: 256 ;max execution time: 124707
Using cpu device
Wrapping the env in a DummyVecEnv.
[ 0.         -0.22905889 -0.99917742 -0.33402055  1.        ]
[ 0.         -0.22905889 -0.9991774  -0.33402056  1.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0

KeyboardInterrupt: 

In [None]:
# Display the policy object; its repr lists the network layers.
# NOTE(review): the saved output for this cell shows 64-unit hidden layers, which does
# not match the net_arch=[512, 512] passed when the model is built — the output looks
# stale (from an earlier run); re-run the notebook to confirm.
model.policy

MaskableActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=1439, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=1439, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=64, out_features=273, bias=True)
  (value_net): Linear(in_features=64, out_features=1, bias=True)
)

## Model validation

In [None]:
# Set up validation of the saved model file against the MedianBaseline on the same workload.
# NOTE(review): no visible cell writes "model_test.zip" — confirm it is produced by a
# prior training run before relying on this comparison.
val = Validation(
    workload_path=WORKLOAD_PATH,
    config=config,
    model_path="model_test.zip",
    baselines=[MedianBaseline],
)

Max Allocated Processors: 256 ;max node: 256 ;max procs: 256 ;max execution time: 124707
Max Allocated Processors: 256 ;max node: 256 ;max procs: 256 ;max execution time: 124707
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
# Run the comparison for 5 evaluation episodes and print the raw result.
# Judging by the printed output, `reward` maps each policy name ('model',
# 'Median Baseline') to a list of per-episode values — TODO confirm against Validation.compare.
reward = val.compare(n_eval_episodes=5)
print(reward)

Random offset 0
Random offset 0
Random offset 0
Random offset 0
Random offset 0
Random offset 0
Random offset 0
Random offset 0
Random offset 0
Random offset 0
{'model': [-891650.2438916663, -589906.7321333336, -741800.2802305555, -1011459.3452569448, -997526.2685777775], 'Median Baseline': [-856284.5043388885, -571263.5770069446, -670389.495909722, -958111.7460583333, -980250.7900430561]}
