In [1]:
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box, Tuple, Dict, MultiDiscrete, MultiBinary

import numpy as np
import random
import os

from  stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy 

In [68]:
class ShowerEnv(Env):
    def __init__(self) -> None:
        self.action_space = Discrete(3)
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        self.state = np.array([38 + random.randint(-3,3)]).astype(int)
        self.shower_length = 60

    def step(self, action):
        self.state += action -1
        self.shower_length -= 1

        if self.state >= 37 and self.state <= 39:
            reward = 1
        else:
            reward = -1

        if self.shower_length <= 0:
            done = True
        else:
            done = False

        trunced = False
        
        return self.state, reward, done, trunced, {}
    
    def reset(self, **kwargs):
        self.state = np.array([38 + random.randint(-3,3)]).astype(int)
        self.shower_length = 60
        return (self.shower_length, {})
    
    def render(self):
        pass

In [69]:
env = ShowerEnv()
env = DummyVecEnv([lambda: env]) # type: ignore
env.reset()

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


array([[60.]], dtype=float32)

In [70]:
shower_model = PPO("MlpPolicy", env, verbose=0, n_epochs=100, learning_rate=0.001, batch_size=128)

In [71]:
shower_model.learn(total_timesteps=5000)

<stable_baselines3.ppo.ppo.PPO at 0x1c38afa94d0>

In [77]:
model_path = os.path.join('Training', 'Saved Models', 'shower_model')
shower_model.save(model_path)

In [78]:
del shower_model

shower_model = PPO.load(model_path, env=env)

In [79]:
evaluate_policy(shower_model, env, n_eval_episodes=5, render=False)



(59.6, 0.7999999999999999)