In [5]:
from gymnasium import Env
from gymnasium.spaces import Discrete, Box
import numpy as np
import random

### Functions to implement

- `__init__`
- `step`
- `render`
- `reset`


In [None]:

class ShowerEnv(Env):
    """Toy shower-temperature control task.

    The state is a single integer temperature. Each step the agent picks one
    of three actions (0, 1, 2) which shift the temperature by -1, 0 or +1,
    after which a random disturbance of -1, 0 or +1 is applied. The reward is
    +1 when the temperature right after the action (before the disturbance)
    satisfies ``37 <= temperature < 39`` and -1 otherwise. An episode lasts
    ``EPISODE_LENGTH`` steps.

    All randomness is drawn from ``self.np_random`` so that
    ``reset(seed=...)`` makes an episode reproducible.
    """

    # Bounds advertised in ``observation_space``; the state is kept inside them.
    MIN_TEMPERATURE = 0
    MAX_TEMPERATURE = 100
    # Centre and half-width of the initial temperature distribution.
    START_TEMPERATURE = 38
    START_JITTER = 3
    # Number of steps before the episode terminates.
    EPISODE_LENGTH = 60

    def __init__(self):
        """Create the action/observation spaces and an initial episode state."""
        self.action_space = Discrete(3)

        # Explicit float32 bounds so no implicit int -> float cast is needed.
        self.observation_space = Box(
            low=np.array([self.MIN_TEMPERATURE], dtype=np.float32),
            high=np.array([self.MAX_TEMPERATURE], dtype=np.float32),
            dtype=np.float32,
        )

        self.state = self._sample_initial_state()

        self.shower_length = self.EPISODE_LENGTH

    def _sample_initial_state(self):
        """Return a start temperature within START_JITTER of START_TEMPERATURE."""
        jitter = self.np_random.integers(
            -self.START_JITTER, self.START_JITTER, endpoint=True
        )
        return self.START_TEMPERATURE + int(jitter)

    def _observation(self):
        """Return the current temperature as a float32 array of shape (1,)."""
        return np.array([self.state], dtype=np.float32)

    def step(self, action):
        """Advance the environment by one step.

        Args:
            action: 0, 1 or 2 (a Python int, NumPy integer or size-1 array).

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``observation`` is the temperature after the random disturbance,
            ``reward`` is +1 or -1, ``terminated`` is True once the episode's
            steps are used up, ``truncated`` is always False and ``info`` is
            an empty dict.
        """
        # Coerce to a plain int so self.state never silently becomes an ndarray
        # (model.predict hands back NumPy arrays rather than Python ints).
        self.state += int(np.asarray(action).item()) - 1
        self.shower_length -= 1

        # Reward is judged on the temperature the agent set, before the noise.
        reward = 1 if 37 <= self.state < 39 else -1

        terminated = self.shower_length <= 0
        truncated = False

        # Random disturbance in {-1, 0, 1}, drawn from the seedable env RNG.
        self.state += int(self.np_random.integers(-1, 1, endpoint=True))

        # Keep the stored state (and hence the observation) inside the bounds
        # declared by observation_space.
        self.state = min(max(self.state, self.MIN_TEMPERATURE), self.MAX_TEMPERATURE)

        return self._observation(), reward, terminated, truncated, {}

    def reset(self, *, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: optional seed forwarded to ``Env.reset`` to (re)seed
                ``self.np_random``.
            options: accepted for API compatibility; unused.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)

        self.state = self._sample_initial_state()

        self.shower_length = self.EPISODE_LENGTH

        return self._observation(), {}

    def render(self):
        """Rendering is not implemented for this environment."""
        pass


In [None]:
from stable_baselines3 import DQN

# Fixed seed handed to Stable-Baselines3 and to the evaluation reset so that
# repeated runs of this cell start from the same random state.
SEED = 42

env = ShowerEnv()

model = DQN(
    policy="MlpPolicy",
    env=env,
    learning_rate=1e-3,
    buffer_size=10000,
    learning_starts=1000,
    batch_size=64,
    gamma=0.99,
    verbose=1,
    seed=SEED,
)

model.learn(total_timesteps=50000)

# Save the model
# model.save("dqn_shower_env")

# Roll out one greedy episode with the trained policy and sum its rewards.
obs, _ = env.reset(seed=SEED)
total_reward = 0
done = truncated = False
# Run until the environment itself reports the end of the episode instead of
# hard-coding the episode length here.
while not (done or truncated):
    action, _states = model.predict(obs, deterministic=True)
    # predict() returns a NumPy array; pass the env a plain int action.
    obs, reward, done, truncated, info = env.step(int(action))
    total_reward += reward

print("Total reward:", total_reward)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60       |
|    ep_rew_mean      | -38      |
|    exploration_rate | 0.954    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 39915    |
|    time_elapsed     | 0        |
|    total_timesteps  | 240      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60       |
|    ep_rew_mean      | -42      |
|    exploration_rate | 0.909    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 32510    |
|    time_elapsed     | 0        |
|    total_timesteps  | 480      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60       |
|    ep_rew_mean      | -40.7 

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60       |
|    ep_rew_mean      | -38.8    |
|    exploration_rate | 0.772    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 2698     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1200     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.35     |
|    n_updates        | 49       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60       |
|    ep_rew_mean      | -40.2    |
|    exploration_rate | 0.726    |
| time/               |          |
|    episodes         | 24       |
|    fps              | 2301     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1440     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.247    |
|    n_updates      