# 1. Import dependencies

In [None]:
# Import Gym stuff
import gymnasium as gym
from gymnasium import Env
# All types of spaces
from gymnasium.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

# Import helpers
import numpy as np
import random
import os

# Import Stable-Baselines3 stuff
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# 2. Types of Spaces

In [None]:
# Discrete(3): a single integer value, one of 0, 1 or 2
Discrete(3).sample()

In [None]:
# 3x3 box with every entry between 0 and 1
Box(0, 1, shape=(3, 3)).sample()

# These two describe the same space: a one-element box spanning 0 to 100.
# `shape` must be a tuple, so a single dimension is written as (1,).
Box(low=np.array([0]), high=np.array([100]))
Box(low=0, high=100, shape=(1,))

In [None]:
# Tuple space combining a Discrete(3) value with a 3-element box between 0 and 1
Tuple((Discrete(3), Box(0, 1, shape=(3,)))).sample()

In [None]:
# Dict space: height is 0 or 1, speed is a one-element box between 0 and 100
Dict({'heigth': Discrete(2), 'speed': Box(0, 100, shape=(1,))}).sample()

In [None]:
# Four positions, each of which is 0 or 1
MultiBinary(4).sample()

In [None]:
# Three values: the first is always 0, the second is 0 or 1,
# and the third is 0, 1 or 2
MultiDiscrete([1, 2, 3]).sample()

In [None]:
# One-element box with lower bound 0 and upper bound 100
Box(low=np.array([0]), high=np.array([100]))

In [None]:
# One-element NumPy array, the form passed as a Box bound
np.array([0])

# 3. Building an Environment
- Build an agent that gives us the best shower possible
- The starting temperature is random (38 ± 3 degrees)
- The best shower is between 37 and 39 degrees
- The agent doesn't know that, so it has to learn it

In [None]:
class ShowerEnv(Env):
    """Shower-temperature environment for reinforcement learning.

    The observation is the current water temperature as a one-element
    float32 array. On every step the agent lowers, keeps or raises the
    temperature by one degree. It earns +1 while the temperature is between
    37 and 39 degrees (inclusive) and -1 otherwise. An episode lasts 60 steps.
    """

    def __init__(self):
        # Actions we can take: 0 = tap down, 1 = stay, 2 = tap up
        self.action_space = Discrete(3)
        # Temperature array: a single float32 value between 0 and 100
        self.observation_space = Box(low=0.0, high=100.0, shape=(1,), dtype=np.float32)
        # Set start temp (same array form that reset() produces, so the state
        # has a consistent type even before the first reset)
        self.state = self._sample_start_temp()
        # Set shower length
        self.shower_length = 60

    def _sample_start_temp(self):
        """Return a start temperature of 38 +/- 3 degrees as a float32 array."""
        # Draw from the environment's own generator so that reset(seed=...)
        # is reproducible. integers() excludes the upper bound, hence 4 to
        # include an offset of +3.
        offset = self.np_random.integers(-3, 4)
        return np.array([38 + offset], dtype=np.float32)

    def step(self, action):
        """Apply one action and advance the shower by one second.

        Args:
            action: 0 to lower, 1 to keep, 2 to raise the temperature by one.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes True once the shower length is used up;
            ``truncated`` is always False and ``info`` is an empty dict.
        """
        # Apply action
        # decrease 0 -1 = -1 temperature
        # unchange 1 -1 = 0
        # increase 2 -1 = 1 temperature
        # A new float32 array is built instead of mutating in place, so
        # observations handed out earlier are not changed retroactively.
        self.state = self.state + np.float32(action - 1)
        # Reduce shower length by 1 second
        self.shower_length -= 1

        # Calculate reward
        temperature = float(self.state[0])
        reward = 1 if 37 <= temperature <= 39 else -1

        # Check if shower is done
        terminated = self.shower_length <= 0

        # Apply temperature noise
        # self.state += random.randint(-1,1)
        # Set placeholder for info
        info = {}

        # Return step information
        return self.state, reward, terminated, False, info

    def render(self):
        """Visualisation hook; currently does nothing."""
        # Implement viz
        pass

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Optional seed for the environment's random generator.
            options: Accepted for compatibility with the Gymnasium ``reset``
                signature; not used.

        Returns:
            A tuple ``(observation, info)`` with the new start temperature
            and an empty info dict.
        """
        super().reset(seed=seed)
        # Reset shower temperature
        self.state = self._sample_start_temp()
        # Reset shower time
        self.shower_length = 60
        info = {}
        return self.state, info

In [None]:
# Instantiate the custom shower environment
env = ShowerEnv()

In [None]:
# Draw a random observation from the environment's observation space
env.observation_space.sample()

In [None]:
# Draw a random action from the environment's action space
env.action_space.sample()

# 4. Test environment

In [None]:
# Run a few episodes with random actions and print each episode's total reward
episodes = 5

for episode in range(1, episodes + 1):
    # reset() returns (observation, info)
    obs, info = env.reset()
    done = False
    score = 0
    while not done:
        action = env.action_space.sample()  # Use our model here
        # step() returns five values; the episode is over when either
        # the terminated or the truncated flag is set
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        score += reward
    print(f'Episode: {episode} Score: {score}')
env.close()

In [None]:
# Run Stable-Baselines3's environment checker on the custom environment,
# with additional warnings enabled
from stable_baselines3.common.env_checker import check_env
check_env(env, warn=True)

# 5. Train the Model

In [None]:
# Directory passed to PPO as its TensorBoard log location
log_path = os.path.join('Training', 'Logs')
# PPO agent with the 'MlpPolicy' policy on the shower environment
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

In [None]:
# Train the agent for 80,000 timesteps
model.learn(total_timesteps=80000)

# 6. Save the model

In [None]:
# Path used to save and later reload the trained model
shower_path = os.path.join('Training', 'Saved Models', 'Shower_Model')

In [None]:
# Write the trained model to shower_path
model.save(shower_path)

In [None]:
# Remove the in-memory model; it is reloaded from shower_path afterwards
del model

In [None]:
# Reload the saved model from shower_path
model = PPO.load(shower_path)

In [None]:
# Evaluate the loaded policy over 10 episodes with rendering requested
# (ShowerEnv.render is currently a no-op)
evaluate_policy(model, env, n_eval_episodes=10, render=True)