In [None]:
# Import dependencies

import gym
from gym import Env
from gym.spaces import Discrete, Box, Tuple, Dict, MultiBinary, MultiDiscrete

# import helper
import numpy as np
import random
import os

# Import stable baseline
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
## Types of spaces

# Discrete(3) describes a space of 3 possible actions
Discrete(3) # 0, 1, 2

# sample() draws one value from the space; the result varies from run to run
Discrete(3).sample() # e.g. 2

In [None]:
# Box describes an array-valued space: here a 3x3 array bounded by low=0 and high=1
Box(0,1, shape=(3,3))

In [None]:
# Draw one random 3x3 sample from the Box space.
# (sample must be *called*; without the parentheses the cell only shows the bound method.)
Box(0,1, shape=(3,3)).sample()

In [None]:
# Tuple groups several spaces into one; here a Discrete(3) together with a 3x3 Box
Tuple((Discrete(3), Box(0,1, shape=(3,3)))) # Tuple combines other spaces (here a Discrete and a Box)

In [None]:
# Draw one random sample from the Tuple space (one value per sub-space).
# (sample must be *called*; without the parentheses the cell only shows the bound method.)
Tuple((Discrete(3), Box(0,1, shape=(3,3)))).sample()


In [None]:
# Dict groups named sub-spaces; here a Discrete ('height'), a Box ('speed') and a MultiBinary ('color')
Dict({'height': Discrete(2), 'speed': Box(0,100, shape=(1,)), "color": MultiBinary(4)})

In [None]:
# MultiBinary(4): each sample consists of 4 binary (0/1) values
MultiBinary(4).sample()

In [None]:
# MultiDiscrete([5,2,2,5]): four discrete components with 5, 2, 2 and 5 possible values respectively
MultiDiscrete([5,2,2,5]).sample()

# Building the environment
- Build an agent that gives us the best shower possible
- The water temperature starts at a random value (38 ± 3 degrees)
- The ideal temperature is between 37 and 39 degrees

In [None]:
class showerEnv(Env):
    """Toy shower environment: keep the water temperature between 37 and 39 degrees.

    Follows the classic gym API: ``reset()`` returns the initial observation
    and ``step()`` returns ``(observation, reward, done, info)``.

    Attributes:
        action_space: ``Discrete(3)``. Action 0 lowers the temperature by one
            degree, 1 leaves it unchanged and 2 raises it by one degree.
        observation_space: ``Box`` over [0, 100] with shape ``(1,)`` holding
            the current temperature.
        state: current temperature as a float array of shape ``(1,)``.
        shower_length: number of steps (seconds) left in the episode.
    """

    def __init__(self):
        """Create the spaces and start a first 60-second episode."""
        self.action_space = Discrete(3)  # 0: colder, 1: unchanged, 2: hotter
        self.observation_space = Box(low=0, high=100, shape=(1,))  # temperature
        # Use the same array representation as reset() so that step() returns
        # a consistently typed observation even if reset() was not called yet.
        self.state = self._random_start()
        self.shower_length = 60  # 60 seconds

    @staticmethod
    def _random_start():
        """Return a random starting temperature: 38 plus an integer offset in [-3, 3]."""
        return np.array([38 + random.randint(-3, 3)]).astype(float)

    def step(self, action):
        """Apply ``action`` for one second.

        Args:
            action: 0, 1 or 2; the temperature changes by ``action - 1``.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``reward`` is 1 when
            the new temperature lies in [37, 39] and -1 otherwise, ``done`` is
            True once the shower time is used up, and ``info`` is an empty dict.
        """
        # Build a new array instead of updating in place, so observations
        # returned by earlier calls are not modified by later steps.
        # NOTE(review): the temperature is not clipped to the [0, 100] bounds
        # of observation_space.
        self.state = self.state + (action - 1)

        # One second of shower time is used up
        self.shower_length -= 1

        # Reward staying inside the target band
        temperature = float(self.state[0])
        reward = 1 if 37 <= temperature <= 39 else -1

        done = self.shower_length <= 0
        info = {}

        return self.state, reward, done, info

    def reset(self):
        """Start a new episode and return its initial observation."""
        self.state = self._random_start()
        self.shower_length = 60  # back to a full 60-second shower
        # The classic gym API (and Stable-Baselines3) expects reset() to
        # return the first observation.
        return self.state

    def render(self, mode="human"):
        """No-op: this environment has nothing to draw.

        ``mode`` is accepted (and ignored) because the classic gym signature
        is ``render(mode=...)`` and wrappers may pass it by keyword.
        """
        pass

In [None]:
# Instantiate the custom environment and draw a random point from its observation space
env = showerEnv()
env.observation_space.sample()

In [None]:
# Draw a random action (0, 1 or 2) from the environment's action space
env.action_space.sample()

In [None]:
# Start a fresh episode: re-randomises the temperature and restores the 60-second timer
env.reset()

# Test Environment

In [None]:
# Smoke-test the environment: play a few episodes with randomly sampled
# actions and print the total reward collected in each one.
episodes = 5
for episode in range(1, episodes + 1):
    obs, done, score = env.reset(), False, 0

    while True:
        if done:
            break
        env.render()
        action = env.action_space.sample()  # random policy
        obs, reward, done, info = env.step(action)
        score = score + reward
    print('Episode:{} Score:{}'.format(episode, score))

# Train the model

In [None]:
# Directory for the TensorBoard logs: Training/Logs, relative to the working directory
log_path = os.path.join('Training', 'Logs') 
# PPO agent using the 'MlpPolicy' policy on our environment, logging to log_path
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

In [None]:
# Train the agent for 40,000 environment timesteps
model.learn(total_timesteps=40000)

# Save Model

In [None]:
# Save the trained model under Training/Saved Models/Shower_Model_PPO
shower_path = os.path.join('Training', 'Saved Models', 'Shower_Model_PPO')
model.save(shower_path)

In [None]:
# Drop the in-memory model so that the next cell genuinely reloads it from disk
del model

In [None]:
# Reload the saved PPO model from shower_path and attach the environment to it
model = PPO.load(shower_path, env=env)

In [None]:
# Evaluate the reloaded policy over 10 episodes with rendering enabled.
# NOTE(review): evaluate_policy is expected to return (mean reward, std of reward) -- confirm against the SB3 docs.
evaluate_policy(model, env, n_eval_episodes=10, render=True)