## Import Dependencies

In [2]:
#Import GYM stuff
import gymnasium
from gymnasium import Env #--Superclass needed to build our own env
from gymnasium.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete #--Types of spaces available in gym

#Import helpers
import numpy as np
import random
import os

#Import stablebaselines stuff
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy




## Types of Spaces

### Types of keys

In [131]:
Discrete(2).sample()

0

In [132]:
Box(0, 1, shape = (3, 3)).sample()

array([[0.6645947 , 0.5390494 , 0.43508548],
       [0.06009118, 0.65506786, 0.22427121],
       [0.68648654, 0.5228779 , 0.6603348 ]], dtype=float32)

In [133]:
MultiBinary(4).sample()

array([0, 1, 1, 1], dtype=int8)

In [134]:
MultiDiscrete([4,2,3,1]).sample()

array([2, 0, 1, 0], dtype=int64)

In [135]:
# Stable_baseline3 doesn't support tuple
Tuple((Discrete(2), Box(0, 100, shape = (3,)))).sample()

(0, array([36.202244, 65.630196, 85.160934], dtype=float32))

In [136]:
Dict({'Height':Discrete(2), 'Speed':Box(0, 100, shape = (1,))}).sample()

OrderedDict([('Height', 0), ('Speed', array([43.644222], dtype=float32))])

## Building an Environment
- Build an agent to give us the best shower possible
- Randomize temperature
- 37 to 39 degree

In [3]:
# inheriting the Env class of gymnasium in the class of the environment that we are creating
class ShowerEnv(Env):
    def __init__(self):
        # Actions we can take, down, stay, up
        self.action_space = Discrete(3)
        # Temperature array
        self.observation_space = Box(low=0, high=100, shape = (1,1))
        # Set start temp (which can be a value between 35 and 41
        self.state = 38 + random.randint(-3, 3)
        # Set shower length (length of an episode)
        self.shower_length = 60
    # step function is to apply action to the env and to return the result of the action
    def step(self, action):
        # Apply action
        # 0 -1 = -1 temperature
        # 1 -1 = 0 
        # 2 -1 = 1 temperature 
        self.state += action - 1 
        # Reduce shower length by 1 second
        self.shower_length -= 1 

        # Truncate is used mention whether the episodes are truncated or not
        truncated = False
        
        # Calculate reward
        if self.state >= 37 and self.state <= 39: 
            reward = 1 
        else: 
            reward = -1 
        
        # Check if shower is done
        if self.shower_length <= 0: 
            done = True
        else:
            done = False
        
        # Apply temperature noise
        # self.state += random.randint(-1,1)
        # Set placeholder for info
        info = {}
        
        # Return step information
        return self.state, reward, done, truncated, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self, seed=None):
    # ... your environment initialization logic ...
        if seed is not None:
            # Use the seed to set a random state for your environment
            random.seed(seed)
            
        self.state = np.array([38 + random.randint(-3,3)]).astype(float)
        # Reset shower time
        self.shower_length = 60 

        # the reset method need to return two argument
        info = {}
        # return the situation of the state which is the observation of the state
        return self.state, info

## Test Environment

In [4]:
env = ShowerEnv()

In [5]:
env.observation_space.sample()

array([[53.85317]], dtype=float32)

In [6]:
env.action_space.sample()

0

In [7]:
env.reset()

(array([36.]), {})

In [8]:
episodes = 5
for episode in range(1, episodes + 1):
    obs = env.reset()
    done = False
    score = 0
    
    while not done:
        env.render()
        action = env.action_space.sample()
        obs, reward, done, truncated, info = env.step(action)
        score += reward
    print("Episode: {}, Score: {}".format(episode, score))
env.close()

Episode: 1, Score: -38
Episode: 2, Score: -58
Episode: 3, Score: 32
Episode: 4, Score: -16
Episode: 5, Score: -42


## Train Model

In [9]:
log_path = os.path.join('C:\\', 'Users', 'vyshn', 'Documents System', '6th Sem', 'RL', 'RL_Projects', 'Training', 'Logs')

In [10]:
model = PPO("MlpPolicy", env, verbose = 1, tensorboard_log = log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [11]:
#!pip install shimmy>=0.2.1

In [13]:
model.learn(total_timesteps=100000)

Logging to C:\Users\vyshn\Documents System\6th Sem\RL\RL_Projects\Training\Logs\PPO_24
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 60       |
|    ep_rew_mean     | -22.4    |
| time/              |          |
|    fps             | 738      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | -22         |
| time/                   |             |
|    fps                  | 520         |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010399518 |
|    clip_fraction        | 0.0959      |
|    clip_range           | 0.2         |
|    entropy_loss         |

<stable_baselines3.ppo.ppo.PPO at 0x1c0b304cfd0>

## Save Model

In [17]:
shower_path = os.path.join('C:\\', 'Users', 'vyshn', 'Documents System', '6th Sem', 'RL', 'RL_Projects', 'Training', 'Saved Models', 'Shower_Model_PPO')

In [18]:
model.save(shower_path)

In [19]:
del model

In [20]:
model = PPO.load(shower_path, env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [22]:
evaluate_policy(model, env, n_eval_episodes=10, render = False)

(59.2, 0.9797958971132712)