In [1]:
import torch
print(torch.__version__)


2.3.1+cpu


In [39]:
#-- Action we can take to the environment and current state of that environment (Discrete, Box)
#-- Discrete : Discrete Space (Action Number)
#-- Box : Box Space (set 1-100 points)
import random
import numpy as np
import matplotlib.pyplot as plt
from gym import Env
from gym.spaces import Discrete, Box 
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from keras.callbacks import Callback
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten
from stable_baselines3 import A2C

## Air Condition Temperature Range is between 20-30 Only

In [42]:
class ShowerEnv(Env):
    def __init__(self, temp_input):
        self.temp_input = temp_input
        #-- Set action type : decrease, stay, increase
        self.action_space = Discrete(3)
        #-- Temperature Range (1 - 100)
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        #-- Set initial temperature
        self.state = self.temp_input + random.randint(-4, 4)
        #-- Set shower length (60 sec)
        self.shower_length = 120

    def step(self, action):
        #-- Action Type
        self.state += action - 1 
        #-- Reduce shower length by 1 second
        self.shower_length -= 1 

        #-- Reward calculation
        if self.state >= (self.temp_input - 2) and self.state <= (self.temp_input + 2): 
            reward = 1 
        else: 
            reward = -1 
        
        #-- Check if shower length is 0 : done
        done = self.shower_length <= 0
        
        info = {}

        # Return state as a numpy array with the correct shape (1,)
        return np.array([self.state]), reward, done, info

    def render(self):
        pass
    
    def reset(self, temp_input=None):
        if temp_input is not None:
            self.temp_input = temp_input
        self.state = self.temp_input + random.randint(-4, 4)
        self.shower_length = 120

        # Return state as a numpy array with the correct shape (1,)
        return np.array([self.state])


In [148]:
import random
from stable_baselines3 import DQN , PPO
from stable_baselines3.common.env_checker import check_env

# Example environment check (make sure your environment is valid)
env = ShowerEnv(temp_input=25)  # Initial temp_input for the check

# Initialize the DQN model
model = DQN("MlpPolicy", env, verbose=1)

# Training loop with dynamic temp_input
for episode in range(500):  # Number of training episodes
    # Set a random temp_input for each episode (or follow a specific schedule)
    temp_input = random.randint(21, 30)  # Example range for temp_input
    
    # Reset environment with new temp_input
    env.reset(temp_input=temp_input)
    
    # Train for the desired number of timesteps
    model.learn(total_timesteps=1000)  # Adjust timesteps as needed

# Save the trained model
model.save("dqn_shower_env")


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 120      |
|    ep_rew_mean     | -54.2    |
| time/              |          |
|    fps             | 1803     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 120      |
|    ep_rew_mean     | -47.2    |
| time/              |          |
|    fps             | 1743     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 120      |
|    ep_rew_mean     | -32.4    |
| time/              |          |
|    fps             | 1911     |
|    iterations      |

In [188]:
# Load the saved model
model = DQN.load("dqn_shower_env")

# Test the model with a new temp_input
test_temp_input = 27
total_reward = 0
state = env.reset(temp_input=test_temp_input)

done = False
while not done:
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    total_reward = total_reward + reward
    env.render()
print(total_reward)

120


In [189]:
import random
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import SubprocVecEnv
from gym import Env
from gym.spaces import Discrete, Box

# Define the ShowerEnv class
class ShowerEnv(Env):
    def __init__(self, temp_input):
        self.temp_input = temp_input
        # Action space: decrease, stay, increase
        self.action_space = Discrete(3)
        # Observation space: temperature (0 - 100)
        self.observation_space = Box(low=np.array([0]), high=np.array([100]), dtype=np.float32)
        # Set initial temperature
        self.state = self.temp_input + random.randint(-5, 5)
        # Set shower length (60 seconds)
        self.shower_length = 120

    def step(self, action):
        # Apply action: decrease (-1), stay (0), or increase (+1)
        self.state += action - 1
        # Reduce shower length
        self.shower_length -= 1

        # Calculate reward: +1 if temperature between temp_input - 2 and temp_input + 2, else -1
        if self.state >= (self.temp_input - 2) and self.state <= (self.temp_input + 2):
            reward = 1
        else:
            reward = -1

        # Check if shower is over
        done = self.shower_length <= 0

        info = {}
        # Return state as a numpy array with shape (1,)
        return np.array([self.state]), reward, done, info

    def render(self):
        pass

    def reset(self, temp_input=None):
        if temp_input is not None:
            self.temp_input = temp_input
        self.state = self.temp_input + random.randint(-5, 5)
        self.shower_length = 120
        # Return the initial state as a numpy array with shape (1,)
        return np.array([self.state])


# Function to create the environment
def make_env(temp_input):
    def _init():
        env = ShowerEnv(temp_input=temp_input)
        return env
    return _init


# Create vectorized environment with different temp_input values
env_fns = [make_env(temp_input=38), make_env(temp_input=42)]
env = SubprocVecEnv(env_fns)  # Creates 2 environments running in parallel


# Initialize the DQN model
model = DQN("MlpPolicy", env, verbose=1)

# Train the model
model.learn(total_timesteps=10000)

# Save the model
model.save("dqn_shower_env_multi")

# Load and test the trained model
model = DQN.load("dqn_shower_env_multi")

# Test the model with the vectorized environment
state = env.reset()

done = [False for _ in range(env.num_envs)]  # Keep track of whether each environment is done



Using cpu device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.544    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1628     |
|    time_elapsed     | 0        |
|    total_timesteps  | 480      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.16     |
|    n_updates        | 47       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.088    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1501     |
|    time_elapsed     | 0        |
|    total_timesteps  | 960      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.273    |
|    n_updates        | 107      |
----------------------------------
----------------------------------
| rollout/            |          |
|  



In [206]:
total_reward = 0
while not all(done):
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    total_reward = total_reward + reward
    env.render()
display(total_reward)


0