## 1. Install Libs

In [1]:
#-- Action we can take to the environment and current state of that environment (Discrete, Box)
#-- Discrete : Discrete Space (Action Number)
#-- Box : Box Space (set 1-100 points)
import random
import numpy as np
import matplotlib.pyplot as plt
from gym import Env
from gym.spaces import Discrete, Box 
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from keras.callbacks import Callback
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten


## 2. Create Own Custom RL Environment

In [3]:
#-- Environment Class for Shower Temp 37 to 39 (shower length is 60 second), try to optimal temp as long as possible
class ShowerEnv(Env):
    def __init__(self):
        #-- Set action type : decrease, stay, increase
        self.action_space = Discrete(3)
        #-- Temperature Range (1 - 100)
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        #-- Set initial temperature
        self.state = 38 + random.randint(-5,5)
        #-- Set shower length (60sec)
        self.shower_length = 120
    
    def step(self, action):
        #-- Action Type
        # 0 -1 = -1 decrease temperature
        # 1 -1 = 0 
        # 2 -1 = 1 increase temperature 
        self.state += action -1 
        #-- Reduce shower length by 1 second
        self.shower_length -= 1 
        
        #-- Reward calculation
        if self.state >=37 and self.state <=39: 
            reward =1 
        else: 
            reward = -1 
        
        #-- Check if shower length is 0 : done
        if self.shower_length <= 0: 
            done = True
        else:
            done = False
        
        #-- 
        info = {}
        return self.state, reward, done, info

    def render(self):
        pass
    
    def reset(self):
        #-- Reset shower temperature
        self.state = 38 + random.randint(-5,5)
        #-- Reset shower length
        self.shower_length = 120
        return self.state
    

In [4]:
#-- Test Custom Environment Action and Observation (State)
env = ShowerEnv()
#-- From Discrete which we create : Value will between 0 to 2 which are 0, 1, 2
print("Action :",env.action_space.sample())
#-- From Box which we create : Value will between 1 to 100 
print("State :", env.observation_space.sample())

Action : 0
State : [97.6204]


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


# 3. Build Deep Learning Model

In [5]:
#-- state is the number of input data shape
#-- actions = number of output data type
#-- Deep Learning Model
def build_model(states, actions):
    model = Sequential()    
    model.add(Dense(128, activation='relu', input_shape=states))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

#-- Build Deep Learning Model
states = env.observation_space.shape
actions = env.action_space.n
model = build_model(states, actions)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               256       
                                                                 
 dense_1 (Dense)             (None, 128)               16512     
                                                                 
 dense_2 (Dense)             (None, 3)                 387       
                                                                 
Total params: 17,155
Trainable params: 17,155
Non-trainable params: 0
_________________________________________________________________


# 4. Build RL Agent Model 

In [6]:
#-- Callback Function : Reward Store 
class RewardLogger(Callback):
    def __init__(self):
        self.episode_rewards = []
        self.total_rewards = 0

    def on_episode_begin(self, episode, logs={}):
        self.total_rewards = 0

    def on_step_end(self, step, logs={}):
        self.total_rewards += logs['reward']

    def on_episode_end(self, episode, logs={}):
        self.episode_rewards.append(self.total_rewards)
    
    def plot_rewards(self):
        plt.plot(self.episode_rewards)
        plt.title("Total Reward per Episode")
        plt.xlabel("Episode")
        plt.ylabel("Total Reward")
        plt.show()

In [75]:
#-- Clear model cache and rebuild deep lerning model
del model 
model = build_model(states, actions)

reward_logger = RewardLogger()

#-- Keras Rl model 
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=100000, window_length=1)
    #-- weight update every 0.1% change
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=1000, target_model_update=1e-3)
    return dqn

#-- Build Rl model
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=40000, visualize=False, verbose=1, callbacks=[reward_logger])

#-- Plot toal reward
reward_logger.plot_rewards()

Training for 40000 steps ...
Interval 1 (0 steps performed)
83 episodes - episode_reward: -75.060 [-120.000, 62.000] - loss: 0.234 - mae: 2.210 - mean_q: -2.951

Interval 2 (10000 steps performed)
83 episodes - episode_reward: 91.229 [-18.000, 120.000] - loss: 0.234 - mae: 3.304 - mean_q: -1.792

Interval 3 (20000 steps performed)
84 episodes - episode_reward: 105.810 [92.000, 116.000] - loss: 0.583 - mae: 6.824 - mean_q: 9.058

Interval 4 (30000 steps performed)
done, took 179.468 seconds


<keras.callbacks.History at 0x298fdb9f640>

## 5. Running the Agent Environment

In [77]:
#-- Running the Agent Environment
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 120.000, steps: 120
Episode 2: reward: 118.000, steps: 120
Episode 3: reward: 120.000, steps: 120
Episode 4: reward: 120.000, steps: 120
Episode 5: reward: 118.000, steps: 120
Episode 6: reward: 114.000, steps: 120
Episode 7: reward: 120.000, steps: 120
Episode 8: reward: 114.000, steps: 120
Episode 9: reward: 120.000, steps: 120
Episode 10: reward: 120.000, steps: 120
Episode 11: reward: 114.000, steps: 120
Episode 12: reward: 114.000, steps: 120
Episode 13: reward: 114.000, steps: 120
Episode 14: reward: 116.000, steps: 120
Episode 15: reward: 120.000, steps: 120
Episode 16: reward: 116.000, steps: 120
Episode 17: reward: 114.000, steps: 120
Episode 18: reward: 120.000, steps: 120
Episode 19: reward: 120.000, steps: 120
Episode 20: reward: 120.000, steps: 120
Episode 21: reward: 118.000, steps: 120
Episode 22: reward: 114.000, steps: 120
Episode 23: reward: 120.000, steps: 120
Episode 24: reward: 120.000, steps: 120
Episode 25: reward: 