In [2]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [3]:
class BestToast(Env):
    def __init__(self):
        
        #Action we can take, down, up and stay
        self.action_space = Discrete(3)
        # temperature 
        self.observation_space = Box(low = np.array([0]), high = np.array([250]))
        # set start temperature
        self.state = 156 + random.randint(-2,2)
        #set cooking time
        self.cooking_time = 216


    def step(self, action):
        """"
        Action list= 
        0 -1 = -1 temperature
        1 -1 =  0 temperature
        2 -1=  +1 temperature
        """
        self.state += action -1

        # reduce cooking time by 1 sec
        self.cooking_time -= 1

        # reward
        if self.state >=154 and self.state <= 158:
            reward =1
        else:
            reward = -1


     # if cooking is done stop
        if self.cooking_time <= 0:
            done = True
        else:
            done = False

        # Apply temperature noise
        #self.state += random.randint(-1,1)
        # Set placeholder for info
        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self):
        pass

    def reset(self):
        # Reset shower temperature
        self.state = 156 + random.randint(-2,2)
        # Reset shower time
        self.shower_length =  216
        return self.state


In [4]:

env = BestToast()



In [5]:
states = env.observation_space.shape
actions = env.action_space.n

In [6]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24, activation = "relu", input_shape = states))
    model.add(Dense(24, activation = "relu"))
    model.add(Dense(actions, activation = "linear"))
    return model

In [7]:
model = build_model(states, actions)

In [8]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 24)                48        
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 75        
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________


In [9]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit = 30000, window_length = 1)
    dqn = DQNAgent(model = model, memory = memory, policy = policy, nb_actions = actions, nb_steps_warmup = 10, target_model_update = 1e-2)
    return dqn

In [10]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr = 1e-3), metrics = ["mae"])
dqn.fit(env, nb_steps = 30000, visualize = False, verbose = 1)



Training for 30000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 7:37 - reward: 1.0000



9785 episodes - episode_reward: 0.807 [-1.000, 216.000] - loss: 3.753 - mae: 1.264 - mean_q: 3.122

Interval 2 (10000 steps performed)
10000 episodes - episode_reward: 0.771 [-1.000, 1.000] - loss: 0.182 - mae: 0.598 - mean_q: 1.019

Interval 3 (20000 steps performed)
done, took 314.937 seconds


<tensorflow.python.keras.callbacks.History at 0x7fe90a623820>

In [11]:
scores = dqn.test(env, nb_episodes=20, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 20 episodes ...
Episode 1: reward: 1.000, steps: 1
Episode 2: reward: 1.000, steps: 1
Episode 3: reward: 1.000, steps: 1
Episode 4: reward: 1.000, steps: 1
Episode 5: reward: 1.000, steps: 1
Episode 6: reward: 1.000, steps: 1
Episode 7: reward: 1.000, steps: 1
Episode 8: reward: 1.000, steps: 1
Episode 9: reward: 1.000, steps: 1
Episode 10: reward: 1.000, steps: 1
Episode 11: reward: 1.000, steps: 1
Episode 12: reward: 1.000, steps: 1
Episode 13: reward: 1.000, steps: 1
Episode 14: reward: 1.000, steps: 1
Episode 15: reward: 1.000, steps: 1
Episode 16: reward: 1.000, steps: 1
Episode 17: reward: 1.000, steps: 1
Episode 18: reward: 1.000, steps: 1
Episode 19: reward: 1.000, steps: 1
Episode 20: reward: 1.000, steps: 1
1.0


In [12]:
# Reference https://github.com/nicknochnack/OpenAI-Reinforcement-Learning-with-Custom-Environment