In [69]:
import time
from flatland.utils.rendertools import AgentRenderVariant, RenderTool
from rl.core import Env
from experiments.dqn_test.training_environment import FlatTreeObs, generate_environment
import numpy as np

class FlatlandEnv(Env):
    def __init__(self, n_agents=1, seed=1000, max_steps=200):
        self._seed = seed
        self._obs_builder = FlatTreeObs(1)
        self._env, self._renderer = generate_environment(n_agents, self._obs_builder, seed)
        self._steps = 0
        self._max_steps = max_steps
        self._nb_resets = 0

    def step(self, action):
        # supports only single agent
        # print(action)

        action = 2 if action == 4 else action
        obs, reward, done, info = self._env.step({0: action})
        score = reward[0]
        while not info['action_required'][0] and not done[0]:
            obs, reward, done, info = self._env.step({0: 3})
            score += reward[0]
        # obs = tf.constant([obs[0]], dtype=tf.float32)
        # print(obs)
        obs = obs[0]
        if type(obs) == list:
            obs = np.array(obs)
        else:
            obs = np.zeros(self._obs_builder.state_size())
        #print(obs, reward[0], done[0])

        if self._steps >= self._max_steps:
            done[0] = True,
            score -= 100
        self._steps += 1
        return obs, score, done[0], {}

    def reset(self):
        try:
            self._renderer.close_window()
        except Exception as e:
            pass

        self._steps = 0
        self._nb_resets += 1
        if self._nb_resets > 2:
            random.seed(self._seed)
            self._seed = random.randint(0, 100000)
            self._nb_resets = 0

        obs, _ = self._env.reset(random_seed=self._seed, activate_agents=True)
        self._renderer = RenderTool(
            self._env,
            show_debug=True,
            agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
            screen_height=500,
            screen_width=500)

        obs, _, _, _ = self.step(2)
        #self._renderer.reset()
        obs = obs[0]
        obs = np.array(obs)
        if obs.size < self._obs_builder.state_size():
            obs = np.zeros(self._obs_builder.state_size())
        return obs

    def render(self, mode='human', close=False):
        self._renderer.render_env(show=True)
        time.sleep(0.001)

    def close(self):
        self._renderer.close_window()

    def seed(self, seed=None):
        self._seed = seed
        return [seed]

    def configure(self, *args, **kwargs):
        pass

    def action_space(self):
        return self._env.action_space[0]  # returns 5

    def states_space(self):
        return self._obs_builder.state_size()

In [70]:
env = FlatlandEnv()
states = env.states_space()
actions = env.action_space()
print(states)
print(actions)

10
5


In [71]:
import random

episodes = 0# CHANGE Here
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = random.choice([0,1,2,3,4])
        n_state, reward, done, info = env.step(action)
        #print(n_state)
        score += reward
        #time.sleep(0.1)
    time.sleep(1)
    print(f"Episode: {episode}, Score: {score}")
if episodes > 0:
    env.close()


In [72]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Input, InputLayer
from keras.optimizers import Adam

In [73]:
def build_model(states, actions):
    _model = Sequential()
    _model.add(Flatten(input_shape=(1,states)))
    _model.add(Dense(20, activation='relu'))
    _model.add(Dense(10, activation='relu'))
    _model.add(Dense(actions, activation='linear'))

    # input = Input(shape=(1,states))
    # x = Flatten(input)
    # x = Dense(24, activation='relu')(x)
    # x = Dense(24, activation='relu')(x)
    # output = Dense(5, activation='linear')(x)
    # model = Model(inputs=)
    return _model

In [74]:
model = build_model(states, actions)
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_8 (Flatten)          (None, 10)                0         
_________________________________________________________________
dense_22 (Dense)             (None, 20)                220       
_________________________________________________________________
dense_23 (Dense)             (None, 10)                210       
_________________________________________________________________
dense_24 (Dense)             (None, 5)                 55        
Total params: 485
Trainable params: 485
Non-trainable params: 0
_________________________________________________________________


In [75]:
from rl.agents import DQNAgent          # Deep q learning agent
from rl.policy import BoltzmannQPolicy  # Policy based reinforcement learning
from rl.memory import SequentialMemory  # Memory ??

In [76]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=5000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions,
                   nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [77]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)


Training for 50000 steps ...
   149/50000: episode: 1, duration: 5.304s, episode steps: 149, steps per second: 28, episode reward: -149.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.235 [0.000, 4.000], mean observation: 197.667 [0.000, 1000.000], loss: 1054.936241, mae: 55.433181, mean_q: 52.904121
   298/50000: episode: 2, duration: 3.168s, episode steps: 149, steps per second: 47, episode reward: -149.000, mean reward: -1.000 [-1.000, -1.000], mean action: 2.369 [0.000, 4.000], mean observation: 136.119 [0.000, 1000.000], loss: 106.385139, mae: 54.704735, mean_q: 51.252617
   447/50000: episode: 3, duration: 3.193s, episode steps: 149, steps per second: 47, episode reward: -149.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.866 [0.000, 4.000], mean observation: 146.736 [0.000, 1000.000], loss: 24.885792, mae: 49.546009, mean_q: 44.238796
   596/50000: episode: 4, duration: 3.089s, episode steps: 149, steps per second: 48, episode reward: -149.000, mean reward: 

Exception ignored in: <bound method Env.__del__ of <__main__.FlatlandEnv object at 0x0000020E09AB2630>>
Traceback (most recent call last):
  File "C:\Users\pqman\.conda\envs\flatland-rl\lib\site-packages\rl\core.py", line 686, in __del__
    self.close()
  File "<ipython-input-49-4d45ef41be0e>", line 74, in close
  File "C:\Users\pqman\.conda\envs\flatland-rl\lib\site-packages\flatland\utils\rendertools.py", line 66, in close_window
    self.renderer.close_window()
  File "C:\Users\pqman\.conda\envs\flatland-rl\lib\site-packages\flatland\utils\rendertools.py", line 771, in close_window
    self.gl.close_window()
  File "C:\Users\pqman\.conda\envs\flatland-rl\lib\site-packages\flatland\utils\graphics_pgl.py", line 51, in close_window
    self.window.close()
AttributeError: 'PGLGL' object has no attribute 'window'
Exception ignored in: <bound method Env.__del__ of <__main__.FlatlandEnv object at 0x0000020E08F04080>>
Traceback (most recent call last):
  File "C:\Users\pqman\.conda\envs\fl

<keras.callbacks.callbacks.History at 0x20e84f0d7f0>

In [78]:
scores = dqn.test(env, nb_episodes=10, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Episode 1: reward: -38.000, steps: 40
Episode 2: reward: -38.000, steps: 40
Episode 3: reward: -1.000, steps: 3
Episode 4: reward: -1.000, steps: 3
Episode 5: reward: -1.000, steps: 3
Episode 6: reward: -149.000, steps: 149
Episode 7: reward: -149.000, steps: 149
Episode 8: reward: -149.000, steps: 149
Episode 9: reward: -149.000, steps: 149
Episode 10: reward: -149.000, steps: 149
-82.4


In [81]:
_ = dqn.test(env, nb_episodes=2, visualize=True)
env.close()

Testing for 2 episodes ...
open_window - pyglet
Episode 1: reward: -149.000, steps: 149
open_window - pyglet
Episode 2: reward: -149.000, steps: 149


In [80]:
model.save('model')
dqn.save_weights('dqn_weights.hdf5', overwrite=True)
