In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"
import tensorflow as tf
import gym
from keras.models import Sequential
from keras.layers import Dense, Embedding, Reshape
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

In [2]:
env = gym.make("Taxi-v3").env

In [3]:
action_size = env.action_space.n

model = Sequential()
model.add(Embedding(500, 10, input_length=1))
model.add(Reshape((10,)))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(action_size, activation='linear'))
print(model.summary())

Metal device set to: Apple M1


2022-05-24 22:56:37.648987: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-24 22:56:37.650651: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1, 10)             5000      
                                                                 
 reshape (Reshape)           (None, 10)                0         
                                                                 
 dense (Dense)               (None, 50)                550       
                                                                 
 dense_1 (Dense)             (None, 50)                2550      
                                                                 
 dense_2 (Dense)             (None, 50)                2550      
                                                                 
 dense_3 (Dense)             (None, 6)                 306       
                                                                 
Total params: 10,956
Trainable params: 10,956
Non-traina

In [18]:
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model,
              nb_actions=action_size,
              memory=memory,
              nb_steps_warmup=500,
              target_model_update=8000, # 1e-2
              policy=policy)
dqn.compile(Adam(learning_rate=0.3), metrics=['mae'])
dqn.fit(env, nb_steps=1000,
       visualize=False,
       verbose=1,
       nb_max_episode_steps=99,
       log_interval=100000)

Training for 1000 steps ...
Interval 1 (0 steps performed)
    34/100000 [..............................] - ETA: 8:01 - reward: -1.5294

2022-05-24 23:06:28.835733: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


   500/100000 [..............................] - ETA: 6:31 - reward: -1.2520

2022-05-24 23:06:30.853721: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-24 23:06:30.892949: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


   504/100000 [..............................] - ETA: 8:09 - reward: -1.2679

2022-05-24 23:06:31.138769: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


   999/100000 [..............................] - ETA: 25:15 - reward: -1.2703done, took 15.376 seconds


<keras.callbacks.History at 0x29bd7b310>

Save Model

In [6]:
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

In [12]:
model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


Load model from data

In [13]:
from keras.models import model_from_json
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

Loaded model from disk


In [14]:
loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [19]:
import time
import numpy as np
from IPython.display import clear_output

In [20]:
def _get_action_for_state(state):
    predicted = model.predict_on_batch(tf.expand_dims(state, axis=0))
    action = np.argmax(predicted[0])
    return action

In [22]:
sleep = 0
max_steps = 25


try:
    actions_str = ["South", "North", "East", "West", "Pickup", "Dropoff"]

    iteration = 0
    state = env.reset()  # reset environment to a new, random state
    env.render()
    print(f"Iter: {iteration} - Action: *** - Reward ***")
    time.sleep(sleep)
    done = False

    while not done:
        action = _get_action_for_state(state)
        iteration += 1
        state, reward, done, info = env.step(action)
        clear_output(wait=True)
        env.render()
        print(f"Iter: {iteration} - Action: {action}({actions_str[action]}) - Reward {reward}")
        time.sleep(sleep)
        if iteration == max_steps:
            print("cannot converge :(")
            break
except KeyboardInterrupt:
    pass

+---------+
|R: | : :[35mG[0m|
| : | : : |
| : : : : |
| | : |[43m [0m: |
|[34;1mY[0m| : |B: |
+---------+
  (West)
Iter: 2500 - Action: 3(West) - Reward -1
cannot converge :(
