In [9]:
# %pip install keras-rl2  # one-time setup; %pip installs into the running kernel's environment

In [10]:
import tempfile
import gym
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout, Reshape
from keras.layers.embeddings import Embedding
from tensorflow.keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import Policy, BoltzmannQPolicy
from rl.memory import SequentialMemory

In [11]:
class DecayEpsGreedyQPolicy(Policy):
    """Epsilon-greedy action selection with an exponentially decaying epsilon.

    On the k-th call to ``select_action`` (k starts at 0) the exploration
    probability is ``min_eps + (max_eps - min_eps) * exp(-lamb * k)``.
    """

    def __init__(self, max_eps=.1, min_eps=.05, lamb=0.001):
        """Store the decay schedule and reset the step counter.

        Args:
            max_eps: Exploration probability used on the very first call.
            min_eps: Value epsilon decays towards as the call count grows.
            lamb: Decay rate applied per ``select_action`` call
                (stored on the instance as ``lambd``).
        """
        super(DecayEpsGreedyQPolicy, self).__init__()
        self.max_eps = max_eps
        self.lambd = lamb
        self._steps = 0
        self.min_eps = min_eps

    def select_action(self, q_values):
        """Pick an action for one state.

        Args:
            q_values: 1-D array holding one Q-value estimate per action.

        Returns:
            Index of the chosen action: uniformly random with probability
            epsilon, otherwise the arg-max of ``q_values``.
        """
        assert q_values.ndim == 1
        nb_actions = q_values.shape[0]
        # Epsilon is evaluated with the pre-increment counter, so the first
        # call explores with probability exactly max_eps.
        eps = self.min_eps + (self.max_eps - self.min_eps) * \
            np.exp(-self.lambd * self._steps)
        self._steps += 1
        # Progress report every 1000 calls.
        if self._steps % 1000 == 0:
            print("Current eps:", eps)
        if np.random.uniform() < eps:
            # randint excludes the upper bound, so this covers
            # 0 .. nb_actions - 1, the same range the deprecated
            # np.random.random_integers(0, nb_actions - 1) produced.
            action = np.random.randint(0, nb_actions)
        else:
            action = np.argmax(q_values)
        return action


# Gym environment id; passed to gym.make and embedded in the weights file name.
ENV_NAME = 'FrozenLake8x8-v1'

In [12]:
# Never elide array elements when printing, and show floats with
# 4 digits of precision.
np.set_printoptions(precision=4, threshold=np.inf)

# Create the environment and read the size of its discrete action set.
env = gym.make(ENV_NAME)
nb_actions = env.action_space.n
nb_actions

In [13]:
def get_keras_model(action_space_shape, nb_states=None):
    """Build the Q-network: an embedding table with one output row per state.

    Args:
        action_space_shape: Number of discrete actions. When ``nb_states`` is
            given this is the width of the model output.
        nb_states: Number of distinct state indices the model must accept,
            i.e. the embedding vocabulary size. When omitted, the original
            single-argument behaviour is kept unchanged: the vocabulary has
            ``action_space_shape`` rows and the output width is 4.

    Returns:
        The uncompiled ``Sequential`` model (its summary is printed as a
        side effect).
    """
    if nb_states is None:
        # Legacy call form, preserved for backward compatibility.
        vocab_size, nb_outputs = action_space_shape, 4
    else:
        vocab_size, nb_outputs = int(nb_states), int(action_space_shape)

    model = Sequential()
    model.add(Embedding(vocab_size, nb_outputs, input_length=1))
    # Drop the length-1 sequence axis so the output is one vector per sample.
    model.add(Reshape((nb_outputs,)))
    # summary() prints by itself; wrapping it in print() also printed "None".
    model.summary()
    return model


# The embedding needs a row for every state index the environment can emit;
# sizing it with nb_actions alone makes larger state indices fall outside the
# table.
model = get_keras_model(nb_actions, nb_states=env.observation_space.n)

In [14]:
# Replay memory configuration (buffer limit and observation window length).
memory = SequentialMemory(window_length=1, limit=10000)

# Exploration schedule: epsilon starts at 0.9 and decays towards 0 at a rate
# of 1e-4 per action selection.
policy = DecayEpsGreedyQPolicy(
    max_eps=0.9,
    min_eps=0,
    lamb=1 / (1e4),
)

# Assemble the agent from the model, memory and policy, then compile it with
# a default-configured Adam optimizer.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=500,
    target_model_update=1e-2,
    enable_double_dqn=False,
    batch_size=512,
)
dqn.compile(Adam())

In [15]:
# Best-effort warm start: try to restore weights saved under this
# environment's name; on any failure report it and carry on.
try:
    dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
except Exception as load_error:
    print(load_error)

In [16]:
# NOTE(review): temp_folder is not used by any visible cell, and the directory
# created by mkdtemp is never removed — confirm whether it is still needed.
temp_folder = tempfile.mkdtemp()

# Train the agent. nb_steps is given as an integer (the original passed the
# float 1e5) so step counts are handled and reported as whole numbers.
dqn.fit(env, nb_steps=100000, visualize=False, verbose=1, log_interval=10000)

In [17]:
# Persist the trained weights (replacing any earlier file of the same name),
# then run 20 evaluation episodes without rendering.
dqn.save_weights(
    'dqn_{}_weights.h5f'.format(ENV_NAME),
    overwrite=True,
)
dqn.test(env, visualize=False, nb_episodes=20)

In [22]:
# Save the Keras model itself (separate from the agent weights file above).
# NOTE(review): "FORZNE" in the file name looks like a typo for "FROZEN";
# left unchanged because other code may load the file under this name.
model.save("DQN_FORZNE_LAKE_8x8.h5")