Julien Gauthier

# Reinforcement Learning (Deep Q) Project

#### WARNING : install the correct versions of gym/tensorflow/keras-rl2 in a virtual environment.

In [None]:
# Pinned package versions this project needs in order to work.
# pygame is installed separately (presumably for the render window - confirm).
%pip install tensorflow==2.12.0 keras-rl2==1.0.5 gym==0.25.2
%pip install pygame

### I. Setting up the OpenAI Cart Pole environment

In [None]:
import gym
import random

The environment is where the experiment takes place. The states are its input parameters (here: cart position, cart velocity, pole angle and pole tip velocity), and the actions are the possible outputs (move the cart left or right).

In [None]:
# Create the CartPole environment in "human" render mode.
env = gym.make("CartPole-v1", render_mode="human")
# Number of discrete actions exposed by the environment.
actions = env.action_space.n
# Length of the first dimension of the observation space.
states = env.observation_space.shape[0]

Testing the environment with random actions.

In [None]:
# Sanity-check the environment by playing a few episodes with random actions.
episodes = 10
try:
    for episode in range(1, episodes + 1):
        env.reset()
        score = 0
        done = False

        while not done:
            env.render()
            # Draw the action from the environment's own action space rather
            # than hard-coding the two CartPole actions.
            action = env.action_space.sample()
            # Only the reward and the termination flag are used here.
            _next_state, reward, done, _info = env.step(action)
            score += reward
        print(f'Episode:{episode} Score:{score}')
finally:
    # Always release the environment, even if an episode raises.
    env.close()

### II. Deep Learning model with Keras.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam

Let's create a function that will build our model.

In [None]:
def build_model(states, actions):
    """Return a small fully-connected Keras network.

    The network flattens an input of shape ``(1, states)``, passes it
    through two 24-unit ReLU layers and ends with a linear layer of
    ``actions`` units.

    Args:
        states: Size of the observation vector.
        actions: Number of output units (one per action).

    Returns:
        The uncompiled ``Sequential`` model.
    """
    layers = [
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

We can now use our function to create and show an instance of the model :

In [None]:
# Instantiate the network for the environment's dimensions and print its layers.
model = build_model(states=states, actions=actions)
model.summary()

### III. Agent creation with Keras-RL

In [None]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

Let's create a function to build an agent with a given model and possible actions. We'll use the Boltzmann Q Policy and the DQN Algorithm.

In [None]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    The agent uses a ``BoltzmannQPolicy`` and a ``SequentialMemory`` with a
    limit of 50000 entries and a window length of 1.

    Args:
        model: Keras model passed to the agent.
        actions: Number of actions, passed as ``nb_actions``.

    Returns:
        The uncompiled ``DQNAgent``.
    """
    exploration_policy = BoltzmannQPolicy()
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        nb_actions=actions,
        memory=replay_memory,
        policy=exploration_policy,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

Now, let's build the agent

In [None]:
# Build the DQN agent around the model created above.
dqn = build_agent(model=model, actions=actions)

### IV. Training (and visualizing) the agent

We can now train the agent : (visualize=True to see the progress in real time)

In [None]:
# Create a fresh rendering environment for this training run.
env = gym.make("CartPole-v1", render_mode="human")
states = env.observation_space.shape[0]
actions = env.action_space.n

dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
try:
    dqn.fit(env, nb_steps=100000, visualize=True, verbose=1)
finally:
    # Release the render window as soon as training stops (normally or on
    # error) so it is not left open once nothing is driving it any more.
    # This cell recreates `env` on every run, so closing it here is safe.
    env.close()

Save your model's weights now, before pygame crashes! (TODO: fix the crash.)

In [None]:
# Write the agent's weights to disk, replacing any existing file of that name.
dqn.save_weights(filepath='pre-trained-model-100ksteps.h5f', overwrite=True)

#### WARNING : this will reset your model ! (ONLY IF YOU WANT TO RE-TRAIN YOUR MODEL)

In [None]:
# Discard the current model and agent, then rebuild both from scratch.
del model, dqn
model = build_model(states=states, actions=actions)
dqn = build_agent(model=model, actions=actions)

# You can now re-run the previous cell to train the model or import a pre-trained model in the next cell

##### Test a saved model in the Cart Pole environment :

In [None]:
# Create a new environment, model and agent, then restore the saved weights.
env = gym.make('CartPole-v1')
states = env.observation_space.shape[0]
actions = env.action_space.n
model = build_model(states=states, actions=actions)
dqn = build_agent(model=model, actions=actions)
dqn.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])

# Load the pre-trained model from the repository
dqn.load_weights(filepath='pre-trained-model-100ksteps.h5f')

In [None]:
# Run three rendered test episodes and print the mean episode reward.
dqnscores = dqn.test(env, nb_episodes=3, visualize=True)
print(np.asarray(dqnscores.history['episode_reward']).mean())