In [None]:
#Credits and Thanks
#https://www.youtube.com/watch?v=cO5g5qLrLSo
#https://keras-rl.readthedocs.io/en/latest/agents/overview/
#https://www.gymlibrary.dev/environments/classic_control/cart_pole/

In [23]:
import gym
import numpy as np
import tensorflow

In [24]:
env = gym.make("CartPole-v1")
states = env.observation_space.shape[0]
actions = env.action_space.n

In [25]:
def build_model(states, actions, layers):
    model = tensorflow.keras.Sequential()
    model.add(tensorflow.keras.layers.Flatten(input_shape=(1,states)))
    for layer in range (0,layers):
        model.add(tensorflow.keras.layers.Dense(24, activation='relu'))
    model.add(tensorflow.keras.layers.Dense(actions, activation='linear'))
    return model

In [26]:
model = build_model(states, actions, 3)

In [27]:
from rl.agents import DQNAgent
from rl.agents import SARSAAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [28]:
def build_agent(model, actions, agent="DQN"):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    if agent=="DQN":
        return DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions)
    elif agent=="SARSA":
        return SARSAAgent(model=model, policy=policy, nb_actions=actions)
    else:
        return

In [29]:
models = {"SARSA": "", "DQN": ""}
for modelName in models.keys():
    print(f'Training {modelName} model...')
    agent = build_agent(model, actions, modelName)
    agent.compile(tensorflow.keras.optimizers.legacy.Adam(lr=1e-3), metrics=['mae'])
    agent.fit(env, nb_steps=50000, visualize=False, verbose=0)
    models[modelName] = agent

Training SARSA model...
Training DQN model...


In [30]:
for modelName in models.keys():
    scores = models[modelName].test(env, nb_episodes=100, visualize=False,verbose=0)
    print(modelName, ": ", np.mean(scores.history['episode_reward']))
    models[modelName].save_weights(f'{modelName}_weights.h5f')

SARSA :  500.0
DQN :  500.0
