In [1]:
#importação das bibliotecas necessárias
import gym
import numpy as np
import tensorflow
import rl

from stable_baselines3 import PPO

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [None]:
# Environment sanity check
# Look up the ids known to gym's registry and report whether Assault is among them
keys = gym.envs.registry.keys()
print("ALE/Assault-v5" in keys)

# Instantiate the Assault environment with RGB observations, rgb_array rendering
# and the reduced (non-full) action space
env = gym.make(
    "ALE/Assault-v5",
    render_mode="rgb_array",
    obs_type="rgb",
    full_action_space=False,
)

# Read the observation dimensions and the number of discrete actions
observation_shape = env.observation_space.shape
height, width, channels = observation_shape
actions = env.action_space.n
print(f"Height: {height}, Width: {width}, Channels: {channels}")
print(f"Actions: {actions}")

### Aleatório

In [None]:
# Smoke test: play the environment with uniformly random actions
from copy import deepcopy
EPISODES = 1
for episode in range(1, EPISODES + 1):
    state = env.reset()
    DONE = False
    TRUNCATED = False
    SCORE = 0

    # Step until the episode is over. env.step returns two separate end-of-episode
    # flags (unpacked below as DONE and TRUNCATED); either one must stop the loop,
    # otherwise a truncated episode would keep being stepped.
    while not (DONE or TRUNCATED):
        env.render()
        action = env.action_space.sample()
        n_state, reward, DONE, TRUNCATED, info = env.step(action)
        SCORE += reward

    # Report the reward accumulated over the episode
    print(f"Episode: {episode}/{EPISODES}, Score: {SCORE}")
# NOTE(review): `env` is closed here, yet later cells pass the same object to
# dqn.fit / dqn.test — confirm the environment is still usable after close().
env.close()

### Treinamento

In [None]:
def build_model(height, width, channels, actions, window_length=4):
    """Build the convolutional Q-network used by the DQN agent.

    The network is three convolutional layers followed by a 512-unit dense
    layer and a linear output layer with one unit per action.

    Args:
        height: Observation height in pixels.
        width: Observation width in pixels.
        channels: Number of colour channels per observation.
        actions: Number of discrete actions (size of the output layer).
        window_length: Number of stacked observations per network input.
            The agent's SequentialMemory in this notebook is created with
            window_length=4, so the model input needs a matching leading
            axis; the default keeps the two in sync. Pass None to build a
            model that takes a single (height, width, channels) frame.

    Returns:
        An uncompiled Sequential model.
    """
    if window_length is None:
        input_shape = (height, width, channels)
    else:
        # NOTE(review): relies on the Conv2D layer accepting inputs with an
        # extra leading axis before (rows, cols, channels) — confirm for the
        # installed TensorFlow/Keras version.
        input_shape = (window_length, height, width, channels)

    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=input_shape))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    # Linear output: one unbounded value per action
    model.add(Dense(actions, activation='linear'))
    return model

In [None]:
# Build the Q-network from the dimensions read off the environment
model = build_model(
    height=height,
    width=width,
    channels=channels,
    actions=actions,
)

In [None]:
# Display the model's summary for inspection before training
model.summary()

In [None]:
# Replay memory holding up to 1,000,000 entries with a window length of 4.
# NOTE(review): build_agent creates its own SequentialMemory with the same
# settings, so this module-level object looks unused — confirm before relying on it.
memory = SequentialMemory(
    limit=1000000,
    window_length=4,
)

In [None]:
def build_agent(model, actions, window_length=4, memory_limit=1000000,
                eps_max=1.0, eps_min=0.1, eps_test=0.05, eps_anneal_steps=1000000,
                nb_steps_warmup=50000, target_model_update=10000):
    """Assemble a DQNAgent around ``model``.

    All hyperparameters default to the values that were previously
    hard-coded, so ``build_agent(model, actions)`` behaves as before.

    Args:
        model: Network passed to DQNAgent as ``model``.
        actions: Number of actions, passed as ``nb_actions``.
        window_length: ``window_length`` of the SequentialMemory.
        memory_limit: ``limit`` of the SequentialMemory.
        eps_max: ``value_max`` of the annealed 'eps' attribute.
        eps_min: ``value_min`` of the annealed 'eps' attribute.
        eps_test: ``value_test`` of the annealed 'eps' attribute.
        eps_anneal_steps: ``nb_steps`` of the LinearAnnealedPolicy.
        nb_steps_warmup: Forwarded to DQNAgent. Should presumably be smaller
            than the number of steps given to ``fit`` — TODO confirm.
        target_model_update: Forwarded to DQNAgent.

    Returns:
        The constructed (not yet compiled) DQNAgent.
    """
    # Epsilon-greedy policy whose 'eps' attribute is annealed linearly
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=eps_max,
        value_min=eps_min,
        value_test=eps_test,
        nb_steps=eps_anneal_steps,
    )
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(
        model=model,
        policy=policy,
        nb_actions=actions,
        memory=memory,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=target_model_update,
    )

    return dqn

In [None]:
# Create the agent and compile it with Adam (learning rate 1e-4), tracking MAE
dqn = build_agent(model=model, actions=actions)
dqn.compile(
    Adam(learning_rate=1e-4),
    metrics=['mae'],
)

In [None]:
# Train the agent on `env` for 10000 steps with visualization enabled.
# NOTE(review): nb_steps (10000) is smaller than the nb_steps_warmup (50000)
# configured in build_agent, so training may never leave the warmup phase —
# confirm and raise nb_steps or lower the warmup.
# NOTE(review): the random-rollout cell unpacks five values from env.step;
# verify that the installed keras-rl fit loop supports that step API.
dqn.fit(env, nb_steps=10000, visualize=True, verbose=2)

In [None]:
# Evaluate the agent for 5 episodes and print the mean of the recorded episode rewards
scores = dqn.test(
    env,
    nb_episodes=5,
    visualize=True,
)
print(np.mean(scores.history['episode_reward']))