<a href="https://colab.research.google.com/github/cormacGreaney/Implementing-a-Deep-Q-Network-for-Atari-Pong-Using-Gymnasium-and-ALE/blob/Cube-Testing-Fun-Branch/trying%20to%20build%20a%20modek.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gymnasium as gym            # main RL library
import numpy as np                 # handy for arrays
import matplotlib.pyplot as plt    # for visuals
import ale_py                      # Atari emulator backend

# Register the Atari environments provided by ale_py with Gymnasium.
# Without this call, ids such as "PongNoFrameskip-v4" are not in the
# registry and cannot be created with gym.make.
gym.register_envs(ale_py)

In [None]:
# Make a Pong environment that returns RGB frames so we can display them.
env = gym.make("PongNoFrameskip-v4", render_mode="rgb_array")

# Each observation is an image; unpack its (height, width, channels) shape
# so the network's input layer can be sized from it.
height, width, channels = env.observation_space.shape

# Number of discrete actions available in Pong. This must be the SIZE of the
# action space (`.n`), not `sample()`, which only returns one random action
# index and would give the network a random (possibly zero) output width.
num_actions = int(env.action_space.n)

# Reset the environment to start a new game and get the first observation.
obs, info = env.reset()

# Print what kind of data we got back.
print("Obs type/shape:", type(obs), getattr(obs, "shape", None))

In [None]:
# Show the meaning of each action exposed by the underlying (unwrapped)
# environment; the result is displayed as this cell's output.
env.unwrapped.get_action_meanings()

In [None]:
# Gather up to 200 observations by playing uniformly random actions,
# stopping early if the episode finishes first.
frames = []
terminated = truncated = False

while len(frames) < 200 and not (terminated or truncated):
    # Pick any action from Pong's action space and advance one step.
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)

    # Keep the resulting frame so it can be previewed later.
    frames.append(obs)

print("Collected", len(frames), "frames")

In [None]:
# Preview the first frame gathered above, with the axes hidden.
first_frame = frames[0]
plt.imshow(first_frame)
plt.axis("off")
plt.show()

In [None]:
# Play 5 full games of Pong with a uniformly random policy and print the
# total reward of each one, as a baseline to compare a trained agent against.
episodes = 5
for episode in range(1, episodes + 1):
    # Start a fresh game and clear the per-episode bookkeeping.
    obs, info = env.reset()
    terminated = False
    truncated = False
    score = 0

    # Keep acting randomly until the game ends or is cut short.
    while not (terminated or truncated):
        # No env.render() call is needed here: the env was created with
        # render_mode="rgb_array", so rendering yields image arrays rather
        # than opening a window.
        action = env.action_space.sample()

        # Advance one step and accumulate the reward into the episode score.
        obs, reward, terminated, truncated, info = env.step(action)
        score += reward

    # Report the episode number and its total score.
    print('Episode:{} Score:{}'.format(episode, score))

# Release the environment's resources.
# NOTE(review): later cells still reference `env` — confirm it remains usable
# after close(), or re-create it before training.
env.close()

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

In [None]:
# The shape of the game screen determines the input layer of the network,
# and the number of actions determines its output layer.
def build_model(height, width, channels, actions):
    """Build the convolutional Q-network used by the DQN agent.

    Three convolution layers extract features from the image, the result is
    flattened into a vector, and dense layers map it to one linear output
    per action.

    Args:
        height: Height of an input frame in pixels.
        width: Width of an input frame in pixels.
        channels: Number of colour channels per frame.
        actions: Number of units in the final (linear) output layer.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    model = Sequential()

    # Convolution stack: learned filters that pick out features in the frame.
    # 32 filters in the first layer (the previous value of 31 looked like a
    # typo; the rest of the stack follows the usual 32-64-64 DQN layout).
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu',
                     input_shape=(height, width, channels)))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))

    # Collapse the feature maps into a single vector for the dense layers.
    model.add(Flatten())

    # Fully connected layers.
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))

    # Output layer: one linear unit per action.
    model.add(Dense(actions, activation='linear'))

    return model

In [None]:
# Build the network: the input layer is sized from the frame dimensions and
# the output layer gets num_actions units.
model = build_model(height, width, channels, num_actions)

In [None]:
# Print the model's summary to check the architecture that was built.
model.summary()

In [None]:

# The keras-rl2 distribution (pip install keras-rl2) installs its code under
# the top-level package `rl`; there is no importable `keras_rl2` module.
from rl.agents import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory

In [None]:
def build_agent(model, actions):
    """Assemble a DQN agent around ``model``.

    The agent is configured with:
      * an epsilon-greedy policy whose ``eps`` attribute is annealed linearly
        between 1.0 and 0.1 over 10000 steps (0.2 is used as the test value),
      * a sequential memory with limit 1000 and window_length 3,
      * the dueling network option enabled with the 'avg' type,
      * 1000 warm-up steps.

    Args:
        model: The Keras model handed to the agent.
        actions: Value passed to the agent as ``nb_actions``.

    Returns:
        The (not yet compiled) ``DQNAgent``.
    """
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )

    # NOTE(review): window_length=3 presumably makes the agent feed the model
    # stacks of 3 observations — confirm the model's input shape allows this.
    replay = SequentialMemory(limit=1000, window_length=3)

    return DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )

In [None]:
# Build the agent with the number of actions the model's output layer was
# built with. `action` (a single randomly sampled action left over from the
# rollout cells) is not a valid action count.
dqn = build_agent(model, num_actions)
dqn.compile(Adam(learning_rate=1e-4))

# Train for 10000 steps without rendering.
# NOTE(review): `env` was closed at the end of the random-play cell — confirm
# it is still usable here or re-create it with gym.make before training.
# NOTE(review): verify that the agent's fit loop accepts the reset()/step()
# return values of this Gymnasium environment.
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)