In [None]:
import gymnasium as gym
import numpy as np
from tensorflow.keras import layers, models, optimizers
from collections import deque
import random
import cv2
# Create the Frogger environment used by every later cell; the "rgb_array"
# render mode is requested so no display window is needed.
env = gym.make(
    "ALE/Frogger-v5",
    render_mode="rgb_array",
)

In [None]:
def preprocess_state(state):
    """Convert an environment observation into an 84x84 grayscale frame.

    Both observation forms used in this notebook are accepted: the
    ``(observation, info)`` tuple produced by ``env.reset()`` and the bare
    image array produced by ``env.step()``.

    Args:
        state: An image array, or a tuple whose first element is the image
            array.

    Returns:
        The frame resized to 84x84 and reduced to a single (grayscale)
        channel.
    """
    # Only unwrap genuine tuples. Indexing a bare frame with [0] would silently
    # select its first pixel row and the rest of the pipeline would then
    # process that row as if it were the whole image.
    frame = state[0] if isinstance(state, tuple) else state

    # OpenCV needs a real ndarray with an image dtype.
    frame = np.asarray(frame, dtype=np.uint8)

    # Resize the frame to 84x84.
    resized = cv2.resize(frame, (84, 84))

    # A 2-D result is already single-channel, so there is nothing to convert.
    if resized.ndim == 2:
        return resized

    # Collapse the RGB channels to grayscale.
    return cv2.cvtColor(resized, cv2.COLOR_RGB2GRAY)

In [None]:
# Initialize DQN model
def create_dqn_model(input_shape, num_actions):
    """Assemble the convolutional Q-network.

    The stack is three ReLU convolutions (32 filters 8x8 stride 4, 64 filters
    4x4 stride 2, 64 filters 3x3 stride 1), a flatten, a 512-unit ReLU dense
    layer, and a final dense layer with no activation.

    Args:
        input_shape: Shape of a single network input, given to the first
            convolution.
        num_actions: Number of units in the output layer.

    Returns:
        The assembled ``Sequential`` model; it is not compiled here.
    """
    network = models.Sequential()

    # Convolutional feature extractor.
    network.add(
        layers.Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=input_shape)
    )
    network.add(layers.Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    network.add(layers.Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))

    # Fully connected head producing one output per action.
    network.add(layers.Flatten())
    network.add(layers.Dense(512, activation='relu'))
    network.add(layers.Dense(num_actions))

    return network

In [None]:
# --- Training hyperparameters ------------------------------------------------
gamma = 0.99                     # discount factor for bootstrapped future value
batch_size = 32                  # transitions sampled per training update
update_target_frequency = 1000   # interval between target-network refreshes
num_episodes = 100               # number of episodes to train for

# --- Epsilon-greedy exploration schedule -------------------------------------
# epsilon starts at epsilon_start, is reduced by epsilon_decay, and is never
# allowed to drop below epsilon_end.
epsilon_start = 1.0
epsilon_end = 0.1
epsilon_decay = 0.0001
epsilon = epsilon_start

# --- Replay memory -----------------------------------------------------------
# Bounded buffer: once 10000 transitions are stored the oldest are discarded.
replay_memory = deque(maxlen=10000)

# --- Online and target networks ----------------------------------------------
# Each network input is a stack of four 84x84 frames.
input_shape = (84, 84, 4)
num_actions = env.action_space.n

dqn_model = create_dqn_model(input_shape, num_actions)
dqn_model_target = create_dqn_model(input_shape, num_actions)

# Start the target network as an exact copy of the online network.
dqn_model_target.set_weights(dqn_model.get_weights())

# Only the online network is trained, so only it is compiled.
dqn_model.compile(optimizer=optimizers.Adam(learning_rate=0.00025), loss='mse')


In [None]:
# Training loop
#
# Environment steps are counted across episodes so the target network can be
# refreshed every `update_target_frequency` steps.
total_steps = 0

for episode in range(num_episodes):
    # reset() returns (observation, info); preprocess_state reads element 0.
    state = preprocess_state(env.reset())
    # Bootstrap the frame stack by repeating the first frame four times.
    state_stack = np.stack([state] * 4, axis=2)
    done = False
    total_reward = 0

    while not done:
        # Select action (epsilon-greedy).
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            # verbose=0 keeps Keras from printing a progress bar on every step.
            q_values = dqn_model.predict(np.expand_dims(state_stack, axis=0), verbose=0)
            action = int(np.argmax(q_values))

        # Execute action. Gymnasium's step() returns
        # (observation, reward, terminated, truncated, info).
        next_frame, reward, terminated, truncated, info = env.step(action)
        # The episode is over on either signal; ignoring `truncated` would keep
        # stepping an environment that has already ended.
        done = terminated or truncated
        total_steps += 1

        # Pass the frame in the same (observation, info) form that reset()
        # produces, because preprocess_state reads element 0 of its argument.
        next_state = preprocess_state((next_frame, info))
        # Slide the window: drop the oldest frame, append the newest.
        next_state_stack = np.concatenate(
            [state_stack[:, :, 1:], next_state[:, :, np.newaxis]], axis=2
        )

        # Store experience in replay memory. Only a true termination should
        # stop bootstrapping, so `terminated` (not `done`) is recorded.
        replay_memory.append((state_stack, action, reward, next_state_stack, terminated))

        # Update state
        state_stack = next_state_stack
        total_reward += reward

        # Train on a mini-batch once enough experience has been collected.
        if len(replay_memory) >= batch_size:
            mini_batch = random.sample(replay_memory, batch_size)

            states, actions, rewards, next_states, terminals = zip(*mini_batch)
            states = np.array(states)
            next_states = np.array(next_states)
            actions = np.array(actions)
            rewards = np.array(rewards, dtype=np.float32)
            # 1.0 where the next state may be bootstrapped from, 0.0 where the
            # transition ended the episode.
            not_terminal = 1.0 - np.array(terminals, dtype=np.float32)

            # Start from the online network's own predictions so that only the
            # taken action's output contributes to the loss.
            target_q_values = dqn_model.predict(states, verbose=0)
            target_q_values_next = dqn_model_target.predict(next_states, verbose=0)
            target_q_values[np.arange(batch_size), actions] = (
                rewards + not_terminal * gamma * target_q_values_next.max(axis=1)
            )

            # Update DQN model with a single pass over the sampled batch.
            dqn_model.fit(states, target_q_values, batch_size=batch_size, epochs=1, verbose=0)

        # Update target network on a step schedule. Testing the episode index
        # here would sync on every step of episode 0 and then never again.
        if total_steps % update_target_frequency == 0:
            dqn_model_target.set_weights(dqn_model.get_weights())

    # Decay epsilon
    # NOTE(review): decay is applied once per episode, so with
    # epsilon_decay = 0.0001 epsilon barely moves over num_episodes = 100.
    # Confirm whether a per-step schedule was intended.
    epsilon = max(epsilon_end, epsilon - epsilon_decay)

    print(f"Episode: {episode + 1}, Total Reward: {total_reward}, Epsilon: {epsilon}")

In [None]:
# Write the online network to disk; the .h5 suffix selects Keras' HDF5 format.
dqn_model.save(filepath='frogger_dqn_model.h5')

In [None]:
pip install psutil


In [None]:
import psutil

# Read the "used" figure (in bytes) from psutil's virtual-memory statistics.
memory_usage = psutil.virtual_memory().used

# Express it in units of 2**30 bytes for readability.
memory_usage_gb = memory_usage / (1 << 30)

print(f"Current memory usage: {memory_usage_gb:.2f} GB")


In [None]:
import tensorflow as tf
# The file was written by Keras' Model.save() in HDF5 (.h5) format, so it must
# be read back with the Keras loader. tf.saved_model.load only reads
# SavedModel directories and cannot open an .h5 file.
loaded_model = tf.keras.models.load_model('frogger_dqn_model.h5')