# Deep Q-Network (DQN) for Breakout Game

This notebook implements the Deep Q-Network (DQN) algorithm for training an agent to play the Breakout game using OpenAI Gym.

## 1. Import Dependencies

In [1]:
import gym
import random
import numpy as np
from collections import deque
from skimage.color import rgb2gray
from skimage.transform import resize
import cv2
import tensorflow.compat.v1 as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Conv2D, Dense, Flatten
from tensorflow.python.framework.ops import disable_eager_execution

tf.disable_v2_behavior()
disable_eager_execution()

np.random.seed(1)
random.seed(1)
tf.set_random_seed(1)

2023-05-31 13:55:18.397611: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-31 13:55:18.477716: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-31 13:55:18.480028: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/dfki.uni-bremen.de/csingh/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:/opt/ros

Instructions for updating:
non-resource variables are not supported in the long term


## 2. Define Constants

In [2]:
# Environment settings
STATE_SIZE = (84, 84, 4)
ACTION_SIZE = 3

# Training parameters
EPSILON_START = 1.0
EPSILON_END = 0.1
EXPLORATION_STEPS = 1000000
EPSILON_DECAY_STEP = (EPSILON_START - EPSILON_END) / EXPLORATION_STEPS
BATCH_SIZE = 32
TRAIN_START = 50000
UPDATE_TARGET_RATE = 10000
DISCOUNT_FACTOR = 0.99
MEMORY_SIZE = 400000
NO_OP_STEPS = 30

# Model parameters
LEARNING_RATE = 0.00025
OPTIMIZER_EPSILON = 0.01

# General parameters
EPISODES = 50000

## 3. Define DQNAgent Class

In [4]:
class DQNAgent:
    def __init__(self):
        self.state_size = STATE_SIZE
        self.action_size = ACTION_SIZE
        
        self.memory = deque(maxlen=MEMORY_SIZE)
        
        self.gamma = DISCOUNT_FACTOR
        
        self.epsilon = EPSILON_START
        
        self.epsilon_decay_step = EPSILON_DECAY_STEP
        
        self.epsilon_end = EPSILON_END
        
        self.model = self._build_model()
        self.target_model = self._build_model()
        
        self.update_target_model()
        
    def _build_model(self):
        model = Sequential()
        model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=self.state_size))
        model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
        model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(self.action_size))
        
        optimizer = RMSprop(lr=LEARNING_RATE, epsilon=OPTIMIZER_EPSILON)
        model.compile(loss='mse', optimizer=optimizer)
        
        return model
    
    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())
    
    def get_action(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_values = self.model.predict(state)
           return np.argmax(q_values[0])
    
    def replay_memory(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def train_replay(self):
        if len(self.memory) < TRAIN_START:
            return
        
        minibatch = random.sample(self.memory, BATCH_SIZE)
        
        states = np.zeros((BATCH_SIZE, *self.state_size))
        next_states = np.zeros((BATCH_SIZE, *self.state_size))
        actions, rewards, dones = [], [], []
        
        for i in range(BATCH_SIZE):
            states[i] = minibatch[i][0]
            actions.append(minibatch[i][1])
            rewards.append(minibatch[i][2])
            next_states[i] = minibatch[i][3]
            dones.append(minibatch[i][4])
        
        q_values = self.model.predict(states)
        next_q_values = self.target_model.predict(next_states)
        
        for i in range(BATCH_SIZE):
            if dones[i]:
                q_values[i][actions[i]] = rewards[i]
            else:
                q_values[i][actions[i]] = rewards[i] + self.gamma * np.max(next_q_values[i])
        
        self.model.fit(states, q_values, verbose=0)
        
        if self.epsilon > self.epsilon_end:
            self.epsilon -= self.epsilon_decay_step


IndentationError: unindent does not match any outer indentation level (<tokenize>, line 43)

## 4. Define Preprocessing Functions

In [5]:
def preprocess_frame(frame):
    gray = rgb2gray(frame)
    resized = resize(gray, (110, 84), mode='constant')
    cropped = resized[18:102, :]
    preprocessed = cv2.resize(cropped, (84, 84), interpolation=cv2.INTER_NEAREST)
    preprocessed = preprocessed / 255.0
    return preprocessed

def stack_frames(stacked_frames, state, is_new_episode):
    frame = preprocess_frame(state)
    if is_new_episode:
        stacked_frames = deque([frame, frame, frame, frame], maxlen=4)
    else:
        stacked_frames.append(frame)
    stacked_state = np.stack(stacked_frames, axis=2)
    return stacked_state, stacked_frames

## 5. Define Training Loop

In [6]:
def train_dqn():
    env = gym.make('BreakoutDeterministic-v4')
    
    agent = DQNAgent()
    
    stacked_frames = deque(maxlen=4)
    
    for episode in range(EPISODES):
        
        state = env.reset()
        stacked_state, stacked_frames = stack_frames(stacked_frames, state, True)
        
        done = False
        total_reward = 0
        
        while not done:
            
            action = agent.get_action(np.expand_dims(stacked_state, axis=0))
            
            next_state, reward, done, _ = env.step(action)
            
            next_stacked_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
            
            agent.replay_memory(stacked_state, action, reward, next_stacked_state, done)
            
            stacked_state = next_stacked_state
            
            if len(agent.memory) > TRAIN_START:
                agent.train_replay()
                
            total_reward += reward
            
            if done:
                agent.update_target_model()
                
                print(f'Episode: {episode+1}/{EPISODES}, Score: {total_reward}')
                
                if episode % 10 == 0:
                    agent.model.save_weights('breakout_weights.h5')
                
train_dqn()

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


NameError: name 'DQNAgent' is not defined