In [1]:
%matplotlib inline

import gym
from gym.wrappers import Monitor
import itertools
import numpy as np
import os
import random
import sys
import psutil
import tensorflow as tf

In [2]:
# Register the parent directory on the module search path exactly once,
# presumably so the project-local `lib` package imported below can be found.
if sys.path.count("../") == 0:
    sys.path.append("../")

from lib import plotting
from collections import deque, namedtuple

In [3]:
# Instantiate the "Breakout-v0" environment through gym's environment factory.
env = gym.envs.make("Breakout-v0")

In [4]:
# The four valid Atari actions, by integer id:
#   0 = noop, 1 = fire, 2 = left, 3 = right
VALID_ACTIONS = list(range(4))

In [5]:
class StateProcessor():
    """
    Processes a raw Atari image: converts it to grayscale, crops it and resizes it.

    The preprocessing ops are created once, in the constructor; `process`
    then evaluates them for a single frame.
    """
    def __init__(self):
        # Build the preprocessing graph inside its own variable scope.
        with tf.variable_scope("state_processor"):
            # A single raw RGB frame of shape [210, 160, 3].
            self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
            grayscale = tf.image.rgb_to_grayscale(self.input_state)
            # Keep a 160x160 window starting 34 rows below the top edge.
            cropped = tf.image.crop_to_bounding_box(grayscale, 34, 0, 160, 160)
            resized = tf.image.resize_images(
                cropped, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            # Drop the size-1 channel dimension left over from the grayscale conversion.
            self.output = tf.squeeze(resized)

    def process(self, sess, state):
        """
        Run one frame through the preprocessing graph.

        Args:
            sess: A Tensorflow session object
            state: A [210, 160, 3] Atari RGB State

        Returns:
            A processed [84, 84] state representing grayscale values.
        """
        feed = {self.input_state: state}
        return sess.run(self.output, feed)
        

In [None]:
class Estimator():
    """Q-Value Estimator neural network.

    This network is used for both the Q-Network and the Target Network.
    """
    
    def __init__(self, scope="estimator", summaries_dir=None):
        self.scope = scope
        # writes tensorboard summaries to disk
        self.summary_writer = None
        with tf.variable_scope(scope):
            # build the graph
            self._build_model()
            if summaries.dir:
                summary_dir = os.path.join(summaries_dir, "summaries_{}".format(scope))
                if not os.path.exits(summary_dir):
                    os.makedirs(summary_dir)
                self.summary_writer = tf.summary.FileWriter(summary_dir)
                
    def _build_model(self):
        
        #builds tensorflow graph
        
        #placeholders for our input
        # our inputs are 4 grayscale frames of shape [84, 84] each
        self.X_pl = tf.placeholder(shape=[None, 84,84, 4], dtype= tf.uint8, name="X")
        #the TD target value
        self.y_pl = tf.placeholder(shape=[None], dtype = tf.float32, name = "y")
        #integer id of which action was selected
        self.actions_pl = tf.placeholder(shape=[None], dtype=tf.int32, name="actions")
        
        X= tf.to_float(self.X_pl) /  255.0
        batch_size= tf.shape(self.X_pl)[0]
        
        #Three convolutional layers
        conv1 = tf.contrib.layers.conv2d(X, 32, 8, 4, activation_fn=tf.nn.relu)
        conv2 = tf.contrib.layers.conv2d(conv1,64,4,2, activation_fn= tf.nn.relu)
        conv3 = tf.contrib.layers.conv2d(conv2, 64, 3, 1, activatioon_fn= tf.nn.relu)
        
        # fully connected layers
        flattened = tf.contrib.layers.flatten(conv3)
        fc1 = tf.contrib.layers.fully_connected(flattened, 512)
        self.predictions = tf.contrib.layers.fully_connected(fc1, len(VALID_ACTIONS))
        
        #get the predictions for the chosen actions only
        gather