In [1]:
# Cecilia Aponte
# AI - Reinforcement Learning
# Breakout

# This only needs to be done once per notebook.
# Install the PyDrive wrapper & import libraries.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

your_module = drive.CreateFile({'id':'17C0gsmd0Ujipwrzm1xibvcKRx78KMS02'})
your_module.GetContentFile('deepqnet.py')

# from google.colab import drive
# drive.mount('/content/gdrive')
# !ls /content/gdrive/My\ Drive/Breakout_Project/


import gym
from deepqnet import *
import numpy as np
import matplotlib.pyplot as plt



# remove information of the image that is not needed, score, or rgb color (only one channel)
def preprocess(observation):
    observation = observation / 255
    obs = np.mean(observation[30:, :], axis=2).reshape(180,160,1)
    return obs


# stack the frames so agent understands the motion
def stackFrames(stacked_frames, frame, bufferSize):
    if stacked_frames is None: # initialize
        stacked_frames = np.zeros((bufferSize, *frame.shape))
        for id,_ in enumerate(stacked_frames):
            stacked_frames[id,:] = frame
    else: # stack the frames chosen
        stacked_frames[0:bufferSize-1,:] = stacked_frames[1:,:]
        stacked_frames[bufferSize-1, :] = frame
    stacked_frames = stacked_frames.reshape(*frame.shape[0:2], bufferSize)

    return stacked_frames

# Plot the results
def plot(x, scores, epsHist, rewards):
    fig, axs = plt.subplots(3)
    axs[0].plot( x, scores, marker='o', markerfacecolor='green', markersize=5, color='green', linewidth=4)
    axs[1].plot( x, epsHist, marker='', color='blue', linewidth=4)
    axs[2].plot( x, rewards, marker='', color='purple', linewidth=4, linestyle='dashed', label="rewards")

    plt.xticks(np.arange(min(x), max(x)+1, 1.0))
    plt.xlabel('Episodes', fontsize=10)
    axs[0].set_ylabel('Score', fontsize=10)
    axs[1].set_ylabel('Epsilon', fontsize=10)
    axs[2].set_ylabel('Reward', fontsize=10)

    for ax in axs.flat:
        ax.label_outer()

    fig.suptitle('Change in Score, Epsilon, and Reward with Time', fontsize=15)
    plt.show()


if __name__ == '__main__':
    env = gym.make('Breakout-v0')
    loadCkpt = False
    agent = Agent(epsilon=1.0, gamma=0.99, alpha=0.005, inputDims=(180,160,4), nActions=4, MemSize=10000, batchSize=64)

    if loadCkpt:
        agent.loadModels()

    scores = []
    epsHist = []
    rewards = []
    nGames = 800 # games per epoch to get reward
    stackSize = 4
    score = 0
    nSteps = 0


    for i in range(nGames):
        done = False

        observation = env.reset()
        observation = preprocess(observation)
        stacked_frames = None
        observation = stackFrames(stacked_frames, observation, stackSize)
        score = 0
        totRewards = 0
        while not done:
            action = agent.chooseAction(observation)
            obsNext, reward, done, info = env.step(action)
            reward = np.clip(reward, -1, 1)
            if done == True:
                reward = -1
            totRewards += reward
            nSteps += 1
            obsNext = stackFrames(stacked_frames, preprocess(obsNext), stackSize)
            score += reward
            agent.storeTransition(observation, action, reward, obsNext, int(done))
            observation = obsNext


            if nSteps % 5 == 0:
                agent.learning()
              
        if i % 20 == 0 and i > 0:
            # to see if agent score is increasing (agent is learning)
            avg_score = np.mean(scores[max(0, i-10):(i+1)])
            print('episode: ', i, '\t','score: ', score, '\t',
                 ' average score %.3f: ' % avg_score, '\t',
                'epsilon: %.3f ' % agent.epsilon, '\t Total Reward: ', totRewards)
        else:
            print('episode: ', i, '\t','score: ', score, '\t epsilon: %.3f ' % agent.epsilon, '\t reward: ', reward)
        epsHist.append(agent.epsilon)
        scores.append(score)
        rewards.append(reward)
    x = [i+1 for i in range(nGames)]
    
    plot(x, scores, epsHist, reward)





The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.flatten instead.



episode:  0 	 score:  0.0 	 epsilon: 1.000  	 reward:  -1
episode:  1 	 score:  0.0 	 epsilon: 1.000  	 reward:  -1
episode:  2 	 score:  1.0 	 epsilon: 1.000  	 reward:  -1
episode:  3 	 score:  1.0 	 epsilon: 1.000  	 reward:  -1
episode:  4 	 score:  0.0 	 epsilon: 1.000  	 reward:  -1
episode:  5 	 score:  1.0 	 epsilon: 1.000  	 reward:  -1
episode:  6 	 score:  1.0 	 epsilon: 1.000  	 reward:  -1
episode:  7 	 score:  0.0 	 epsilon: 1.000  	 reward:  -1
episode:  8 	 score:  0.0 	 epsilon: 1.000  	 reward:  -1
episode:  9 	 score:  -1.0 	 epsilon: 1.000  	 

KeyboardInterrupt: ignored