# Pacman.ai
---
*by Hans Kamin & David Lutze*  

An artificial intelligence that learns how to beat the game *Ms. Pacman* using Deep Q-Learning.    

A very grateful shout-out to the YouTuber Siraj Raval for teaching us what we needed to know to create this through his awesome [video](https://www.youtube.com/watch?v=79pmNdyxEGo)!

In [1]:
# Gym contains the game environment.
import gym

# We need these libraries for various jobs.
import random
import numpy as np
import tensorflow as tf

# A deque with a maximum length stores past moves; once it is full,
# the oldest entries are dropped automatically.
from collections import deque

# Sequential network, creating one layer at a time.
from keras.models import Sequential 

# Dense layers are standard, fully connected layers. 
from keras.layers import Dense

  return f(*args, **kwds)
Using TensorFlow backend.


In [4]:
# Initialize the Pacman environment & the (still empty) network.
game = gym.make("MsPacman-v0")
net = Sequential()

# Save important details about the game environment.
# MsPacman-v0 observations are image frames with more than one axis (see the
# Gym Atari documentation), so the flattened input size is the product of all
# axes, not just the first one. For a one-axis observation space this is the
# same value as shape[0]. It keeps np.reshape(state, [1, state_size]) valid
# for a whole frame.
state_size = int(np.prod(game.observation_space.shape))

# Number of discrete actions the bot can choose from.
action_size = game.action_space.n

In [6]:
# Build the network one fully connected layer at a time.
# Input:  a game state with state_size values.
# Output: one Q-value per potential action.

# First layer: declares the input width.
net.add(Dense(20, input_dim=state_size, activation="relu"))

# Remaining hidden layers, in order.
for hidden_units in (18, 10):
    net.add(Dense(hidden_units, activation="relu"))

# Output layer: exactly one linear unit per potential action.
net.add(Dense(action_size, activation="linear"))

# Compile the network so it can be trained on Q-value targets.
net.compile(optimizer="adam", loss="mse", metrics=["accuracy"])

In [7]:
# Remaining settings for the agent.

# Recorded moves; holds at most 2000 entries.
storage = deque(maxlen=2000)

# Exploration: perform_action makes a random move whenever a uniform random
# draw is <= explore_rate, so 1.0 means "always random" at the start.
explore_rate = 1.0
explore_min = 0.01     # presumably the floor for explore_rate
explore_decay = 0.995  # presumably the multiplier used to shrink explore_rate

# Presumably the weight given to future rewards when computing Q-value targets.
discount_rate = 0.95

# NOTE(review): net.compile selects its optimizer by name, so this value is
# not passed to it.
learning_rate = 0.001

In [None]:
def perform_action(state):
    """Generate an action for the bot to take.

    A uniform random number is drawn; if it is <= ``explore_rate`` a random
    action is returned (exploration). Otherwise the network's predictions
    for ``state`` are computed and the index of the largest one is returned
    (exploitation).

    Args:
        state: Game state, passed unchanged to ``net.predict``.

    Returns:
        Either a random integer in ``range(action_size)`` or the index of the
        largest value in the first row of ``net.predict(state)``.
    """
    # Explore: the higher explore_rate is, the more often we move randomly.
    if np.random.rand() <= explore_rate:
        return random.randrange(action_size)

    # Exploit: make an educated guess from the network's predicted values.
    action_vals = net.predict(state)
    return np.argmax(action_vals[0])

In [None]:
def record_results(action, reward, state, next_state, game_over):
    """Save one step of play in ``storage`` so we can learn from it later.

    Every call appends an entry, not only calls for steps that ended the
    game. The entry is the tuple
    ``(action, reward, state, next_state, game_over)``, in that order.

    Args:
        action: The action that was taken.
        reward: The reward recorded for that action.
        state: The state the action was taken in.
        next_state: The state that followed.
        game_over: Whether the game ended on this step.
    """
    storage.append((action, reward, state, next_state, game_over))

In [None]:
def replay_game(batch_size):
    """Train ``net`` on a random sample of recorded steps.

    NOTE(review): the original definition had no body (a SyntaxError). This
    is the standard experience-replay update for a Deep Q network, written
    against the names this file already defines; confirm it matches the
    intended design.

    For each sampled step the training target for the action that was taken
    is the recorded reward, plus ``discount_rate`` times the largest value
    predicted for the next state when the game did not end on that step.
    After the batch, ``explore_rate`` is multiplied by ``explore_decay``
    while it is still above ``explore_min``.

    Args:
        batch_size: Maximum number of recorded steps to train on. If fewer
            steps are stored, all of them are used.
    """
    global explore_rate

    # Never ask random.sample for more entries than have been recorded.
    sample_size = min(batch_size, len(storage))

    # Entries are unpacked in the order record_results stores them.
    for action, reward, state, next_state, game_over in random.sample(
            storage, sample_size):
        target = reward
        if not game_over:
            # Add the discounted best predicted value of the next state.
            target = reward + discount_rate * np.amax(
                net.predict(next_state)[0])

        # Only the value of the action actually taken is changed; the other
        # outputs keep the network's current predictions.
        target_vals = net.predict(state)
        target_vals[0][action] = target
        net.fit(state, target_vals, epochs=1, verbose=0)

    # Gradually shift from random moves to educated guesses.
    if explore_rate > explore_min:
        explore_rate *= explore_decay

In [None]:
# Training schedule: number of games to play, and how many recorded steps
# to replay after each game.
num_games = 1000
batch_size = 32
game_over = False

for episode in range(num_games):
    # Start a fresh game and reshape the first state into a single row.
    state = np.reshape(game.reset(), [1, state_size])

    # Play at most 500 steps per game.
    for step in range(500):
        # game.render()  # uncomment to watch the bot play
        action = perform_action(state)
        next_state, reward, game_over, _info = game.step(action)

        # Replace the reward with a fixed penalty on the step that ends the game.
        if game_over:
            reward = -10
        next_state = np.reshape(next_state, [1, state_size])

        # Remember this step, then advance to the next state.
        record_results(action, reward, state, next_state, game_over)
        state = next_state

        if game_over:
            print ("game: {} of {}, time alive: {}, exploration rate: {:.2}"
                  .format(episode, num_games, step, explore_rate))
            break

    # Train only once more steps are stored than one batch needs.
    if len(storage) > batch_size:
        replay_game(batch_size)