# Actor Critic Model

## Initial setup

In [1]:
import sys

# Put the project's Python sources at the front of the import path.
# The membership check keeps the cell idempotent: re-running it no longer
# stacks a duplicate "../python" entry onto sys.path each time.
PYTHON_SRC_DIR = "../python"
if PYTHON_SRC_DIR not in sys.path:
    sys.path.insert(0, PYTHON_SRC_DIR)
print(sys.path)

['../python', '', '/home/james/anaconda3/envs/vizdoom/lib/python36.zip', '/home/james/anaconda3/envs/vizdoom/lib/python3.6', '/home/james/anaconda3/envs/vizdoom/lib/python3.6/lib-dynload', '/home/james/.local/lib/python3.6/site-packages', '/home/james/anaconda3/envs/vizdoom/lib/python3.6/site-packages', '/home/james/anaconda3/envs/vizdoom/lib/python3.6/site-packages/Sphinx-1.5.4-py3.6.egg', '/home/james/anaconda3/envs/vizdoom/lib/python3.6/site-packages/setuptools-27.2.0-py3.6.egg', '/home/james/anaconda3/envs/vizdoom/lib/python3.6/site-packages/IPython/extensions', '/home/james/.ipython']


In [2]:
from vizdoom import *
from helper import create_agent
import tensorflow as tf

In [3]:
# Other experiments may be using the GPUs: an empty CUDA_VISIBLE_DEVICES is
# meant to keep this notebook off them (presumably CPU-only from here on).
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [8]:
# Load IPython's autoreload extension; mode 2 picks up edits to imported
# modules live, so changes to the project code show up on the next cell run.
%load_ext autoreload
%autoreload 2

In [9]:
# Initializes DoomGame from config file
def initialize_vizdoom(config_file):
    """Create a DoomGame, load its settings from ``config_file`` and start it.

    Args:
        config_file: Path of the configuration file handed to
            ``DoomGame.load_config``.

    Returns:
        The DoomGame instance, after ``init()`` has been called on it.
    """
    # The progress text has no trailing newline, so it may otherwise sit in
    # the output buffer until "Done." is printed; flush=True writes it now.
    print("Initializing doom... ", end="", flush=True)
    game = DoomGame()
    game.load_config(config_file)
    game.init()
    print("Done.")
    return game

In [58]:
# Experiment settings: agent definition file, game config file, output
# directory and the name of the action set passed to create_agent.
agent_file_path = "../agents/test.json"
config_file_path = "../config/test.cfg"
results_dir = "../tmp/tmp_results/"
action_set = "basic_three"

# Reset TensorFlow's default graph, start the game, then build the agent.
tf.reset_default_graph()
game = initialize_vizdoom(config_file_path)
agent = create_agent(
    agent_file_path,
    game=game,
    action_set=action_set,
    output_directory=results_dir,
)

Initializing doom... Done.


In [59]:
def print_agent_status():
    """Print a preview of the global ``agent``'s buffers to stdout.

    ``s1_buffer`` and ``s2_buffer`` are indexed with ``[:, :3, 1, 1]`` and
    only ``agent.memory.r[:5]`` is shown; ``a_buffer``, ``r_buffer`` and
    ``gamma_buffer`` are printed whole.
    """
    # Same index key as writing [:, :3, 1, 1] inline.
    state_preview = (slice(None), slice(None, 3), 1, 1)

    def _sections():
        # Lazy generator: each attribute is read right before it is printed.
        yield "s1_buffer:    \n", agent.s1_buffer[state_preview]
        yield "a_buffer:     \n", agent.a_buffer
        yield "s2_buffer:    \n", agent.s2_buffer[state_preview]
        yield "r_buffer:     \n", agent.r_buffer
        yield "gamma_buffer: \n", agent.gamma_buffer
        yield "memory r:     \n", agent.memory.r[:5]

    for heading, values in _sections():
        print(heading, values)

In [60]:
# View memory storage: begin a new episode, then run five learning steps and
# print the agent's buffers after each one.
agent.initialize_new_episode()
for i in range(5):
    step_number = i + 1
    print("Step %d: " % step_number)
    agent.perform_learning_step()
    print_agent_status()

Step 1: 
s1_buffer:    
 [[ 0.28836533  0.28864545  0.26703683]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]]
a_buffer:     
 [2 0 0 0 0]
s2_buffer:    
 [[ 0.28836533  0.28864545  0.26703683]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]]
r_buffer:     
 [-0.04  0.    0.    0.    0.  ]
gamma_buffer: 
 [ 1.          0.99        0.9801      0.970299    0.96059601]
memory r:     
 [ 0.  0.  0.  0.  0.]
Step 2: 
s1_buffer:    
 [[ 0.28836533  0.28864545  0.26703683]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]]
a_buffer:     
 [2 0 0 0 0]
s2_buffer:    
 [[ 0.28836533  0.28864545  0.26703683]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.          0.          0.   

In [61]:
# Run five more learning steps (no episode reset in this cell) to watch
# transitions being added to memory; buffers are printed after each step.
for i in range(5):
    step_number = i + 1
    print("Step %d: " % step_number)
    agent.perform_learning_step()
    print_agent_status()

Step 1: 
s1_buffer:    
 [[ 0.37389955  0.3882353   0.3739796 ]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.34117648  0.37340936  0.34117648]]
a_buffer:     
 [0 0 2 0 0]
s2_buffer:    
 [[ 0.40738297  0.3882353   0.3882353 ]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.34117648  0.37340936  0.34117648]
 [ 0.37389955  0.3882353   0.3739796 ]]
r_buffer:     
 [-0.04 -0.04 -0.04 -0.04 -0.04]
gamma_buffer: 
 [ 0.96059601  1.          0.99        0.9801      0.970299  ]
memory r:     
 [-9.88252354  0.          0.          0.          0.        ]
Step 2: 
s1_buffer:    
 [[ 0.37389955  0.3882353   0.3739796 ]
 [ 0.40738297  0.3882353   0.3882353 ]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.28836533  0.28864545  0.26703683]
 [ 0.34117648  0.37340936  0.34117648]]
a_buffer:     
 [0 0 2 0 0]
s2_buffer:    
 [[ 0.40738297  0.3882353   0.3882353 ]
 [ 0.40234095  0.46162465  0.41