In [None]:
# Much of this code is adapted from Nick Nochnack: https://github.com/nicknochnack/KerasRL-OpenAI-Atari-SpaceInvadersv0

In [None]:
'''
Need very specific dependencies and environment
My system: Intel Mac
Commands to run in terminal in single quotes

Used a virtual environment using virtualenv
You may need to install virtualenv
Command pulling up virtual environment with python 3.7
'virtualenv -p python3.7 env'
'source env/bin/activate'

Update pip and pull specific versions of libraries/apis
'pip install --upgrade pip'
'pip install jupyter'
'pip install stable_baselines3==1.0'
'pip install tensorflow==2.3.1'
'pip install gym==0.18.0'
'pip install keras_rl2==1.0.4'
'pip install atari_py==0.2.9'
'pip install autorom==0.4.2'
'pip install opencv-python'

Download Atari 2600 ROMS:
type this command in terminal: 'autorom'
fill in current_path and current_directory
change directories command: 'cd /{current_path}/{current_directory}/env/lib/python3.7/site-packages/atari_py/atari_roms/'
once in correct directory type in command: 'python -m atari_py.import_roms .'
cd back into current_directory

run jupyter notebook with command: 'jupyter notebook'
Fill and run cells below

'''

In [1]:
import gym
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.callbacks import ModelIntervalCheckpoint

# File the final trained weights are written to.
# Plain string: the original `f` prefix had no placeholders to interpolate.
WEIGHTS_FILENAME = 'spaceinv_weights.h5f'
# Template for intermediate checkpoints. '{step}' is deliberately left
# unformatted here; presumably the checkpoint callback fills it in with the
# current training step -- confirm against ModelIntervalCheckpoint.
CHECKPOINT_WEIGHTS_FILENAME = 'spaceinv_weights_{step}.h5f'

In [2]:
# Builds the deep learning model: 3 convolution layers and 3 fully connected layers
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network.

    The network stacks three ``Convolution2D`` layers (32, 64 and 64 filters),
    flattens the result, and finishes with three ``Dense`` layers (512, 256 and
    ``actions`` units). The last layer is linear and has one output per action.

    Args:
        height: Height of a single observation frame.
        width: Width of a single observation frame.
        channels: Number of channels in a single observation frame.
        actions: Number of discrete actions; size of the output layer.
        window_length: Number of frames stacked into one network input. This
            should match the ``window_length`` of the agent's memory.
            Defaults to 3, the value that used to be hard-coded.

    Returns:
        The uncompiled ``Sequential`` model.

    Raises:
        ValueError: If ``window_length`` is smaller than 1.
    """
    if window_length < 1:
        raise ValueError(f'window_length must be >= 1, got {window_length}')

    # Leading axis is the stack of frames supplied by the agent's memory.
    input_shape = (window_length, height, width, channels)

    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=input_shape))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear activation: the outputs are unbounded per-action value estimates.
    model.add(Dense(actions, activation='linear'))
    return model

In [3]:
# Builds the DQN agent with an annealed epsilon-greedy policy and sequential memory
def build_agent(model, actions, window_length=3, memory_limit=1000,
                anneal_steps=10000, warmup_steps=1000):
    """Build the dueling DQN agent around ``model``.

    Every default equals the value that used to be hard-coded, so
    ``build_agent(model, actions)`` behaves exactly as before.

    Args:
        model: The Q-network passed to ``DQNAgent``.
        actions: Number of discrete actions (``nb_actions``).
        window_length: Number of frames the memory stacks per observation.
            This should match the leading dimension of the model's input shape.
        memory_limit: ``limit`` passed to ``SequentialMemory``.
        anneal_steps: Number of steps over which epsilon is annealed from
            1.0 down to 0.1.
        warmup_steps: ``nb_steps_warmup`` passed to ``DQNAgent``.

    Returns:
        The uncompiled ``DQNAgent``.
    """
    # Epsilon-greedy exploration: eps goes linearly from value_max to value_min
    # during training; value_test is the eps used when testing.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=anneal_steps)
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=warmup_steps)
    return dqn

In [None]:
# Set up the environment, model, and agent, then train the agent and save its weights
env = gym.make('SpaceInvaders-v0')
height, width, channels = env.observation_space.shape
actions = env.action_space.n
num_steps = 10000

print(env.unwrapped.get_action_meanings())

model = build_model(height, width, channels, actions)
model.summary()

# Checkpoint callback configured with an interval of 5000 and the '{step}' filename template
callbacks = [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=5000)]

dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
dqn.compile(Adam(learning_rate=0.00025))

dqn.fit(env, callbacks=callbacks, nb_steps=num_steps, visualize=False, verbose=2)
# Save first, so 'done' is only reported once the weights are on disk.
dqn.save_weights(WEIGHTS_FILENAME, overwrite=True)
print('done')

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 22, 16, 64)     36928     
_________________________________________________________________
flatten (Flatten)            (None, 67584)             0         
_________________________________________________________________
dense (Dense)                (None, 512)               34603520  
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_______________________________________________________________

2022-12-08 15:25:21.360972: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-08 15:25:21.386083: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7ff0909d37f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-12-08 15:25:21.386098: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version


Training for 10000 steps ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
  655/10000: episode: 1, duration: 10.996s, episode steps: 655, steps per second:  60, episode reward: 155.000, mean reward:  0.237 [ 0.000, 30.000], mean action: 2.449 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1351/10000: episode: 2, duration: 236.486s, episode steps: 696, steps per second:   3, episode reward: 155.000, mean reward:  0.223 [ 0.000, 30.000], mean action: 2.503 [0.000, 5.000],  loss: 79.571968, mean_q: 10.693740, mean_eps: 0.894205
 2220/10000: episode: 3, duration: 565.602s, episode steps: 869, steps per second:   2, episode reward: 210.000, mean reward:  0.242 [ 0.000, 30.000], mean action: 2.540 [0.000, 5.000],  loss: 1.273533, mean_q: 8.856466, mean_eps: 0.839350
 3215/10000: episode: 4, duration: 640.592s, episode steps: 995, steps per second:   2, episode reward: 260.000, mean reward:  0.261 [ 0.000, 30.000], 

In [10]:
# Evaluate the trained agent over 10 rendered episodes and report the mean episode reward
print(f'Just trained for {num_steps} number of steps')
scores = dqn.test(env, nb_episodes=10, visualize=True)
episode_rewards = scores.history['episode_reward']
mean_reward = np.mean(episode_rewards)
print(mean_reward)
print('hello')

Just trained for 10000 number of steps
Testing for 10 episodes ...
Episode 1: reward: 80.000, steps: 672
Episode 2: reward: 245.000, steps: 1090
Episode 3: reward: 135.000, steps: 704
Episode 4: reward: 145.000, steps: 887
Episode 5: reward: 75.000, steps: 513
Episode 6: reward: 110.000, steps: 875
Episode 7: reward: 105.000, steps: 912
Episode 8: reward: 165.000, steps: 1070
Episode 9: reward: 75.000, steps: 390
Episode 10: reward: 55.000, steps: 397
119.0
hello
