# 0. Install Dependencies

In [26]:
!pip install tensorflow==2.3.1 gym keras-rl2 gym[atari]

# 1. Test Random Environment with OpenAI Gym

In [49]:
import gym 
import random

In [50]:
import atari_py
!python -m atari_py.import_roms ROMS
print(atari_py.list_games())

copying space_invaders.bin from ROMS\Space Invaders (1980) (Atari, Richard Maurer - Sears) (CX2632 - 49-75153) ~.bin to C:\Program Files\Python37\lib\site-packages\atari_py\atari_roms\space_invaders.bin
['space_invaders', 'tetris']


Traceback (most recent call last):
  File "C:\Program Files\Python37\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Program Files\Python37\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Program Files\Python37\lib\site-packages\atari_py\import_roms.py", line 93, in <module>
    main()
  File "C:\Program Files\Python37\lib\site-packages\atari_py\import_roms.py", line 89, in main
    import_roms(args.dirpath)
  File "C:\Program Files\Python37\lib\site-packages\atari_py\import_roms.py", line 82, in import_roms
    save_if_matches(f)
  File "C:\Program Files\Python37\lib\site-packages\atari_py\import_roms.py", line 68, in save_if_matches
    with open(rom_path, "wb") as out_f:
PermissionError: [Errno 13] Permission denied: 'C:\\Program Files\\Python37\\lib\\site-packages\\atari_py\\atari_roms\\space_invaders.bin'


In [51]:
# Create the Space Invaders environment used for both training and testing below.
env = gym.make('SpaceInvaders-v0')
# Unpack the observation shape; these values size the model's input layer.
height, width, channels = env.observation_space.shape
# Size of the action space; used as the number of outputs of the Q-network.
actions = env.action_space.n

In [52]:
# Show the human-readable name of each action, in action-index order.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

# 2. Create a Deep Learning Model with Keras

In [54]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [55]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    The network consumes a stack of ``window_length`` frames, each of shape
    ``(height, width, channels)``. Three convolutional layers are followed by a
    flatten and two dense layers, ending in one linear output per action.

    Args:
        height: Frame height in pixels.
        width: Frame width in pixels.
        channels: Number of colour channels per frame.
        actions: Number of outputs of the final linear layer (one per action).
        window_length: Number of stacked frames in one observation. Defaults to
            3, the value previously hard-coded here. It should match the
            ``window_length`` of the replay memory handed to the agent
            (``build_agent`` in this notebook also uses 3).

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Leading axis of the input is the frame stack, not the batch dimension.
    stacked_input_shape = (window_length, height, width, channels)

    model = Sequential()
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=stacked_input_shape))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear head: raw (unbounded) value estimate for each action.
    model.add(Dense(actions, activation='linear'))
    return model

In [57]:
# Build the Q-network from the environment's frame dimensions and action count.
model = build_model(height, width, channels, actions)

In [58]:
# Print the per-layer output shapes and parameter counts as a sanity check.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 3, 22, 16, 64)     36928     
_________________________________________________________________
flatten_3 (Flatten)          (None, 67584)             0         
_________________________________________________________________
dense_12 (Dense)             (None, 512)               34603520  
_________________________________________________________________
dense_13 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_14 (Dense)             (None, 6)                

# 3. Build Agent with Keras-RL

In [59]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [60]:
def build_agent(model, actions, window_length=3, memory_limit=1000):
    """Assemble the DQN agent around ``model``.

    The agent uses an epsilon-greedy policy whose ``eps`` attribute is annealed
    by ``LinearAnnealedPolicy`` (``value_max=1.0``, ``value_min=0.1``,
    ``nb_steps=10000``, ``value_test=0.2``), a ``SequentialMemory`` replay
    buffer, and the dueling-network option with ``dueling_type='avg'``.

    Args:
        model: The Q-network to wrap (see ``build_model``).
        actions: Number of discrete actions, passed as ``nb_actions``.
        window_length: Number of frames stacked per observation by the replay
            memory. Defaults to 3, the value previously hard-coded here; it
            should match the frame-stack depth of ``model``'s input.
        memory_limit: ``limit`` of the replay memory. Defaults to 1000, the
            value previously hard-coded here.

    Returns:
        An uncompiled ``DQNAgent``; call ``compile`` on it before use.
    """
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    return DQNAgent(
        model=model, memory=memory, policy=policy,
        enable_dueling_network=True, dueling_type='avg',
        nb_actions=actions, nb_steps_warmup=1000,
    )

In [61]:
# Wrap the Q-network in a DQN agent and compile it with the Adam optimizer.
dqn = build_agent(model, actions)
# `learning_rate` is the supported argument name; `lr` is only a deprecated alias.
dqn.compile(Adam(learning_rate=1e-4))

In [62]:
# Keep exactly one of the two options below uncommented: either load
# pre-trained weights or train a new model.
# Option 1: load pre-trained weights
dqn.load_weights('SavedWeights/1m/dqn_weights.h5f')
# Option 2: train a new model
# dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

In [64]:
# run below test after trained or loaded the model
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 260.000, steps: 1206
Episode 2: reward: 90.000, steps: 525
Episode 3: reward: 155.000, steps: 648
Episode 4: reward: 465.000, steps: 780
Episode 5: reward: 235.000, steps: 600
Episode 6: reward: 260.000, steps: 714
Episode 7: reward: 495.000, steps: 1158
Episode 8: reward: 255.000, steps: 898
Episode 9: reward: 405.000, steps: 1131
Episode 10: reward: 275.000, steps: 1259
Episode 11: reward: 395.000, steps: 1206
Episode 12: reward: 410.000, steps: 1127
Episode 13: reward: 170.000, steps: 633
Episode 14: reward: 250.000, steps: 947
Episode 15: reward: 320.000, steps: 1245
Episode 16: reward: 365.000, steps: 685
Episode 17: reward: 225.000, steps: 1403
Episode 18: reward: 190.000, steps: 636
Episode 19: reward: 255.000, steps: 1216
Episode 20: reward: 385.000, steps: 721
Episode 21: reward: 170.000, steps: 628
Episode 22: reward: 465.000, steps: 1111
Episode 23: reward: 190.000, steps: 661
Episode 24: reward: 250.000, steps: 1076
Episode 25

# 4. Save trained model

In [43]:
# Uncomment the line below to save the weights after training a model.
# dqn.save_weights('SavedWeights/2k/dqn_weights.h5f')