In [8]:
import gym 
import random

In [9]:
# Create the Space Invaders environment, then record the two quantities the
# Q-network is sized from: the number of discrete actions and the shape of a
# single observation frame, unpacked as (height, width, channels).
env = gym.make('SpaceInvaders-v4')
actions = env.action_space.n
height, width, channels = env.observation_space.shape

In [10]:
# Show the human-readable name of each discrete action the environment accepts.
env.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [49]:
# Frame height: first entry of the environment's observation shape.
height

210

In [50]:
# Frame width: second entry of the environment's observation shape.
width

160

In [51]:
# Channel count: third entry of the environment's observation shape.
channels

3

In [36]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [37]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    The network applies three convolutional layers, flattens the result and
    ends in a linear layer with one output per action.

    Args:
        height: Frame height, taken from the environment's observation shape.
        width: Frame width.
        channels: Number of channels per frame.
        actions: Number of discrete actions; size of the output layer.
        window_length: Number of consecutive frames that make up one network
            input. It must equal the ``window_length`` of the replay memory
            that feeds the agent. Defaults to 3, the value that used to be
            hard-coded in the input shape.

    Returns:
        An uncompiled ``Sequential`` model whose input shape (batch axis
        excluded) is ``(window_length, height, width, channels)``.
    """
    model = Sequential()
    # The leading window axis is carried through the convolutions unchanged
    # (the model summary shows e.g. (None, 3, 51, 39, 32)); the frames are only
    # combined once everything is flattened for the dense layers.
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=(window_length, height, width, channels)))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    # Linear activation: the outputs are unbounded action-value estimates.
    model.add(Dense(actions, activation='linear'))
    return model

In [38]:
# Instantiate the Q-network for this environment's frame shape and action count.
model = build_model(height, width, channels, actions)

In [39]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 3, 22, 16, 64)     36928     
_________________________________________________________________
flatten_2 (Flatten)          (None, 67584)             0         
_________________________________________________________________
dense_9 (Dense)              (None, 512)               34603520  
_________________________________________________________________
dense_10 (Dense)             (None, 6)                 3078      
Total params: 34,682,534
Trainable params: 34,682,534
Non-trainable params: 0
__________________________________________

In [40]:
from rl.agents import DQNAgent, SARSAAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy, BoltzmannQPolicy

In [42]:
# --- Step / episode budgets -------------------------------------------------
# NOTE: despite its name, total_episodes is passed to dqn.fit as nb_steps, so
# it bounds the number of environment steps, not the number of episodes.
total_episodes = 10000
# Number of steps over which the exploration rate is annealed (used as the
# policy's nb_steps in build_agent).
max_steps = 10000
# Number of evaluation episodes run by dqn.test.
total_test_episodes = 10

# --- Optimisation -----------------------------------------------------------
# NOTE(review): the training log reports a loss in the millions once learning
# starts; a much smaller learning rate is probably worth trying - confirm.
learning_rate = 1e-2
# Discount factor handed to the agent as gamma.
Gamma = 0.99

# --- Epsilon-greedy exploration: annealed from epsilon down to min_epsilon ---
epsilon, min_epsilon = 1.0, 0.1

In [43]:
def build_agent(model, actions, window_length=3, memory_limit=1000,
                nb_steps_warmup=2000):
    """Wrap ``model`` in a dueling DQN agent with annealed epsilon-greedy exploration.

    Exploration bounds, the annealing horizon and the discount factor are read
    from the notebook-level settings ``epsilon``, ``min_epsilon``,
    ``max_steps`` and ``Gamma``.

    Args:
        model: Q-network whose final layer has ``actions`` outputs.
        actions: Number of discrete actions.
        window_length: Number of consecutive observations stacked into one
            agent input. It must match the leading dimension of the model's
            input shape. Defaults to 3.
        memory_limit: Capacity of the replay memory. Defaults to 1000.
        nb_steps_warmup: Number of initial steps before learning starts.
            Defaults to 2000.

    Returns:
        An uncompiled ``DQNAgent``; call ``compile`` on it before training.
    """
    # Epsilon goes linearly from `epsilon` to `min_epsilon` over `max_steps`
    # training steps; a fixed 0.2 is used while testing.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(), attr='eps',
        value_max=epsilon, value_min=min_epsilon, value_test=.2,
        nb_steps=max_steps)
    # NOTE(review): with the defaults the memory (1000) is smaller than the
    # warm-up (2000), so only the latest 1000 transitions are presumably still
    # available when learning begins - confirm this is intended.
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=nb_steps_warmup,
                   gamma=Gamma)
    return dqn
# Build the agent around the current model and attach the optimiser.
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=learning_rate))

In [22]:
# Discard the current network and build a fresh one, so the agent constructed
# in the next cell receives a model that no earlier agent has wrapped.
# Without the rebuild, `model` would be undefined when that cell runs and a
# top-to-bottom execution would stop with a NameError.
del model
model = build_model(height, width, channels, actions)

In [44]:
# Create the agent that is actually trained below and attach the optimiser.
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=learning_rate))

In [45]:
# Train the agent without rendering. NOTE: total_episodes is passed as
# nb_steps, so it limits the number of environment steps (the log reads
# "Training for 10000 steps"), not the number of episodes.
dqn.fit(env, nb_steps=total_episodes, visualize=False, verbose=2)

Training for 10000 steps ...
  788/10000: episode: 1, duration: 18.955s, episode steps: 788, steps per second:  42, episode reward: 150.000, mean reward:  0.190 [ 0.000, 30.000], mean action: 2.569 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1213/10000: episode: 2, duration: 10.271s, episode steps: 425, steps per second:  41, episode reward: 75.000, mean reward:  0.176 [ 0.000, 25.000], mean action: 2.515 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1629/10000: episode: 3, duration: 10.014s, episode steps: 416, steps per second:  42, episode reward: 75.000, mean reward:  0.180 [ 0.000, 25.000], mean action: 2.548 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 2269/10000: episode: 4, duration: 192.290s, episode steps: 640, steps per second:   3, episode reward: 120.000, mean reward:  0.188 [ 0.000, 30.000], mean action: 2.561 [0.000, 5.000],  loss: 4354527.451991, mean_q: 231.011517, mean_eps: 0.807895
 2953/10000: episode: 5, duration: 474.437s, episode steps: 68

<tensorflow.python.keras.callbacks.History at 0x18a24c78bc8>

In [47]:
# Evaluate the trained agent with rendering enabled, then report the average
# reward over the evaluation episodes.
scores = dqn.test(env, visualize=True, nb_episodes=total_test_episodes)
print(np.asarray(scores.history['episode_reward']).mean())

Testing for 10 episodes ...
Episode 1: reward: 420.000, steps: 1075
Episode 2: reward: 105.000, steps: 661
Episode 3: reward: 155.000, steps: 698
Episode 4: reward: 300.000, steps: 1381
Episode 5: reward: 120.000, steps: 670
Episode 6: reward: 185.000, steps: 702
Episode 7: reward: 120.000, steps: 656
Episode 8: reward: 145.000, steps: 936
Episode 9: reward: 435.000, steps: 889
Episode 10: reward: 185.000, steps: 916
217.0


In [48]:
# Persist the trained agent's weights so they can be reloaded later without
# retraining. The same path is used by the load_weights cell at the end.
dqn.save_weights('Save/10000/dqn.h5f')

In [None]:
# Baseline: play a few episodes with uniformly random actions, rendering each
# frame, and print the total reward collected per episode.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # Draw from the environment's own action count rather than a
            # hard-coded list of six ids, so the loop stays valid if the
            # environment changes.
            action = random.randrange(env.action_space.n)
            n_state, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always close the environment, even if the loop is interrupted or an
    # episode raises.
    env.close()

In [None]:
# Restore the weights written by the save_weights cell into the agent.
dqn.load_weights('Save/10000/dqn.h5f')

Copyright (c) 2022 Peichen Han

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.