In [1]:
import gym

In [2]:
env= gym.make('SpaceInvaders-v0')

In [3]:
env.action_space #has 6 discrete actions

Discrete(6)

In [25]:
episodes=10

for episode in range(1,episodes):
    state=env.reset() #resetting env at start
    done=False #done flag to tell agent has completed
    score=0
    
    while not done:
        env.render()
        state,reward,done,info=env.step(env.action_space.sample())#does random action in action space
        score+=reward
    print('Episode: {}\nScore: {}'.format(episode,score))
env.close()

Episode: 1
Score: 240.0
Episode: 2
Score: 30.0
Episode: 3
Score: 30.0
Episode: 4
Score: 80.0
Episode: 5
Score: 120.0
Episode: 6
Score: 120.0
Episode: 7
Score: 155.0
Episode: 8
Score: 105.0
Episode: 9
Score: 50.0


In [4]:
#imports
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

In [5]:
def build_model(height, width, channels, actions):
    model= Sequential()#sequential layer
    model.add(Conv2D(32,(8,8),strides=(4,4), activation='relu', input_shape=(3, height, width,channels)))#32 no of filters 8 is filter size and 4 is stride width
    model.add(Conv2D(64,(4,4), strides=(2,2), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512,activation='relu'))
    model.add(Dense(256,activation='relu'))
    model.add(Dense(actions,activation='linear'))
    return model

In [6]:
height, width,channels=env.observation_space.shape
actions=env.action_space.n

In [13]:
model=build_model(height,width,channels,actions)

In [14]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [15]:
def build_agent(model,actions):
    policy=LinearAnnealedPolicy(EpsGreedyQPolicy(),attr='eps',value_max=1, value_min=.1,value_test=.2, nb_steps=10000)
    memory=SequentialMemory(limit=2000,window_length=3)
    dqn=DQNAgent(model=model, memory=memory,policy=policy,enable_dueling_network=True,dueling_type='avg',nb_actions=actions, nb_steps_warmup=1000)
    return dqn

In [12]:
#del model 
#uncomment and run if you get memory error in below cell 
#go back to creating model after running this


In [16]:
dqn=build_agent(model,actions)

In [17]:
dqn.compile(Adam(lr=0.001))

In [18]:
'''
Possible optimization:
* add more conv layers
* add more dense layers
* increase dataset size by increasing sequential memory limit(currently 2000)
* Change greedy max and min
* more training
* change learning rate
'''

'\nPossible optimization:\n* add more conv layers\n* add more dense layers\n* increase dataset size by increasing sequential memory limit(currently 2000)\n* Change greedy max and min\n* more training\n* change learning rate\n'

In [None]:
dqn.fit(env,nb_steps=40000,visualize=False,verbose=1)#visualization decreases training speed

Training for 40000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
 1078/10000 [==>...........................] - ETA: 12:35 - reward: 0.1438

In [20]:
scores=dqn.test(env,nb_episodes=10,visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Episode 1: reward: 90.000, steps: 792
Episode 2: reward: 135.000, steps: 749
Episode 3: reward: 105.000, steps: 649
Episode 4: reward: 195.000, steps: 855
Episode 5: reward: 110.000, steps: 618
Episode 6: reward: 160.000, steps: 747
Episode 7: reward: 230.000, steps: 944
Episode 8: reward: 535.000, steps: 1227
Episode 9: reward: 110.000, steps: 723
Episode 10: reward: 155.000, steps: 601
182.5


In [None]:
dqn.save_weights('models/dqn001.h5f')

In [19]:
dqn.load_weights('models/dqn.h5f')