In [1]:
import gym
env = gym.make('SpaceInvaders-v0')

In [2]:
episodes =10

for episode in range(1,episodes):
  state = env.reset()
  done = False
  score = 0

  while not done:
    env.render()
    state, reward, done, info = env.step(env.action_space.sample())
    score+= reward
  print('Episode: {}\nScore {}'.format(episode, score))


print("Done;")
env.close()

Episode: 1
Score 150.0
Episode: 2
Score 65.0
Episode: 3
Score 135.0
Episode: 4
Score 210.0
Episode: 5
Score 110.0
Episode: 6
Score 370.0
Episode: 7
Score 90.0
Episode: 8
Score 65.0
Episode: 9
Score 120.0
Done;


In [3]:
#import nn
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

In [4]:
def build_model(height,width,channels,actions):
    model = Sequential()
    model.add(Conv2D(32,(8,8),strides=(4,4),activation='relu',input_shape=(3,height,width,channels)))
    model.add(Conv2D(64,(4,4),strides=(2,2),activation='relu'))
    model.add(Flatten())
    model.add(Dense(512,activation='relu'))
    model.add(Dense(256,activation='relu'))
    model.add(Dense(actions,activation='linear'))
    return model

In [5]:
height,width,channels = env.observation_space.shape
actions = env.action_space.n

In [15]:
del model #delete model

In [16]:
model = build_model(height,width,channels,actions)

In [17]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [18]:
def build_agent(model,actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),attr='eps',value_max=1.,value_min=.1,value_test=.2,nb_steps=10000)
    memory = SequentialMemory(limit=2000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, enable_dueling_network=True, dueling_type='avg',nb_actions=actions,nb_steps_warmup=1000)
    
    return dqn

In [19]:
dqn = build_agent(model,actions)

In [20]:
dqn.compile(Adam(lr=0.001))

In [27]:
hist = dqn.fit(env,nb_steps=40000, visualize=False, verbose=1)

Training for 40000 steps ...
Interval 1 (0 steps performed)
 1190/10000 [==>...........................] - ETA: 17:23 - reward: 0.1807done, took 142.331 seconds


In [23]:
scores = dqn.test(env,nb_episodes=10,visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Episode 1: reward: 60.000, steps: 600
Episode 2: reward: 260.000, steps: 1263
Episode 3: reward: 35.000, steps: 618
Episode 4: reward: 290.000, steps: 1152
Episode 5: reward: 85.000, steps: 699
Episode 6: reward: 35.000, steps: 521
Episode 7: reward: 135.000, steps: 685
Episode 8: reward: 50.000, steps: 718
Episode 9: reward: 40.000, steps: 643
Episode 10: reward: 160.000, steps: 932
115.0


In [24]:
dqn.save_weights('models/dqn.h5f')

In [25]:
dqn.load_weights('models/dqn.h5f')