In [9]:
!pip install tensorflow==2.3.1 gym keras-rl2



In [10]:
pip install git+https://github.com/Kojoley/atari-py.git

Collecting git+https://github.com/Kojoley/atari-py.git
  Cloning https://github.com/Kojoley/atari-py.git to c:\users\garima\appdata\local\temp\pip-req-build-22mf64af
Note: you may need to restart the kernel to use updated packages.


  Running command git clone -q https://github.com/Kojoley/atari-py.git 'C:\Users\Garima\AppData\Local\Temp\pip-req-build-22mf64af'


In [11]:
!pip install cmake
!pip install atari-py
!pip install gym[atari]



In [2]:
import atari_py
print(atari_py.list_games())

['adventure', 'air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis', 'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival', 'centipede', 'chopper_command', 'crazy_climber', 'defender', 'demon_attack', 'double_dunk', 'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar', 'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kaboom', 'kangaroo', 'krull', 'kung_fu_master', 'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan', 'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing', 'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down', 'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']


In [3]:
import gym
import random

In [14]:
%pip install -U gym>=0.21.0
%pip install -U gym[atari,accept-rom-license]

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [4]:
env = gym.make('SpaceInvaders-v0')
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [5]:
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [19]:
!pip install pyglet

Collecting pyglet
  Downloading pyglet-1.5.21-py3-none-any.whl (1.1 MB)
Installing collected packages: pyglet
Successfully installed pyglet-1.5.21


In [5]:
#playing 5 different games
episodes = 5 
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0;
    
    while not done:
        env.render()
        action = random.choice([0, 1, 2, 3, 4, 5])
        n_state, reward, done, info = env.step(action)
        score += reward
        
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

  logger.warn(


Episode:1 Score:80.0
Episode:2 Score:120.0
Episode:3 Score:80.0
Episode:4 Score:75.0
Episode:5 Score:485.0


# Creating a Deep Learning Model w Keras

In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [7]:
def build_model(height, width, channels, actions):
    model = Sequential()
    #stacking layers, starting w Convolution layers because it's an image-based model
    #32 filters(training the filter to detect different things in the images), 8by8 size of filters, moving 4 steps to right and 4 steps down
    model.add(Convolution2D(32, (8,8), strides=(4, 4), activation='relu', input_shape=(3, height, width, channels)))
    model.add(Convolution2D(64, (4,4), strides=(2, 2), activation='relu'))
    #no stride means 1by1 means pixel by pixel
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    #making dense (fully-connected) layers
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [16]:
del model

In [17]:
model = build_model(height, width, channels, actions)

In [18]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 22, 16, 64)     36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 67584)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 512)               34603520  
_________________________________________________________________
dense_6 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_7 (Dense)              (None, 6)                

# Build Agent with Keras-RL

In [10]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [19]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    #dueling networks help the model learn when to take action and when not to bother
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  enable_dueling_network=True, dueling_type='avg',
                  nb_actions=actions, nb_steps_warmup=1000
                  )
    return dqn

In [20]:
dqn = build_agent(model,  actions)
dqn.compile(Adam(lr=1e-4)) #sets the optimizer

In [18]:
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

Training for 10000 steps ...
  764/10000: episode: 1, duration: 19.228s, episode steps: 764, steps per second:  40, episode reward: 210.000, mean reward:  0.275 [ 0.000, 30.000], mean action: 2.438 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1298/10000: episode: 2, duration: 390.057s, episode steps: 534, steps per second:   1, episode reward: 45.000, mean reward:  0.084 [ 0.000, 15.000], mean action: 2.356 [0.000, 5.000],  loss: 1.167755, mean_q: 5.738909, mean_eps: 0.896590
 1978/10000: episode: 3, duration: 855.535s, episode steps: 680, steps per second:   1, episode reward: 110.000, mean reward:  0.162 [ 0.000, 30.000], mean action: 2.463 [0.000, 5.000],  loss: 0.180742, mean_q: 5.366101, mean_eps: 0.852625
 2523/10000: episode: 4, duration: 653.894s, episode steps: 545, steps per second:   1, episode reward: 50.000, mean reward:  0.092 [ 0.000, 20.000], mean action: 2.505 [0.000, 5.000],  loss: 0.083864, mean_q: 5.272575, mean_eps: 0.797500
 3258/10000: episode: 5, duratio

<tensorflow.python.keras.callbacks.History at 0x17600579ee0>

In [22]:
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


  logger.warn(


Episode 1: reward: 110.000, steps: 453
Episode 2: reward: 30.000, steps: 388
Episode 3: reward: 325.000, steps: 1034
Episode 4: reward: 210.000, steps: 686
Episode 5: reward: 240.000, steps: 1228
Episode 6: reward: 260.000, steps: 1235
Episode 7: reward: 230.000, steps: 884
Episode 8: reward: 245.000, steps: 904
Episode 9: reward: 250.000, steps: 1129
Episode 10: reward: 130.000, steps: 640
203.0


# Reloading Agent from Memory

In [22]:
dqn.save_weights('SavedWeights/10k-Fast/dqn_weights.h5f')

In [1]:
del model, dqn

NameError: name 'model' is not defined

In [21]:
dqn.load_weights('SavedWeights/1m/dqn_weights.h5f')