In [None]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Number of training episodes run by the training loop in this cell.
EPISODES = 1000

class DQNAgent:
    """Deep Q-Network agent with epsilon-greedy exploration and experience replay.

    The agent owns a bounded replay memory of transitions, a Keras MLP that
    maps a flat state vector to one Q-value per action, and an exploration
    rate ``epsilon`` that is decayed once per call to :meth:`replay`.
    """

    def __init__(self, state_size, action_size):
        """Create the agent and build its Q-network.

        Args:
            state_size: Length of the flat state vector fed to the network.
            action_size: Number of discrete actions (width of the output layer).
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=100000)  # replay buffer; oldest transitions fall off first
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        # Within an exploratory step, probability of delegating to get_action()
        # instead of picking a uniformly random action (see act()).
        self.epsilon2 = 0.0
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.9999
        self.learning_rate = 0.05
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the fully connected Q-network.

        Returns:
            A compiled Keras ``Sequential`` model with ``state_size`` inputs,
            four ReLU hidden layers (85, 45, 25, 10 units) and a linear output
            of ``action_size`` units, trained with MSE loss and Adam.
        """
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(85, input_dim=self.state_size, activation='relu'))
        model.add(Dense(45, activation='relu'))
        model.add(Dense(25, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        With probability ``epsilon`` the step is exploratory: with probability
        ``epsilon2`` the action comes from the module-level ``get_action``
        helper (defined in another cell of this notebook), otherwise it is
        uniformly random. Otherwise the action with the highest predicted
        Q-value is returned.

        Args:
            state: Flat state vector; a leading batch axis is added here.

        Returns:
            The chosen action index. The greedy and random branches return a
            plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            if np.random.rand() <= self.epsilon2:
                return get_action(state)
            return random.randrange(self.action_size)
        act_values = self.model.predict(np.expand_dims(state, axis=0))
        # int() so the greedy branch returns the same type as the random branch
        # rather than a numpy integer scalar.
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the Q-network on a random minibatch from the replay memory.

        For each sampled transition the target for the taken action is
        ``reward`` if the transition is terminal, else
        ``reward + gamma * max_a Q(next_state, a)``; the other actions keep
        their currently predicted values. The model is fitted one sample at a
        time, so later samples see the weights updated by earlier ones.
        Afterwards ``epsilon`` is decayed, never dropping below ``epsilon_min``.

        Args:
            batch_size: Number of transitions to sample. If the memory holds
                fewer than this, the call does nothing (no training, no decay).
        """
        # random.sample raises ValueError when asked for more items than exist.
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state_batch = np.expand_dims(state, axis=0)
            target = reward
            if not done:
                next_q = self.model.predict(np.expand_dims(next_state, axis=0))[0]
                target = reward + self.gamma * np.amax(next_q)
            target_f = self.model.predict(state_batch)
            target_f[0][action] = target
            self.model.fit(state_batch, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            # Clamp so the final decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)


env = gym.make('SpaceInvaders-v4')
state_size = 3021  # length of the flat vector returned by load_transform()
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
# agent.load("./save/cartpole-dqn.h5")
done = False
batch_size = 128

# NOTE: load_transform() (and get_action(), when agent.epsilon2 > 0) are defined
# in other cells of this notebook and must have been run before this cell.
for e in range(EPISODES):
    state = env.reset()
    actual_reward = 0  # running episode score, used for logging only
    state = load_transform(state)
    for time in range(3000):
        #env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        # Per-step training signal: the environment's reward for this step,
        # replaced by a -10 penalty on the terminal step.
        step_reward = reward if not done else -10
        actual_reward += step_reward
        next_state = load_transform(next_state)
        # Store the per-step reward, not the running total: replay() builds its
        # Q-targets as reward + gamma * max Q(next_state), which is only
        # meaningful for the reward of this single transition.
        agent.remember(state, action, step_reward, next_state, done)
        state = next_state
        if done:
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    print("episode: {}/{}, score: {}, reward: {}, e: {:.2} \r"
          .format(e, EPISODES, time, actual_reward, agent.epsilon))


  warn('The default multichannel argument (None) is deprecated.  Please '
  warn("The default mode, 'constant', will be changed to 'reflect' in "


episode: 0/1000, score: 862, reward: 215.0, e: 0.93 
episode: 1/1000, score: 726, reward: 100.0, e: 0.86 
episode: 2/1000, score: 607, reward: 100.0, e: 0.81 
episode: 3/1000, score: 663, reward: 170.0, e: 0.76 
episode: 4/1000, score: 691, reward: 315.0, e: 0.71 
episode: 5/1000, score: 630, reward: 125.0, e: 0.67 
episode: 6/1000, score: 718, reward: 160.0, e: 0.62 
episode: 7/1000, score: 528, reward: 55.0, e: 0.59 
episode: 8/1000, score: 1684, reward: 510.0, e: 0.5 
episode: 9/1000, score: 800, reward: 110.0, e: 0.46 
episode: 10/1000, score: 676, reward: 95.0, e: 0.43 
episode: 11/1000, score: 815, reward: 145.0, e: 0.4 
episode: 12/1000, score: 656, reward: 95.0, e: 0.37 
episode: 13/1000, score: 646, reward: 95.0, e: 0.35 
episode: 14/1000, score: 394, reward: 30.0, e: 0.33 
episode: 15/1000, score: 782, reward: 145.0, e: 0.31 
episode: 16/1000, score: 532, reward: 65.0, e: 0.29 
episode: 17/1000, score: 1225, reward: 205.0, e: 0.26 
episode: 18/1000, score: 645, reward: 95.0, 

In [4]:
import gym

In [5]:
# NOTE(review): 'Cart-v4' does not look like a registered Gym environment id —
# TODO confirm, or remove this scratch cell.
env = gym.make('Cart-v4')

In [1]:
import gym
# Create the SpaceInvaders-v4 Gym environment.
env = gym.make('SpaceInvaders-v4')

In [12]:
# Show the human-readable names of the environment's actions.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [3]:
from keras.models import load_model
# Load the saved Keras models whose predict() methods are used in later cells.
# NOTE(review): absolute, machine-specific paths — consider a configurable directory.
model = load_model('/media/andres/Baymax/predictor.h5')
encoder = load_model('/media/andres/Baymax/encoder100.h5')

Using TensorFlow backend.


In [5]:
# Build an agent sized from the first dimension of the raw observation space.
# NOTE(review): the recorded output of this cell is a NameError because DQNAgent
# had not been defined in that kernel session. The training cell uses
# state_size = 3021 rather than observation_space.shape[0].
env = gym.make('SpaceInvaders-v4')
state_size = env.observation_space.shape[0] #377
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)

NameError: name 'DQNAgent' is not defined

In [13]:
# Reset the environment and keep the initial observation for the cells that follow.
state = env.reset()

In [1]:
import matplotlib.pyplot as plt

from skimage import io, color
from skimage.transform import rescale
from skimage.draw import rectangle
from skimage.measure import label, regionprops
from skimage.filters import threshold_otsu
from skimage.morphology import closing, square
import h5py

def load_transform(img):
    """Convert a raw game frame into the flat 3021-element state vector.

    Steps: crop to rows 25..194 and columns 0..159, convert to grayscale,
    binarise with an Otsu threshold, apply a 3x3 morphological closing,
    replace every connected region of area >= 10 by a filled rectangle built
    from its bounding box, downscale by a factor of 3 without anti-aliasing,
    and flatten.

    Args:
        img: Frame to preprocess; presumably the RGB observation returned by
            the SpaceInvaders environment — TODO confirm.

    Returns:
        A 1-D array of length 3021 (the reshape fails for any other size).
    """
    playfield = img[25:195, 0:160]
    gray = color.rgb2gray(playfield)

    mask = closing(gray > threshold_otsu(gray), square(3))

    # Regions come from a separate label image, so painting into `mask`
    # while iterating does not affect which regions are visited.
    for blob in regionprops(label(mask)):
        if blob.area < 10:
            continue
        top, left, bottom, right = blob.bbox
        # NOTE(review): bbox appears to be half-open while rectangle's `end` is
        # inclusive, so the fill may reach one pixel past the bbox — confirm
        # before changing, since saved models may depend on this preprocessing.
        rows, cols = rectangle(start=(top, left), end=(bottom, right), shape=mask.shape)
        mask[rows, cols] = 1

    return rescale(mask, 1.0 / 3.0, anti_aliasing=False).reshape(3021)

In [33]:
import numpy as np

# Preprocess the current frame, encode it, then run the predictor on the encoding.
state_transformed = load_transform(state)
# expand_dims adds the leading batch axis that predict() is given here.
encoded = encoder.predict(np.expand_dims(state_transformed, axis=0))
prediction = model.predict(encoded)

  warn('The default multichannel argument (None) is deprecated.  Please '
  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [36]:
# Index of the largest entry in the predictor output.
prediction.argmax()

1

In [28]:
# Sanity check: shape of the preprocessed state vector.
state_transformed.shape

(3021,)

In [1]:
# Action names indexed by the predictor model's output index (see get_action()).
actions = ['NOOP', 'FIRE', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT', 'UPFIRE', 'RIGHTFIRE', 'LEFTFIRE', 'DOWNFIRE', 'UPRIGHTFIRE', 'UPLEFTFIRE', 'DOWNRIGHTFIRE', 'DOWNLEFTFIRE']
# Action names in the SpaceInvaders environment's own order; this matches the
# get_action_meanings() output recorded earlier in the notebook.
env_actions  = ['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

import matplotlib.pyplot as plt

from skimage import io, color
from skimage.transform import rescale
from skimage.draw import rectangle
from skimage.measure import label, regionprops
from skimage.filters import threshold_otsu
from skimage.morphology import closing, square
from keras.models import load_model
import h5py

# Load the saved Keras models used by get_action().
# NOTE(review): absolute, machine-specific paths — consider a configurable directory.
model = load_model('/media/andres/Baymax/predictor.h5')
encoder = load_model('/media/andres/Baymax/encoder100.h5')

def load_transform(img):
    """Convert a raw game frame into the flat 3021-element state vector.

    Steps: crop to rows 25..194 and columns 0..159, convert to grayscale,
    binarise with an Otsu threshold, apply a 3x3 morphological closing,
    replace every connected region of area >= 10 by a filled rectangle built
    from its bounding box, downscale by a factor of 3 without anti-aliasing,
    and flatten.

    Args:
        img: Frame to preprocess; presumably the RGB observation returned by
            the SpaceInvaders environment — TODO confirm.

    Returns:
        A 1-D array of length 3021 (the reshape fails for any other size).
    """
    playfield = img[25:195, 0:160]
    gray = color.rgb2gray(playfield)

    mask = closing(gray > threshold_otsu(gray), square(3))

    # Regions come from a separate label image, so painting into `mask`
    # while iterating does not affect which regions are visited.
    for blob in regionprops(label(mask)):
        if blob.area < 10:
            continue
        top, left, bottom, right = blob.bbox
        # NOTE(review): bbox appears to be half-open while rectangle's `end` is
        # inclusive, so the fill may reach one pixel past the bbox — confirm
        # before changing, since saved models may depend on this preprocessing.
        rows, cols = rectangle(start=(top, left), end=(bottom, right), shape=mask.shape)
        mask[rows, cols] = 1

    return rescale(mask, 1.0 / 3.0, anti_aliasing=False).reshape(3021)

def get_action(state):
    """Suggest an environment action for ``state`` using the pretrained models.

    The state is passed through the module-level ``encoder`` and then
    ``model``; the index of the largest output is looked up in ``actions`` to
    get an action name, which is then translated to its index in
    ``env_actions``.

    Args:
        state: Flat state vector; a leading batch axis is added here.

    Returns:
        The index of the predicted action within ``env_actions``, or 0 (the
        first entry of ``env_actions``) when the predicted action has no
        counterpart there.

    Note:
        Relies on the globals ``encoder``, ``model``, ``actions``,
        ``env_actions`` and ``np``. This cell does not import numpy itself,
        so another cell must have done so.
    """
    encoded = encoder.predict(np.expand_dims(state, axis=0))
    prediction = model.predict(encoded)[0]
    prediction_index = prediction.argmax()
    try:
        return env_actions.index(actions[prediction_index])
    except (ValueError, IndexError):
        # ValueError: the predicted action name is not available in this env.
        # IndexError: the predictor produced an index outside `actions`.
        # Anything else (e.g. a missing or misconfigured global) is a real bug
        # and is deliberately allowed to propagate instead of becoming action 0.
        return 0
    

Using TensorFlow backend.


In [3]:
# Check the environment-side index that the 'FIRE' action name maps to.
env_actions.index('FIRE')

1