# Imports

In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

# Environment Data

In [2]:
#Imported data taken from the robot's readings of the entire environment.

data = [[122, 121, 162, 1792, 1202, 1183, 1183, 1211, 1550, 2169, 2122, 2550, 2550, 2550, 157, 126, 126, 149, 195, 149, 122, 122], [126, 130, 173, 1813, 1198, 1187, 1196, 1541, 1563, 2142, 2163, 2192, 2550, 2550, 2550, 118, 118, 147, 191, 145, 126, 126], [130, 137, 170, 1813, 1211, 1192, 1198, 1231, 1551, 2124, 2162, 2132, 2550, 2550, 139, 113, 113, 135, 213, 191, 130, 130], [130, 138, 162, 1816, 1219, 1198, 1202, 1559, 1567, 2170, 2116, 2155, 2550, 2550, 126, 109, 109, 130, 213, 172, 137, 135], [130, 128, 166, 1809, 1218, 1202, 1198, 1227, 1550, 2137, 2136, 2138, 2550, 2550, 2550, 112, 109, 121, 154, 204, 135, 128], [112, 118, 137, 951, 901, 864, 865, 872, 933, 1300, 2201, 2154, 2550, 2550, 473, 443, 441, 447, 504, 523, 120, 114], [112, 118, 149, 917, 873, 856, 856, 879, 1290, 1303, 2128, 2550, 2550, 2550, 471, 447, 442, 462, 509, 2550, 118, 113], [109, 112, 137, 971, 873, 853, 849, 863, 921, 1301, 2159, 2200, 2550, 2550, 481, 456, 447, 463, 519, 962, 113, 109], [112, 114, 143, 925, 873, 863, 857, 864, 923, 1307, 2122, 2161, 2550, 2550, 474, 447, 447, 455, 504, 1015, 113, 114], [118, 118, 147, 908, 873, 861, 861, 878, 1290, 1305, 2261, 2153, 2550, 2550, 473, 448, 439, 456, 510, 515, 120, 118], [447, 448, 481, 523, 865, 849, 853, 863, 923, 1649, 1695, 1801, 1893, 2550, 483, 464, 455, 464, 504, 670, 447, 443], [447, 456, 483, 1038, 863, 847, 849, 865, 915, 1588, 1808, 1793, 2550, 2550, 483, 460, 460, 475, 666, 496, 454, 452], [454, 460, 488, 500, 861, 842, 847, 873, 1318, 1581, 1494, 1792, 2550, 2550, 481, 460, 460, 475, 668, 490, 456, 454], [452, 455, 491, 500, 853, 843, 842, 871, 905, 1589, 1505, 1797, 1885, 2550, 474, 464, 469, 483, 670, 474, 452, 448], [460, 464, 500, 517, 864, 848, 853, 869, 1317, 1683, 1478, 1793, 1893, 2550, 475, 454, 454, 469, 515, 500, 462, 460], [448, 448, 481, 492, 1217, 1198, 1191, 1217, 2114, 1846, 1816, 1827, 1848, 2441, 158, 120, 120, 130, 154, 479, 452, 446], [443, 443, 473, 479, 1227, 1198, 1202, 1227, 2116, 1893, 1829, 1835, 1857, 2550, 2550, 113, 114, 122, 158, 481, 447, 439], [446, 452, 483, 488, 1210, 1198, 1200, 1234, 2550, 1848, 1821, 1825, 2550, 2550, 2550, 114, 114, 130, 508, 464, 446, 445], [437, 439, 466, 483, 1226, 1202, 1202, 1223, 2550, 2550, 1825, 1821, 2550, 2550, 135, 112, 109, 128, 156, 479, 443, 443], [439, 439, 464, 483, 1218, 1202, 1206, 1237, 2237, 1857, 1823, 1821, 1858, 2386, 147, 114, 107, 126, 149, 474, 446, 436]]

Zone_1 = [data[0], data[1], data[2], data[3], data[4]]
Zone_2 = [data[5], data[6], data[7], data[8], data[9]]
Zone_3 = [data[10], data[11], data[12], data[13], data[14]]
Zone_4 = [data[15], data[16], data[17], data[18], data[19]]

# Training Environment

In [3]:
class TrainZoneEnv(Env):
    def __init__(self):
        # Actions we can take: Guess 1 of 4 zones
        self.action_space = Discrete(4)
        #  array
        self.observation_space = Box(low=np.array([0]), high=np.array([3]))
        # Set initial Zone Guess
        self.state = 0
        # Import the real Zone's sensor data
        Zone = random.randint(0,3)
        Num = 5*(Zone)
        sample = data[Num]
        self.sensor_data = sample
        # Set amount of guesses per run
        self.guess_length = 10
        
    def step(self, action):
        # Difine the sensor values for each Zone
        sensor_zone = [Zone_1, Zone_2, Zone_3, Zone_4]
        #apply the action
        self.state = action
        # Reduce guess length by 1
        self.guess_length -= 1 
        
        # Calculate reward
        # if the sensor data is in the chosen zone class, reward = 1
        if self.sensor_data in sensor_zone[self.state]: 
            reward = 1
            correct = True
        else: 
            reward = -1
            correct = False
        
        # Check if guessing is done
        if self.guess_length <= 0 or correct == True: 
            done = True
        else:
            done = False
        
        # Set placeholder for info
        info = {}
        
        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        # Reset Initial Zone Guess
        self.state = 0
        # Import a new Zone's sensor data
        Zone = random.randint(0,3)
        Num = 5*(Zone)
        sample = data[Num]
        self.sensor_data = sample
        # Reset Guess amount
        self.guess_length = 10
        return self.state

In [4]:
env = TrainZoneEnv()

episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0
    
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{} Zone guess:{}'.format(episode, score, n_state+1))

Episode:1 Score:0 Zone guess:4
Episode:2 Score:1 Zone guess:1
Episode:3 Score:-2 Zone guess:2
Episode:4 Score:-2 Zone guess:2
Episode:5 Score:1 Zone guess:2
Episode:6 Score:-3 Zone guess:3
Episode:7 Score:1 Zone guess:1
Episode:8 Score:-3 Zone guess:3
Episode:9 Score:0 Zone guess:4
Episode:10 Score:-1 Zone guess:2


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


# Model Building

In [5]:
states = env.observation_space.shape
actions = env.action_space.n

def build_model(states, actions):
    model = Sequential()    
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [6]:
del model

NameError: name 'model' is not defined

In [7]:
model = build_model(states, actions)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 24)                48        
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 4)                 100       
                                                                 
Total params: 748
Trainable params: 748
Non-trainable params: 0
_________________________________________________________________


In [8]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

# Model Training

In [9]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

  super().__init__(name, **kwargs)


Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 7:28 - reward: -1.0000

  updates=self.state_updates,
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   39/10000 [..............................] - ETA: 2:16 - reward: -0.4872

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=s

2765 episodes - episode_reward: -1.707 [-10.000, 1.000] - loss: 0.811 - mae: 1.224 - mean_q: -1.008

Interval 2 (10000 steps performed)
2902 episodes - episode_reward: -1.519 [-10.000, 1.000] - loss: 0.792 - mae: 1.178 - mean_q: -0.935

Interval 3 (20000 steps performed)
2871 episodes - episode_reward: -1.549 [-10.000, 1.000] - loss: 0.806 - mae: 1.184 - mean_q: -0.942

Interval 4 (30000 steps performed)
2863 episodes - episode_reward: -1.570 [-10.000, 1.000] - loss: 0.812 - mae: 1.191 - mean_q: -0.952

Interval 5 (40000 steps performed)
done, took 321.785 seconds


<keras.callbacks.History at 0x1c58a769430>

# Model Testing

In [10]:
class TestZoneEnv(Env):
    def __init__(self):
        # Actions we can take: Guess 1 of 4 zones
        self.action_space = Discrete(4)
        #  array
        self.observation_space = Box(low=np.array([0]), high=np.array([3]))
        # Set initial Zone Guess
        self.state = 0
        # Import the real Zone's sensor data
        sample = data[Num]
        self.sensor_data = sample
        # Set amount of guesses per run
        self.guess_length = 10
        
    def step(self, action):
        # Difine the sensor values for each Zone
        sensor_zone = [Zone_1, Zone_2, Zone_3, Zone_4]
        #apply the action
        self.state = action
        # Reduce guess length by 1
        self.guess_length -= 1 
        
        # Calculate reward
        # if the sensor data is in the chosen zone class, reward = 1
        if self.sensor_data in sensor_zone[self.state]: 
            reward = 1
            correct = True
        else: 
            reward = -1
            correct = False
        
        # Check if guessing is done
        if self.guess_length <= 0 or correct == True: 
            done = True
        else:
            done = False
        
        # Set placeholder for info
        info = {}
        
        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        # Reset Initial Zone Guess
        self.state = 0
        # Import a new Zone's sensor data
        sample = data[Num]
        self.sensor_data = sample
        # Reset Guess amount
        self.guess_length = 10
        return self.state

In [51]:
# Choose the zone the Agent is in.

Zone = 3
Num = 5*(Zone -1) + 1

In [52]:
# Test to determine if the Agent knows where it is.
env = TestZoneEnv()

scores = dqn.test(env, nb_episodes=2, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 2 episodes ...
Episode 1: reward: 1.000, steps: 1
Episode 2: reward: 1.000, steps: 1
1.0
