In [1]:
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from itertools import count
from collections import namedtuple

In [2]:
# baseline imports
import gym
from gym import spaces
from gym.envs.classic_control import rendering

In [3]:
from scipy.spatial.distance import euclidean
import numpy as np
from numpy.random import randint
from itertools import permutations
from random import shuffle

"""
Actions:
    -Draw vertex
    -Measure distance
    -Measure angle
"""
# DEFINE ENVIRONMENT
class GeometryEnvironment(gym.Env):
    def __init__(self):

        # Example when using discrete actions:
        N_DISCRETE_ACTIONS = 3
        self.action_space = spaces.Box(low=-np.inf, high=np.inf, shape=(N_DISCRETE_ACTIONS,), dtype=np.float64)

        # Example for using image as input:
        #self.observation_space = spaces.Box(low=0, high=255, shape=(HEIGHT, WIDTH, N_CHANNELS), dtype=np.uint8)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float64)
        self.reset()

    def place_vertex(self, bead_index, vertex_index):
        self.bead_set[bead_index] = vertex_index

    def measure_distance(self, u, v):
        return euclidean(u, v)
    
    def reset(self):
        #self.bead_set = np.array([(randint(low=1, high=100), randint(low=1, high=100)) for i in range(2)])
        self.bead_set = np.array([(0, 0) for i in range(2)])
        self.defining_state = (randint(low=1, high=100), randint(low=1, high=100))
        self.answer = 0
        self.correct_answer = np.sqrt(self.defining_state[0]**2 + self.defining_state[1]**2)
        return self.get_state()
    
    def step(self, action):
        # if distance should be measured
        #print("beads at start of step", self.bead_set)
        # convert tensor to format which can be manipulated
        action_index = action[0]
        action_vertex = (action[1], action[2])
        if (action_index < 1):
            self.answer = self.measure_distance(self.bead_set[0], self.bead_set[1])
        else:
            action_index = 0 if action_index < 10 else 1
            self.place_vertex(action_index, action_vertex)
        done = bool(np.abs(self.answer - self.correct_answer) < 10)
        if done:
            print("Gottem")
        reward = 10.0 if done else 0.0
        info = {}
        return self.get_state(), reward, done, info
    
    def get_state(self):
        a = np.array(self.defining_state)
        b = np.array(self.bead_set[0])
        c = np.array(self.bead_set[1])
        nump = np.concatenate((a, b, c))
        t_state = nump.flatten()
        return t_state
    
    def render(self):
        #print("Answer: ", self.answer)
        vertices = np.array([self.vertex_set[(self.bead_set[0])], self.vertex_set[(self.bead_set[1])]])
        #print(vertices)
        x, y = vertices.T
        plt.scatter(x, y)
        plt.show()
    
    def get_distances(self):
        # self.answer,
        return self.measure_distance(self.bead_set[0], self.bead_set[1]), self.correct_answer


In [4]:
import stable_baselines
from stable_baselines.common.policies import MlpPolicy, MlpLnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import A2C

# Instantiate the env
env = GeometryEnvironment()
#env = DummyVecEnv([lambda: env])

# Define and Train the agent
policy_kwargs = dict(net_arch=[128, dict(pi=[128, 128],vf=[256, 256, 128])])
model = A2C(MlpPolicy, env, verbose=1, gamma=0.9, n_steps=50, policy_kwargs=policy_kwargs, tensorboard_log="./hard_a2c/")

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Wrapping the env in a DummyVecEnv.




Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.





Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor




In [5]:
#from stable_baselines.common.env_checker import check_env
# It will check your custom environment and output additional warnings if needed
#check_env(env)

In [6]:
"""
REFERENCE PARAMETERS
BATCH_SIZE = 128
GAMMA = 0.5 #0.999
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 50
TARGET_UPDATE = 10
"""

'\nREFERENCE PARAMETERS\nBATCH_SIZE = 128\nGAMMA = 0.5 #0.999\nEPS_START = 0.9\nEPS_END = 0.05\nEPS_DECAY = 50\nTARGET_UPDATE = 10\n'

In [14]:
model.learn(total_timesteps=10000)

---------------------------------
| explained_variance | -75.1    |
| fps                | 73       |
| nupdates           | 1        |
| policy_entropy     | 4.26     |
| total_timesteps    | 50       |
| value_loss         | 0.000235 |
---------------------------------
---------------------------------
| explained_variance | -1.46    |
| fps                | 547      |
| nupdates           | 100      |
| policy_entropy     | 4.27     |
| total_timesteps    | 5000     |
| value_loss         | 5.31e-06 |
---------------------------------
---------------------------------
| explained_variance | -21.6    |
| fps                | 588      |
| nupdates           | 200      |
| policy_entropy     | 4.28     |
| total_timesteps    | 10000    |
| value_loss         | 1.86e-05 |
---------------------------------


<stable_baselines.a2c.a2c.A2C at 0x7f11186846a0>

In [15]:
# Test the trained agent
import time
test_states = []
test_actions = []

obs = env.reset()

n_steps = 50
for step in range(n_steps):
    test_states.append(obs)
    action, _ = model.predict(obs, deterministic=True)
    print("Step {}".format(step + 1))
    print("Action: ", action)
    obs, reward, done, info = env.step(action)
    print('obs=', obs, 'reward=', reward, 'done=', done)

    test_actions.append(action)

    #env.render()
    time.sleep(0.4)
    if done:
    # Note that the VecEnv resets automatically
    # when a done signal is encountered
        print("Goal reached!", "reward=", reward)
        break
#input()
#env.close()

Step 1
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 2
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 3
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 4
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 5
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 6
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 7
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 8
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 9
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0  0  0] reward= 0.0 done= False
Step 10
Action:  [ 0.01116171 -0.0065893   0.00839518]
obs= [98 33  0  0 

In [16]:
import matplotlib.animation as animation
from IPython.display import HTML

measurements = np.array(test_actions)[:, 0] < 1
#=========================================
# Animate Images

fig = plt.figure(figsize=(10, 10))
ims = []
for i in range(len(test_states)):
    x = np.array(test_states[i])[2:5:2]
    y = np.array(test_states[i])[3:6:2]
    if (measurements[i]):
        ims.append([plt.scatter(x, y, animated=True)])
        im, = plt.plot(x, y, 'ro-', animated=True)
    else:
        im = plt.scatter(x, y, animated=True)
    ims.append([im])

ani = animation.ArtistAnimation(fig, ims, interval=300, blit=True, repeat_delay=1000)
plt.close()

# Show the animation
HTML(ani.to_jshtml())

KeyboardInterrupt: 

In [23]:
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import

import matplotlib.pyplot as plt
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np


def compute_steps(x_val, y_val, max_count=100):
        env.reset()
        env.set_state(x_val, y_val)
        obs = env.get_state()

        for t in count():
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            if done or t > max_count:
                return t
                

vectorized_steps = np.vectorize(compute_steps)


def test_range(x, y, max_count=100):
    X, Y = np.meshgrid(x, y)
    Z = vectorized_steps(X, Y)

    plt.pcolormesh(X, Y, Z)
    plt.colorbar()
    plt.show()

    optimal = Z < 3
    plt.pcolormesh(X, Y, optimal)
    plt.show()

In [24]:
x_simple = y_simple = np.linspace(-100, 100, 20)
test_range(x_simple, y_simple)

AttributeError: 'GeometryEnvironment' object has no attribute 'set_state'

In [25]:
x_simple = y_simple = np.linspace(1, 100, 20)
test_range(x_simple, y_simple)

AttributeError: 'GeometryEnvironment' object has no attribute 'set_state'

In [26]:
x_simple = y_simple = np.linspace(1, 1000, 20)
test_range(x_simple, y_simple)

AttributeError: 'GeometryEnvironment' object has no attribute 'set_state'

In [27]:
x_simple = y_simple = np.linspace(1, 100000, 20)
test_range(x_simple, y_simple)

AttributeError: 'GeometryEnvironment' object has no attribute 'set_state'

In [28]:
x_simple = y_simple = np.linspace(1, 1000000000, 20)
test_range(x_simple, y_simple)

AttributeError: 'GeometryEnvironment' object has no attribute 'set_state'