In [1]:
import pyglet
import numpy as np

In [2]:
class ArmEnv(object):
    """Two-link planar arm whose fingertip must reach a square goal region.

    The arm is anchored at (200, 200). Its state is the pair of link angles
    in radians; an action is a pair of angular velocities that are clipped to
    ``action_bound`` and integrated over ``dt``.
    """
    viewer = None
    dt = 0.1 # refresh rate
    action_bound = [-1, 1]
    goal = {'x':100., 'y':100., 'l':40} # square goal: centre (x, y), side length l
    state_size = 2 # theta_1, theta_2
    action_size = 2 # dtheta_1, dtheta_2

    def __init__(self):
        # One record per link: 'l' = link length, 'r' = link angle in radians.
        self.arm_info = np.zeros(2, dtype=[('l', np.float32), ('r', np.float32)])
        self.arm_info['l'] = 100 # Link lengths
        self.arm_info['r'] = np.pi/6 # Link angles (rad)

    def step(self, action):
        """Apply ``action`` to the arm and report the outcome.

        Parameters
        ----------
        action : array-like of 2 floats
            Angular velocity for each joint; clipped to ``action_bound``.

        Returns
        -------
        s : numpy.ndarray
            Copy of the two link angles after the update, wrapped to [0, 2*pi).
        r : float
            1.0 when the fingertip lies inside the goal square, otherwise 0.0
            (sparse reward).
        done : bool
            True when the fingertip lies inside the goal square.

        Side effects: updates ``self.arm_info['r']`` in place.
        """
        action = np.clip(action, *self.action_bound)
        # Taking action
        self.arm_info['r'] += action*self.dt
        self.arm_info['r'] %= np.pi*2 # normalize

        # Return a copy: the field access is a view, so handing it out directly
        # would make every previously returned state change on the next step.
        s = self.arm_info['r'].copy()

        (a1l, a2l) = self.arm_info['l']
        (a1r, a2r) = self.arm_info['r']
        a1xy = np.array([200., 200.]) # (x0, y0) initial start point
        a1xy_ = np.array([np.cos(a1r), np.sin(a1r)]) * a1l + a1xy # (x1, y1)
        # The coordinates must be wrapped in a list; np.array(a, b) would treat b as dtype.
        finger = np.array([np.cos(a1r+a2r), np.sin(a1r+a2r)]) * a2l + a1xy_ # (x2, y2)

        # done and reward: x is checked against finger[0], y against finger[1]
        half = self.goal['l']/2
        in_x = self.goal['x'] - half < finger[0] < self.goal['x'] + half
        in_y = self.goal['y'] - half < finger[1] < self.goal['y'] + half
        done = bool(in_x and in_y)
        r = 1.0 if done else 0. # Sparse reward
        return s, r, done

    def reset(self):
        """Set both link angles to random values in [0, 2*pi) and return a copy of them."""
        self.arm_info['r'] = 2 * np.pi * np.random.rand(2)
        return self.arm_info['r'].copy()

    def render(self):
        """Draw the current arm pose, creating the Viewer window on first use."""
        if self.viewer is None:
            self.viewer = Viewer(self.arm_info, self.goal)
        self.viewer.render()

    def sample_action(self):
        """Return a random action: two values drawn uniformly from [-0.5, 0.5)."""
        return np.random.rand(2) - 0.5

In [3]:
class Viewer(pyglet.window.Window):
    """400x400 pyglet window that draws the goal square and the two arm links.

    The window keeps a reference to the environment's ``arm_info`` array and
    recomputes the link rectangles from it on every ``render`` call.
    """
    bar_thc = 5  # distance from a link's centre line to each long edge of its bar

    def __init__(self, arm_info, goal):
        """Create the window and register the goal and both link quads on one batch.

        arm_info : structured array with fields 'l' (link length) and 'r' (link angle, rad)
        goal     : dict with keys 'x', 'y' (centre) and 'l' (side length of the square)
        """
        # vsync=False: do not wait for the monitor refresh, so training can run faster
        super(Viewer, self).__init__(width=400, height=400, resizable=False, caption='Arm', vsync=False)
        pyglet.gl.glClearColor(1, 1, 1, 1)
        self.arm_info = arm_info
        self.center_coord = np.array([200, 200])

        # All shapes live in a single batch so they are drawn together
        self.batch = pyglet.graphics.Batch()

        # Goal square, corners listed as (left,bottom), (left,top), (right,top), (right,bottom)
        left = goal['x'] - goal['l'] / 2
        right = goal['x'] + goal['l'] / 2
        bottom = goal['y'] - goal['l'] / 2
        top = goal['y'] + goal['l'] / 2
        goal_corners = [left, bottom,
                        left, top,
                        right, top,
                        right, bottom]
        goal_color = (86, 109, 249) * 4
        self.goal = self.batch.add(
            4, pyglet.gl.GL_QUADS, None,
            ('v2f', goal_corners),
            ('c3B', goal_color))

        # Placeholder rectangles for the links; _update_arm overwrites their vertices
        arm_color = (249, 86, 86) * 4
        arm1_corners = [250, 250,
                        250, 300,
                        260, 300,
                        260, 250]
        self.arm1 = self.batch.add(
            4, pyglet.gl.GL_QUADS, None,
            ('v2f', arm1_corners),
            ('c3B', arm_color))
        arm2_corners = [100, 150,
                        100, 160,
                        200, 160,
                        200, 150]
        self.arm2 = self.batch.add(
            4, pyglet.gl.GL_QUADS, None,
            ('v2f', arm2_corners),
            ('c3B', arm_color))

    def render(self):
        """Recompute the link rectangles, then process events and redraw the window."""
        self._update_arm()
        self.switch_to()
        self.dispatch_events()
        self.dispatch_event('on_draw')
        self.flip()

    def on_draw(self):
        """Clear the window and draw every shape in the batch."""
        self.clear()
        self.batch.draw()

    def _update_arm(self):
        """Rebuild both link quads from the current lengths and angles in ``arm_info``."""
        len1, len2 = self.arm_info['l']     # link lengths
        ang1, ang2 = self.arm_info['r']     # link angles (rad)

        base = self.center_coord                                            # link 1 start (x0, y0)
        elbow = np.array([np.cos(ang1), np.sin(ang1)]) * len1 + base        # link 1 end / link 2 start (x1, y1)
        tip = np.array([np.cos(ang1+ang2), np.sin(ang1+ang2)]) * len2 + elbow   # link 2 end (x2, y2)

        # Complementary angles: the vectors built from them are perpendicular to each link
        perp1 = np.pi / 2 - self.arm_info['r'][0]
        perp2 = np.pi / 2 - self.arm_info['r'].sum()

        # Offsets from a link's centre line to its two long edges
        link1_side_a = np.array([-np.cos(perp1), np.sin(perp1)]) * self.bar_thc
        link1_side_b = np.array([np.cos(perp1), -np.sin(perp1)]) * self.bar_thc
        link2_side_a = np.array([-np.cos(perp2), np.sin(perp2)]) * self.bar_thc
        link2_side_b = np.array([np.cos(perp2), -np.sin(perp2)]) * self.bar_thc

        link1_quad = (base + link1_side_a,
                      base + link1_side_b,
                      elbow + link1_side_b,
                      elbow + link1_side_a)
        link2_quad = (elbow + link2_side_b,
                      elbow + link2_side_a,
                      tip + link2_side_a,
                      tip + link2_side_b)

        self.arm1.vertices = np.concatenate(link1_quad)
        self.arm2.vertices = np.concatenate(link2_quad)

In [4]:
if __name__ == '__main__':
    # Demo: draw the arm while driving it with random actions.
    env = ArmEnv()
    # This loop has no exit condition; interrupt the kernel to stop it.
    while True:
        env.render()
        # The (state, reward, done) tuple returned by step() is discarded here.
        env.step(env.sample_action())

TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'

In [66]:
# Module-level scratch copies of the ArmEnv settings, used by the exploratory cells below.
viewer = None
dt = 0.1    # refresh rate
action_bound = [-1, 1]  # actions are clipped to this range
goal = {'x': 100., 'y': 100., 'l': 40} # x, y : goal centre; l : tolerance level (side length of the goal square)
state_dim = 2
action_dim = 2

In [67]:
goal

{'x': 100.0, 'y': 100.0, 'l': 40}

In [68]:
# Structured array with one record per arm link and two float32 fields, 'l' and 'r'.
arm_info = np.zeros(2, dtype=[('l', np.float32), ('r', np.float32)])

In [69]:
arm_info

array([(0., 0.), (0., 0.)], dtype=[('l', '<f4'), ('r', '<f4')])

In [87]:
# Initialise both links: 'l' holds the link length and 'r' the link angle in radians
# (the field names do not stand for "left"/"right").
arm_info['l'] = 100 # arm lengths
arm_info['r'] = np.pi/6 # arm angles
arm_info

array([(100., 0.5235988), (100., 0.5235988)],
      dtype=[('l', '<f4'), ('r', '<f4')])

In [88]:
# Random action: two values drawn uniformly from [-0.5, 0.5), one per joint.
action = np.random.rand(2)-0.5
action

array([ 0.00662357, -0.0236483 ])

In [89]:
action_bound

[-1, 1]

In [90]:
# Clip the action to [action_bound[0], action_bound[1]]; values already inside are unchanged.
action = np.clip(action, *action_bound)
action

array([ 0.00662357, -0.0236483 ])

In [91]:
dt

0.1

In [92]:
action*dt

array([ 0.00066236, -0.00236483])

In [93]:
# Integrate the action over one time step: angle += angular velocity * dt (in place).
arm_info['r'] += action*dt
arm_info

array([(100., 0.5242612), (100., 0.521234 )],
      dtype=[('l', '<f4'), ('r', '<f4')])

In [94]:
# Sample a fresh random action and clip it to the allowed range in one cell.
action = np.random.rand(2)-0.5
action = np.clip(action, *action_bound)
action

array([ 0.26663102, -0.00668687])

In [95]:
# Apply the new action; re-running this cell keeps accumulating into arm_info['r'].
arm_info['r'] += action*dt
arm_info

array([(100., 0.5509243), (100., 0.5205653)],
      dtype=[('l', '<f4'), ('r', '<f4')])

In [102]:
# Roll out 10 random actions, printing the joint angles after each update,
# first as integrated and then again after wrapping them with modulo 2*pi.
print("s:", arm_info['r'])
for i in range(10):
    action = np.random.rand(2) - 0.5
    #print("raw action :", action )
    
    action = np.clip(action, *action_bound)
    #print("clipped action :", action)
    #print("a:", action)
    
    # Integrate the clipped action over one time step (in place)
    arm_info['r'] += action*dt
    print("a:", action, " s:", arm_info['r'])
    # Normalise the angles into [0, 2*pi)
    arm_info['r'] %=np.pi*2
    print("a: norm", action, " s: norm", arm_info['r'])

s: [0.58320576 0.3939816 ]
a: [ 0.03412042 -0.42184773]  s: [0.5866178  0.35179684]
a: norm [ 0.03412042 -0.42184773]  s: norm [0.5866178  0.35179684]
a: [-0.23206651 -0.24038584]  s: [0.5634112  0.32775825]
a: norm [-0.23206651 -0.24038584]  s: norm [0.5634112  0.32775825]
a: [-0.44564124  0.28144298]  s: [0.51884705 0.35590255]
a: norm [-0.44564124  0.28144298]  s: norm [0.51884705 0.35590255]
a: [0.40174798 0.10305125]  s: [0.55902183 0.3662077 ]
a: norm [0.40174798 0.10305125]  s: norm [0.55902183 0.3662077 ]
a: [ 0.34931406 -0.43726099]  s: [0.59395325 0.3224816 ]
a: norm [ 0.34931406 -0.43726099]  s: norm [0.59395325 0.3224816 ]
a: [-0.34598807  0.20522368]  s: [0.5593544  0.34300396]
a: norm [-0.34598807  0.20522368]  s: norm [0.5593544  0.34300396]
a: [-0.49967801  0.38295465]  s: [0.5093866  0.38129944]
a: norm [-0.49967801  0.38295465]  s: norm [0.5093866  0.38129944]
a: [0.14427555 0.33079735]  s: [0.52381414 0.41437918]
a: norm [0.14427555 0.33079735]  s: norm [0.52381414 0

In [97]:
# state: the two link angles.
# NOTE: field access on a structured array returns a view, not a copy, so `s`
# reflects any later in-place update of arm_info['r'].
s = arm_info['r']
s

array([0.58320576, 0.3939816 ], dtype=float32)

In [98]:
# Unpack the lengths of link 1 and link 2.
(a1l, a2l) = arm_info['l']
a1l, a2l

(100.0, 100.0)

In [99]:
# Unpack the angles (rad) of link 1 and link 2.
(a1r, a2r) = arm_info['r']
a1r, a2r

(0.58320576, 0.3939816)

In [100]:
# Forward kinematics of the two-link arm.
# a1 start (x0, y0): the fixed base of the arm
a1xy = np.array([200., 200.])
a1xy_ = np.array([np.cos(a1r), np.sin(a1r)])*a1l + a1xy # a1 end (x1, y1)
# Link 2 is drawn at the summed angle a1r + a2r, starting from the end of link 1.
finger = np.array([np.cos(a1r+a2r), np.sin(a1r+a2r)])*a2l + a1xy_ # a2 end (x2, y2)

In [101]:
goal

{'x': 100.0, 'y': 100.0, 'l': 40}

In [None]:
# done and reward
# The episode is finished when the fingertip lies inside the goal square:
# x within goal['x'] +/- l/2 and y within goal['y'] +/- l/2.
# The whole condition is wrapped in parentheses so it can legally span two lines
# (a bare line break before `and` is a SyntaxError).
if ((goal['x'] - goal['l']/2 < finger[0] < goal['x'] + goal['l']/2)
        and (goal['y'] - goal['l']/2 < finger[1] < goal['y'] + goal['l']/2)):
    reward = 1.
    done = True

In [103]:
a1r, a2r

(0.58320576, 0.3939816)

In [105]:
# Complements (pi/2 - angle) of link 1's angle and of the summed angle of both links;
# the same expressions are used in Viewer._update_arm to offset the bar edges.
np.pi/2 - arm_info['r'][0], np.pi/2 - arm_info['r'].sum()

(1.086925790705953, 0.6879623691227774)

In [106]:
arm_info['r']

array([0.48387054, 0.39896345], dtype=float32)

In [107]:
arm_info['r'][0]

0.48387054

In [108]:
arm_info['r'].sum()

0.88283396