In [1]:
import gym
import numpy as np

In [2]:
from ipywidgets import interact, interactive, fixed, interact_manual, Output
import ipywidgets as widgets
from IPython.display import display
from IPython.display import clear_output
import functools


In [3]:
%matplotlib notebook
import matplotlib.pyplot as plt

In [4]:
# blgd-v0: BlindGrasp with Discrete actions, version 0.
# GUI=True works only locally, not in Google Colab.
# ResetCount=250: reset after 250 actions.
env = gym.make(
    "gym_blg:blgd-v0",
    GUI=True,
    ResetCount=250,
)

In [5]:
# Reset the environment; as the last expression of the cell, the value
# returned by reset() is what the notebook displays below.
env.reset()

array([0., 0., 0., ..., 0., 0., 0.])

In [6]:
def decode_obs(obs):
    """Split a flat observation vector into its (normalized) sensor readings.

    The observation is laid out as consecutive segments, in this order:

    prox (22 values)
        Proximity sensor array, each value 0 or 1. Only the sensors near the
        tip are used: 7 on the side and 4 at the tip of each finger.
    pos (9 values)
        End-effector x, y, z position, stacked with the 2 previous positions
        for temporal info. Normalized between 0 and 1.
    force (3 values)
        End-effector x, y, z force. Normalized between 0 and 1.
    ObjMap (32*32 values)
        The workspace (tray) area is divided into 32x32 cells. A cell is 1
        where the proximity sensors detected the presence of any object,
        else 0.
    VisitMap (32*32 values)
        Cells of the 32x32 grid visited by the end effector are marked 1.
        This is meant to incentivize the agent to visit unexplored cells.
    CurPosMap (32*32 values)
        The cell corresponding to the end effector's xy position is 1.
    GelSight1, GelSight2 (32*32 values each)
        GelSight depth data from the gripper, normalized between 0 and 1.

    Parameters
    ----------
    obs : sequence supporting ``len`` and slicing (e.g. a 1-D numpy array)
        Flat observation with at least 5154 entries.

    Returns
    -------
    tuple
        ``(prox, pos, force, ObjMap, VisitMap, CurPosMap, GelSight1,
        GelSight2)`` as flat slices of ``obs``; the maps are not reshaped.

    Raises
    ------
    ValueError
        If ``obs`` holds fewer than 5154 entries. Without this check the
        trailing slices would come back silently truncated.
    """
    cells_per_map = 32 * 32
    # Segment lengths in layout order; the offsets are derived from these so
    # they cannot drift out of sync with each other.
    segment_sizes = (22, 9, 3) + (cells_per_map,) * 5
    required = sum(segment_sizes)  # 34 + 5 * 1024 = 5154

    if len(obs) < required:
        raise ValueError(
            "observation has %d entries, expected at least %d"
            % (len(obs), required)
        )

    segments = []
    start = 0
    for size in segment_sizes:
        segments.append(obs[start:start + size])
        start += size
    return tuple(segments)
    

In [7]:
#GUI button callback
def on_button_clicked(b, rs_=0):
    """Button click handler: take one environment step and refresh the GUI.

    Parameters
    ----------
    b : the clicked widget passed in by the button callback (unused).
    rs_ : action value forwarded unchanged to ``env.step``.

    The done flag and the extra info returned by ``env.step`` are ignored.
    """
    observation, reward, _done, _info = env.step(rs_)

    # Only the four 32x32 maps are displayed; prox/pos/force are not used here.
    (_prox, _pos, _force,
     obj_map, visit_map, cur_pos_map,
     gel_sight_1, gel_sight_2) = decode_obs(observation)

    # The current-position cell is overlaid (scaled by 10) on ObjMap and
    # VisitMap for a more intuitive visualization, instead of being shown
    # in a panel of its own.
    cursor_overlay = cur_pos_map.reshape(32, 32) * 10.0
    panel_updates = (
        (map_id1, obj_map.reshape(32, 32) + cursor_overlay),
        (map_id2, visit_map.reshape(32, 32) + cursor_overlay),
        (map_id4, gel_sight_1.reshape(32, 32)),
        (map_id5, gel_sight_2.reshape(32, 32)),
    )
    for image, grid in panel_updates:
        image.set_data(grid)

    # TODO - save the state-action-obs-reward for pretraining the agent
    rBox.value = reward
    
    

In [8]:
# Reset callback from GUI button
def resetenv(b):
    """Click handler for the RESET button: reset the environment.

    ``b`` is the clicked widget passed in by the button callback (unused).
    The value returned by ``env.reset()`` is discarded, and this handler
    does not touch the displayed maps or the reward box.
    """
    _discarded_obs = env.reset()

In [9]:
#define the GUI buttons
def _action_button(label, action, color=None):
    """Create a button that calls on_button_clicked with ``rs_=action``.

    ``color`` (optional) is applied as the button's background colour.
    """
    button = widgets.Button(description=label)
    if color is not None:
        button.style.button_color = color
    button.on_click(functools.partial(on_button_clicked, rs_=action))
    return button


# Vertical motion: actions 0-3
buttonu = _action_button("Up", 0, 'lightblue')
buttond = _action_button("Down", 1, 'lightblue')
buttonuj = _action_button("Up 10x", 2, 'lightblue')
buttondj = _action_button("Down 10x", 3, 'lightblue')

# Axis-aligned planar motion: actions 4-7
buttonl = _action_button("Left", 4, 'lightgreen')
buttonr = _action_button("Right", 5, 'lightgreen')
buttonf = _action_button("Fwd", 6, 'lightgreen')
buttonb = _action_button("Back", 7, 'lightgreen')

# Diagonal planar motion: actions 8-11 (left in the default button colour)
buttonfl = _action_button("F Left", 8)
buttonfr = _action_button("F Right", 9)
buttonbl = _action_button("B Left", 10)
buttonbr = _action_button("B Right", 11)

# Gripper: actions 12-13
buttongro = _action_button("Gripper Open", 12, 'moccasin')
buttongrc = _action_button("Gripper Close", 13, 'moccasin')

# Reset is wired to its own handler rather than to an environment action.
buttonrst = widgets.Button(description="RESET")
buttonrst.style.button_color = 'red'
buttonrst.on_click(resetenv)

# Read-only box showing the reward of the latest step (written by the
# button callback).
rBox = widgets.FloatText(value=2.0, disabled=True)

# Organize the buttons into four columns.
col1 = widgets.VBox([buttongro, buttonbl, buttonl, buttonfl])
col2 = widgets.VBox([buttonb, buttonu, buttond, buttonf])
col3 = widgets.VBox([buttongrc, buttonbr, buttonr, buttonfr])
col4 = widgets.VBox([buttonuj, buttondj, buttonrst, rBox])

In [10]:
#plots
def _add_map_panel(figure, slot, title):
    """Add one titled, axis-less panel in a 1x4 row and draw a placeholder.

    Returns ``(axes, image)``; the image handle is what the button callback
    later updates with ``set_data``.
    """
    axes = figure.add_subplot(1, 4, slot)
    axes.set_title(title)
    axes.axis('off')
    # Random integer-valued 32x32 placeholder until the first step arrives.
    # NOTE(review): set_data() does not appear to rescale the colour limits,
    # so they would stay as fixed by this placeholder - confirm intended.
    placeholder = (np.random.rand(32, 32) * 10) // 2
    # plt.imshow draws on the current axes, i.e. the subplot just added.
    return axes, plt.imshow(placeholder)


f = plt.figure(figsize=(4, 1.5), dpi=250)

# CurPosMap gets no panel of its own; the button callback overlays it on
# ObjMap and VisitMap instead.
i1, map_id1 = _add_map_panel(f, 1, 'ObjMap ')
i2, map_id2 = _add_map_panel(f, 2, 'VisitMap ')
i4, map_id4 = _add_map_panel(f, 3, 'GelS-1 ')
i5, map_id5 = _add_map_panel(f, 4, 'GelS-2 ')

# Display GUI (last expression of the cell, so the notebook renders it)
widgets.HBox([col1, col2, col3, col4])


<IPython.core.display.Javascript object>

HBox(children=(VBox(children=(Button(description='Gripper Open', style=ButtonStyle(button_color='moccasin')), …

In [None]:
from stable_baselines import PPO2
from stable_baselines.gail import ExpertDataset
#TODO - save the action-observation-reward from human demonstration as specified in
# https://stable-baselines.readthedocs.io/en/master/guide/pretrain.html#data-structure-of-the-expert-dataset