In [1]:
%load_ext autoreload
%autoreload 2

import time
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm import trange
import gym
from many_world.h_maze import HMazeEnv
from ml_logger import logger
from ml_logger.nb import show

ge.envs environments are now added!


# Overview: A Simple Peg Insertion Domain

Having a large configuration space will increase sample complexity and
make the problem unnecessarily more difficult. The most straightforward
way to limit awkward poses is to directly restrict the joint angles,
which does not require computing the absolute position of each joint
in the 2D plane. An added benefit is that the state can be initialized
using a uniform distribution. Filtering also becomes less necessary.

The joints are limited to the following range:

```
low = [0, -2.5, 0]
high = [1.5, 0, 2.7]
```

The slot is positioned 28 cm to the right. The arena is the same size
as the other maze environments -- 28 x 28 cm.

Notable variants:
- `mixed-mode`: in this mode we flash the slot on and off if the
    end-effector position is to the left of the slot. This should be
    the default variant for our experiments.

# ToDos

The goals are:

1. Evaluate the main method on the peg insertion task.
2. Show generalization (GRED).

- [ ] Does the local metric learning work with mixed mode?
    - [x] add new configuration

        The issue is that we need to add results from multiple sampling
        environments. This is required for learning the local metric function.

        After learning, we can just run the standard test environment.
    - [x] Added mixed environment (`many_world:Peg2D-mixed-v0`),
        which randomly switches `env.free` each time
        reset_model is called.

    - [x] test new configuration
    - train local metric and inverse model
- [ ] Does Reaching work with the mixed mode?
    - evaluate MPC
- [ ] How about insertion?
    - add sampling

After this:

- [ ] add dynamic pruning code
- [ ] evaluate MPC
- [ ] run GRED
- [ ] evaluate GRED. should do better than before

### Done
- [x] add one-sided version
- [x] add success reward/metric [Getting it to work with SAC?]
- [x] Implement and verify the `mixed-mode`

Two tasks are available: A free-reaching task without the slot on the right,
and a peg-insertion task.

*3-Link Arm: Free*

In [2]:
# Free-reaching variant (no slot): one seeded reset provides the observation
# and a 100x100 RGB render for the first three panels.
env = gym.make("many_world:Peg2D-v0", free=True)
env.seed(100)
obs = env.reset()
# with env.with_color(-1):
rgb_img = env.render('rgb', width=100, height=100)

# Second env, built with `in_slot=0` and seeded the same way; it is only used
# to sample resets for the overlay below.
env = gym.make("many_world:Peg2D-v0", free=True, in_slot=0)
env.seed(100)

# Render 40 resets, then take the per-pixel minimum over the stacked renders
# so that all sampled initial poses appear in a single image.
imgs = []
for i in trange(40):
    env.reset()
    frame = env.render('rgb', width=100, height=100)
    imgs.append(frame)

rho_0 = np.stack(imgs, axis=-1).min(axis=-1)
show(
    rgb_img,
    obs['img'][0],
    obs['goal_img'][0],
    rho_0,
    titles=["Render", "Img", "Goal", "rho_0"],
)

  0%|          | 0/40 [00:00<?, ?it/s]

Creating offscreen glfw
Creating offscreen glfw
Creating offscreen glfw


 18%|█▊        | 7/40 [00:00<00:02, 11.08it/s]

Creating offscreen glfw


100%|██████████| 40/40 [00:01<00:00, 39.74it/s]


*Peg Insertion*

In [3]:
# Peg-insertion variant (slot present): reset with the slot at `slot_y=0`
# and grab a 100x100 RGB render.
env = gym.make("many_world:Peg2D-v0", free=False)
env.seed(200)
obs = env.reset_model(slot_y=0)
# info: the object is not the last body anymore.
rgb_img = env.render('rgb', width=100, height=100)

# Second env, built with `in_slot=0`, used to sample resets for the overlay.
env = gym.make("many_world:Peg2D-v0", free=False, in_slot=0)
env.seed(100)

# `obs` is overwritten on every reset, so the "Img"/"Goal" panels below come
# from the last of the 40 resets, not from the first env above.
imgs = []
for i in trange(40):
    obs = env.reset_model(slot_y=0)
    frame = env.render('rgb', width=100, height=100)
    imgs.append(frame)

# Per-pixel minimum over the stacked renders overlays all sampled poses.
rho_0 = np.stack(imgs, axis=-1).min(axis=-1)
show(
    rgb_img,
    obs['img'][0],
    obs['goal_img'][0],
    rho_0,
    titles=["Render", "Img", "Goal", "rho_0"],
)

  0%|          | 0/40 [00:00<?, ?it/s]

Creating offscreen glfw
Creating offscreen glfw
Creating offscreen glfw
Creating offscreen glfw


100%|██████████| 40/40 [00:01<00:00, 34.33it/s]


Inspect the reward

In [4]:
# Free-reaching env with 48x48 image observations.
env = gym.make("many_world:Peg2D-v0", free=True, width=48, height=48)

frames = []

obs = env.reset()
done = False
i = 0
# Roll out until the env reports `done`, always stepping in the direction of
# the sign of (goal - x), scaled by 0.5.
while not done:
    i += 1
    act = np.sign(obs['goal'] - obs['x'])
    obs, reward, done, info = env.step(act * 0.5)
    # print(f"\r {reward}", info, obs['x'], obs['goal'], end='')
    # Each frame is the current image next to (goal image - current image).
    img_and_diff = np.concatenate(
        [obs['img'], obs['goal_img'] - obs['img']], axis=-1
    )
    frames.append(img_and_diff[0])

show(*frames)

Creating offscreen glfw


Now test the peg insertion reward (and terminal condition).

Note that the `env.reset_model` takes in an integer goal position.

In [5]:
# use the non-free option
env = gym.make("many_world:Peg2D-v0", width=48, height=48)

frames = []

obs, done, i  = env.reset_model(x=[1.1, -2.2, 1.1], slot_y=0), False, 0
while not done and not logger.every(100):
    i += 1
    act =  np.sign(obs['goal'] - obs['x'])
    obs, reward, done, info = env.step(act * 0.5)
    # print(f"\r {reward}", info, obs['x'], obs['goal'], end='')
    frames.append(np.concatenate([obs['img'], obs['goal_img'] - obs['img']], axis=-1)[0])

show(*frames)


Creating offscreen glfw


## Mixed-Mode

Now test the "Mixed mode". In this mode, the slot would flash on and off
during rendering, unless the peg is less than 1 cm away from the wall.

This way, the local metric would learn to ignore the slot before it
is relevant.

Specification:

- show slot w/ 50% chance unless end-effector is to the right of 2.7(5)
 cm.

> Note: do *NOT* set the `slot_y` in `env.reset_model` under the free mode.
> The mix_mode includes goal positions. Setting goals through that
> function call would only override the logic that moves the slot
> out-of-the-way.

In [6]:
# Mixed-mode env on top of the free variant, with 36x36 image observations.
env = gym.make(
    "many_world:Peg2D-v0",
    free=True,
    mix_mode=(1, 0),
    width=36,
    height=36,
)
env.seed(100)

# Sample 10 resets and show each observation image next to its goal image.
frames = []
for i in range(10):
    obs = env.reset_model()
    img_and_goal = np.concatenate([obs['img'], obs['goal_img']], axis=-1)
    frames.append(img_and_goal[0])

show(*frames)

Creating offscreen glfw


In [7]:
# Mixed-mode env on top of the peg-insertion (non-free) variant, 36x36 images.
env = gym.make(
    "many_world:Peg2D-v0",
    free=False,
    mix_mode=(1, 0),
    width=36,
    height=36,
)
env.seed(100)

# Sample 10 resets and show each observation image next to its goal image.
frames = []
for i in range(10):
    obs = env.reset_model()
    img_and_goal = np.concatenate([obs['img'], obs['goal_img']], axis=-1)
    frames.append(img_and_goal[0])

show(*frames)

Creating offscreen glfw


# To Do

- [ ] set up the sampling envs for the peg-insertion task

    - use both free and slot with mixed-mode during sampling.
    - add 3-slot variant during training
    - test with 2-slot unseen during training.

In [8]:
# Registered mixed environment, rendered at 36x36.
env = gym.make("many_world:Peg2D-mixed-v0", width=36, height=36)
env.seed(100)

# Sample 60 resets and show each observation image next to its goal image.
frames = []
for i in range(60):
    obs = env.reset_model()
    img_and_goal = np.concatenate([obs['img'], obs['goal_img']], axis=-1)
    frames.append(img_and_goal[0])

show(*frames)

Creating offscreen glfw


Now, you can use the `mix_mode` keyword argument to
insert multiple slot locations.