In [1]:
import os
import gym
from darm_gym_env import DARMEnv
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

import wandb
from wandb.integration.sb3 import WandbCallback
from stable_baselines3.common.callbacks import CallbackList, EvalCallback, StopTrainingOnRewardThreshold, StopTrainingOnNoModelImprovement

import numpy as np
from datetime import datetime

### Load Models

In [2]:
env_tag = "di"
run_name = f"SB3_SAC_{env_tag}_position_stiffen_2"
run_local_dir = f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/{env_tag}/{run_name}"

model_name_i = f"{run_local_dir}/models/best/best_model"
eval_model_i = SAC.load(model_name_i)
eval_model_i

<stable_baselines3.sac.sac.SAC at 0x7fea2b789a60>

In [3]:
env_tag = "dii"
run_name = f"SB3_SAC_{env_tag}_position_stiffen_2"
run_local_dir = f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/{env_tag}/{run_name}"

model_name_ii = f"{run_local_dir}/models/model"
# model_name_ii = f"{run_local_dir}/models/best/best_model"
eval_model_ii = SAC.load(model_name_ii)
eval_model_ii

<stable_baselines3.sac.sac.SAC at 0x7fea4206b880>

In [4]:
env_tag = "diii"
run_name = f"SB3_SAC_{env_tag}_position_stiffen_2"
run_local_dir = f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/{env_tag}/{run_name}"

model_name_iii = f"{run_local_dir}/models/best/best_model"
eval_model_iii = SAC.load(model_name_iii)
eval_model_iii

<stable_baselines3.sac.sac.SAC at 0x7fe9edf00b20>

In [5]:
env_tag = "div"
run_name = f"SB3_SAC_{env_tag}_position_stiffen_2"
run_local_dir = f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/{env_tag}/{run_name}"

model_name_iv = f"{run_local_dir}/models/best/best_model"
eval_model_iv = SAC.load(model_name_iv)
eval_model_iv

<stable_baselines3.sac.sac.SAC at 0x7fe9f877c7c0>

In [6]:
env_tag = "dv"
run_name = f"SB3_SAC_{env_tag}_position_stiffen_2"
run_local_dir = f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/{env_tag}/{run_name}"

model_name_v = f"{run_local_dir}/models/model"
# model_name_v = f"{run_local_dir}/models/best/best_model"
eval_model_v = SAC.load(model_name_v)
eval_model_v

<stable_baselines3.sac.sac.SAC at 0x7fea2bfcd6d0>

## Environment

In [7]:
env = DARMEnv(digits=["i", "ii", "iii", "iv", "v"],
                            start_state_file="DARMHand_MFNW_start_state.npy",
                            render_mode="human")

Loaded XML file successfully
Using step size `0.01256` units
Using `myo` reward type
Number of tendon position actuators: 30
Using `Box([-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.], [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.], (30,), float32)` action space


  logger.warn(


In [8]:
obs = env.reset()
obs

array([-9.53674612e-01, -3.93382484e+00,  1.07950825e+01,  5.76073431e-01,
        4.38386164e-01, -1.95317211e-01, -6.61670734e-01,  3.54763268e+00,
       -2.55334918e+00,  7.28864374e+00,  8.17527819e-01, -3.31722340e+00,
        9.07344727e+00, -1.64504838e+00, -3.75804497e+00,  1.00890234e+01,
        1.75278163e+00, -5.77873531e+00,  9.37024774e+00,  3.97721736e-03,
        9.95952564e-01, -8.64828717e-02,  2.41533590e-02,  2.93000000e+00,
        8.70000000e-01,  9.08000000e+00,  2.14997850e+00, -3.39588077e+00,
        1.13179070e+01,  1.71485111e+00, -5.93547779e+00,  1.05806749e+01,
        1.78038191e+00, -5.73100432e+00,  8.48162839e+00,  1.34000203e+00,
        5.48796293e+00,  1.76979790e+01,  9.74852016e-01,  2.20682871e-01,
        3.02603881e-02, -6.85021851e-03,  8.20000000e-01,  8.70000000e-01,
        9.33000000e+00,  1.22961745e+00,  3.89322541e+00,  1.35409763e+01,
        1.51683792e+00,  5.44935862e+00,  1.64936791e+01,  1.68522024e+00,
        4.38313439e+00,  

## Model Combination

In [9]:
obs = [env.get_obs(i) for i in range(5)]

for o in obs:
    print(o.shape)

(16,)
(19,)
(19,)
(19,)
(19,)


In [10]:
actions = []
actions.append(eval_model_i.predict(obs[0], deterministic=True)[0])
actions.append(eval_model_ii.predict(obs[1], deterministic=True)[0])
actions.append(eval_model_iii.predict(obs[2], deterministic=True)[0])
actions.append(eval_model_iv.predict(obs[3], deterministic=True)[0])
actions.append(eval_model_v.predict(obs[4], deterministic=True)[0])
actions

[array([-0.96874094,  0.9945488 , -0.9781129 , -0.9571279 , -0.80375725,
        -0.941601  ,  0.9631041 ,  0.96614707, -0.8560862 ], dtype=float32),
 array([-0.9917435 , -0.9858608 , -0.87369674, -0.98893034,  0.978533  ],
       dtype=float32),
 array([-0.99206936,  0.2629354 , -0.98428756, -0.9909652 ,  0.98523235],
       dtype=float32),
 array([ 0.997905 , -0.972115 , -0.9262421, -0.9816914,  0.9735168],
       dtype=float32),
 array([ 0.99963593, -0.97694963, -0.25092733, -0.77093995,  0.727625  ,
         0.8241062 ], dtype=float32)]

In [11]:
action = np.concatenate(actions)
action

array([-0.96874094,  0.9945488 , -0.9781129 , -0.9571279 , -0.80375725,
       -0.941601  ,  0.9631041 ,  0.96614707, -0.8560862 , -0.9917435 ,
       -0.9858608 , -0.87369674, -0.98893034,  0.978533  , -0.99206936,
        0.2629354 , -0.98428756, -0.9909652 ,  0.98523235,  0.997905  ,
       -0.972115  , -0.9262421 , -0.9816914 ,  0.9735168 ,  0.99963593,
       -0.97694963, -0.25092733, -0.77093995,  0.727625  ,  0.8241062 ],
      dtype=float32)

In [12]:
env.step(action)
env.render()

In [13]:
env.close()

## Env Loop

In [7]:
def get_action(obs):
    actions = []
    actions.append(eval_model_i.predict(obs[0], deterministic=True)[0])
    actions.append(eval_model_ii.predict(obs[1], deterministic=True)[0])
    actions.append(eval_model_iii.predict(obs[2], deterministic=True)[0])
    actions.append(eval_model_iv.predict(obs[3], deterministic=True)[0])
    actions.append(eval_model_v.predict(obs[4], deterministic=True)[0])
    
    return np.concatenate(actions)

In [8]:
import time



env = DARMEnv(digits=["i", "ii", "iii", "iv", "v"],
                            start_state_file="DARMHand_MFNW_start_state.npy",
                            render_mode="human")

def sleep(duration):
    start_time = time.time()
    while time.time() - start_time < duration:
        env.render()
        
env.reset()
done = False
sleep(10)

n_steps = 0

try:
    while not done:
        obs = [env.get_obs(i) for i in range(5)]
        action = get_action(obs)
        _, _, done, info = env.step(action)
        n_steps += 1
        
        if n_steps % 20 == 0:
            print(info["reward"]["bonus"])

        if done or n_steps == 200:
            env.reset()
            done = False
            n_steps = 0
            sleep(2)
except Exception as e:
    pass
finally:
    print("Closing environment")
    env.close()

Loaded XML file successfully
Using step size `0.01256` units
Using `myo` reward type
Number of tendon position actuators: 30
Using `Box([-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.], [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.], (30,), float32)` action space


  logger.warn(


[1. 2. 2. 2. 0.]
[0. 2. 1. 2. 0.]
[0. 2. 1. 2. 0.]
Closing environment


KeyboardInterrupt: 

### Add Contacts visualization to understand why failed poses?

In [21]:
env.close()