# ReActS: Recovering Action Sequences

This notebook analyzes datasets collected in environments with physical constraints in order to find and analyze recovering action sequences.

In [2]:
import numpy as np
import tensorflow as tf
import pathlib
import argparse
import json
import logging
import pathlib
from time import perf_counter

import tensorflow as tf
from copy import deepcopy
from colorama import Fore

from link_bot_data.dynamics_dataset import DynamicsDataset
from state_space_dynamics import model_utils
from moonshine.gpu_config import limit_gpu_mem
from link_bot_pycommon.pycommon import print_dict
from IPython.display import HTML
from link_bot_pycommon.get_scenario import get_scenario
from moonshine.moonshine_utils import dict_of_sequences_to_sequence_of_dicts_tf, dict_of_sequences_to_sequence_of_dicts, numpify
import matplotlib.pyplot as plt
from link_bot_data.classifier_dataset_utils import *
from link_bot_classifiers import classifier_utils
from matplotlib import rc

# Print numpy arrays with 3 decimal places and without scientific notation.
np.set_printoptions(precision=3, suppress=True)
# NOTE(review): "slides" does not look like a built-in matplotlib style; presumably a
# custom style sheet must be installed for this to work — confirm.
plt.style.use("slides")

# NOTE(review): presumably caps how much GPU memory TensorFlow may use (units of the
# argument are not visible here) — confirm against moonshine.gpu_config.
limit_gpu_mem(2)

In [8]:
# --- Constants ---------------------------------------------------------------
# `take` is forwarded to get_datasets below; None is the value used for this run.
take = None
# Hard-coded size of the training split; used later to report a percentage.
n_total_train_examples = 8250

# --- Scenario and dynamics dataset -------------------------------------------
scenario = get_scenario("link_bot")
dataset_dirs = [pathlib.Path("./fwd_model_data/rope_more_obs_big/")]
dynamics_dataset = DynamicsDataset(dataset_dirs)
tf_dataset = dynamics_dataset.get_datasets(mode='train', take=take)

# --- Forward-dynamics models (one checkpoint directory per index 0..7) -------
model_dirs = [pathlib.Path(f"./ss_log_dir/tf2_rope/{i}") for i in range(8)]
# The second return value of load_generic_model is not needed here.
fwd_models, _ = model_utils.load_generic_model(model_dirs)

# --- Constraint classifier ---------------------------------------------------
classifier_model_dir = pathlib.Path('log_data/rope_2_seq/May_24_01-12-08_617a0bee2a')
classifier_model = classifier_utils.load_generic_model(classifier_model_dir, scenario=scenario)

[36mRestored from ss_log_dir/tf2_rope/0/ckpt-40[39m
[36mRestored from ss_log_dir/tf2_rope/1/ckpt-44[39m
[36mRestored from ss_log_dir/tf2_rope/2/ckpt-45[39m
[36mRestored from ss_log_dir/tf2_rope/3/ckpt-43[39m
[36mRestored from ss_log_dir/tf2_rope/4/ckpt-40[39m
[36mRestored from ss_log_dir/tf2_rope/5/ckpt-38[39m
[36mRestored from ss_log_dir/tf2_rope/6/ckpt-40[39m
[36mRestored from ss_log_dir/tf2_rope/7/ckpt-49[39m
[36mRestored from log_data/rope_2_seq/May_24_01-12-08_617a0bee2a/ckpt-5[39m


In [9]:
# Labelling parameters. 'threshold' is the value check_sequence passes on as the
# maximum prediction error for a step to count as recovered; 'state_key' names the
# state entry of interest.
labeling_params = dict(
    threshold=0.10,
    state_key='link_bot',
)

In [10]:
def check_is_recovered(environment, state, action, next_state, threshold):
    """Test whether the forward model predicts the observed next state closely enough.

    Args:
        environment: environment passed through to the forward model.
        state: dict holding the current state under the 'link_bot' key.
        action: the single action taken from `state`.
        next_state: dict holding the observed next state under the 'link_bot' key.
        threshold: maximum allowed norm of (prediction - observation).

    Returns:
        A pair (is_recovered, prediction): whether the norm of the prediction error
        is strictly below `threshold`, and the predicted state dict.
    """
    # The forward model is given the action with an extra leading axis.
    action_sequence = tf.expand_dims(action, axis=0)
    rollout = fwd_models.propagate_differentiable(environment, state, action_sequence)
    # Element 1 of the rollout is used as the prediction for the step after `state`
    # (presumably element 0 is the start state itself — confirm).
    predicted_next = rollout[1]
    prediction_error = tf.linalg.norm(predicted_next['link_bot'] - next_state['link_bot'])
    return prediction_error < threshold, predicted_next

def check_sequence(example):
    """Scan one dataset example for a step at which the forward model recovers.

    The example is walked from t=0. At each step the forward model's one-step
    prediction is compared with the observed next state (see check_is_recovered,
    using labeling_params['threshold']). The scan stops at the first recovered
    step that is not the very first step of the example.

    Args:
        example: an (inputs, outputs) pair; inputs['action'][t] and
            outputs['link_bot'][t] are indexed by time step.

    Returns:
        (True, data) when such a step is found, where `data` holds the
        'actuals', 'actions', 'environment', 'predictions' and
        'is_recovered_sequence' collected up to and including that step
        ('actuals' additionally ends with the state after the recovering action);
        otherwise (False, None).
    """
    inputs, outputs = example
    # The environment depends only on the example, so look it up once.
    environment = scenario.get_environment_from_example(example)

    predictions = []
    actuals = []
    actions = []
    # Kept local: relying on a module-level list of the same name made this function
    # fail on a fresh kernel and leak results between calls.
    is_recovered_sequence = []

    for start_t in range(0, dynamics_dataset.max_sequence_length - 1):
        action = inputs['action'][start_t]
        state = {
            'link_bot': outputs['link_bot'][start_t]
        }
        next_state = {
            'link_bot': outputs['link_bot'][start_t + 1]
        }
        is_recovered, prediction = check_is_recovered(environment, state, action, next_state,
                                                      labeling_params['threshold'])

        predictions.append(prediction)
        actuals.append(state)
        actions.append(action)
        is_recovered_sequence.append(is_recovered)

        # NOTE(review): this only requires that at least one step precedes the recovered
        # one; it does not require the earlier steps to have been *un*recovered — confirm
        # that this is the intended definition.
        if is_recovered and len(is_recovered_sequence) > 1:
            actuals.append(next_state)
            return True, {
                'actuals': actuals,
                'actions': actions,
                'environment': environment,
                'predictions': predictions,
                'is_recovered_sequence': is_recovered_sequence,
            }
    return False, None

def sample_actions(m):
    """Draw `m` random single-step, two-component actions.

    Returns the result of tf.random.uniform for shape [m, 1, 2], with every
    component sampled between -0.15 and 0.15.
    """
    batch_shape = [m, 1, 2]
    action_limit = 0.15
    return tf.random.uniform(shape=batch_shape, minval=-action_limit, maxval=action_limit)

In [11]:
%matplotlib
tf.random.set_seed(0)
n_reacts_examples = 0 
n_actions_sampled = 50
recovering_examples = []
for example in tf_dataset:
    inputs, outputs = example
    environment = scenario.get_environment_from_example(example)
    environment_batched = {k:tf.stack([v]*n_actions_sampled, axis=0) for k,v in environment.items()}
    for start_t  in range(0, dynamics_dataset.max_sequence_length - 2):
        action = inputs['action'][start_t]
        state_vec = outputs['link_bot'][start_t]
        start_state_np = {
            'link_bot': state_vec.numpy(),
        }
        random_actions = sample_actions(n_actions_sampled)
        
        state_batched = tf.expand_dims(tf.stack([state_vec]*n_actions_sampled, axis=0), axis=1)
        state_dict = {
            'link_bot': state_batched,
        }
        predictions = fwd_models.propagate_differentiable_batched(start_states=state_dict,
                                                                  actions=random_actions)
        p_needs_recovery = classifier_model.check_constraint_differentiable_batched_tf(environment=environment_batched,
                                                                                             predictions=predictions,
                                                                                             actions=random_actions)
        needs_recovery = p_needs_recovery < 0.5
        if tf.reduce_all(needs_recovery):
#             plt.figure()
#             ax = plt.gca()
#             scenario.plot_state(ax, start_state_np, color='r', s=50, zorder=1)
#             scenario.plot_environment(ax, numpify(environment))
#             for action in random_actions:
#                 scenario.plot_action(ax, start_state_np, action[0].numpy(), color='r', s=50, zorder=1)
#             print(p_needs_recovery)
#             plt.show(block=True)
            is_recovered_sequence = []
            is_recovering, recovering_data = check_sequence(example)
            if is_recovering:
                recovering_examples.append(recovering_data)
print(len(recovering_examples))

Using matplotlib backend: Qt5Agg
2


In [None]:
# Number of collected recovering sequences relative to the hard-coded training-set
# size (n_total_train_examples), printed as a percentage with three decimals.
n_reacts_examples = len(recovering_examples)
print(f"{n_reacts_examples / n_total_train_examples * 100:.3f}%")

In [None]:
%matplotlib
def animate(recovering_example):
    """Build an animation for one entry of `recovering_examples`.

    Args:
        recovering_example: dict with 'environment', 'actions', 'actuals' and
            'predictions' entries, as produced by check_sequence.

    Returns:
        Whatever scenario.animate_recovering_actions_sequence returns for this
        example, requested at 0.5 frames per second.
    """
    animation_kwargs = {
        # The environment is converted with numpify before being handed to the scenario.
        'environment': numpify(recovering_example['environment']),
        'actions': recovering_example['actions'],
        'actual': recovering_example['actuals'],
        'predictions': recovering_example['predictions'],
        'fps': 0.5,
    }
    return scenario.animate_recovering_actions_sequence(**animation_kwargs)

# anim = animate(recovering_examples[4])