# Experiments

In [6]:
import os

import tensorflow_datasets as tfds
import cv2
import mediapy
import numpy as np
import torch

from language_table.environments import blocks
from language_table.environments import language_table
from language_table.environments.rewards import block2block
from matplotlib import pyplot as plt
from skimage.metrics import structural_similarity as ssim
from tqdm import tqdm
from torch.utils.data import DataLoader

import warnings
warnings.filterwarnings("ignore")

# Language Table Environment

In [11]:
def decode_inst(inst):
  """Utility to decode encoded language instruction"""
  return bytes(inst[np.where(inst != 0)].tolist()).decode("utf-8")

In [12]:
DATASET_VERSION = '0.0.1'
DATASET_NAME = 'language_table_blocktoblock_sim'  # CHANGEME: change this to load another dataset.

dataset_directories = {
    'language_table_blocktoblock_sim': 'gs://gresearch/robotics/language_table_blocktoblock_sim'
}

dataset_path = os.path.join(dataset_directories[DATASET_NAME], DATASET_VERSION)

In [13]:
builder = tfds.builder_from_directory(dataset_path)
episode_ds = builder.as_dataset(split='train')

print(episode_ds.element_spec)

{'episode_id': TensorSpec(shape=(), dtype=tf.string, name=None), 'steps': DatasetSpec({'action': TensorSpec(shape=(2,), dtype=tf.float32, name=None), 'is_first': TensorSpec(shape=(), dtype=tf.bool, name=None), 'is_last': TensorSpec(shape=(), dtype=tf.bool, name=None), 'is_terminal': TensorSpec(shape=(), dtype=tf.bool, name=None), 'observation': {'effector_target_translation': TensorSpec(shape=(2,), dtype=tf.float32, name=None), 'effector_translation': TensorSpec(shape=(2,), dtype=tf.float32, name=None), 'instruction': TensorSpec(shape=(512,), dtype=tf.int32, name=None), 'rgb': TensorSpec(shape=(360, 640, 3), dtype=tf.uint8, name=None)}, 'reward': TensorSpec(shape=(), dtype=tf.float32, name=None)}, TensorShape([]))}


# Restore the sequence of episodes

Take the last frame of episode and find the most similar frame (in the begining) from other episodes.

In [14]:
def MSE(img1, img2):
        squared_diff = (img1 -img2) ** 2
        summed = np.sum(squared_diff)
        num_pix = img1.shape[0] * img1.shape[1]
        err = summed / num_pix
        return err

In [15]:
instruction, rgb = [], []
is_first, is_last, is_terminal = [], [], []
best_score = 100
score = 0
for num, data in enumerate(episode_ds):
    instructions = [decode_inst(step['observation']['instruction']) for step in data['steps'].as_numpy_iterator()]
    images = [step['observation']['rgb'] for step in  data['steps'].as_numpy_iterator()]
    if num == 0:
        ref_image = [step['observation']['rgb'] for step in  data['steps'].as_numpy_iterator()][-1]
        
    else:
        
        score = MSE(ref_image, images[0])
        if score < best_score:
            best_score = score
            best_num = num
    
    print(f"{num}. Current_score: {score:0.3f}, best_score: {best_score:0.3f}")

0. Current_score: 0.000, best_score: 100.000
1. Current_score: 59.449, best_score: 59.449
2. Current_score: 54.133, best_score: 54.133
3. Current_score: 85.621, best_score: 54.133
4. Current_score: 47.654, best_score: 47.654
5. Current_score: 50.457, best_score: 47.654
6. Current_score: 48.475, best_score: 47.654
7. Current_score: 44.099, best_score: 44.099
8. Current_score: 49.825, best_score: 44.099
9. Current_score: 44.703, best_score: 44.099
10. Current_score: 64.806, best_score: 44.099
11. Current_score: 49.364, best_score: 44.099
12. Current_score: 43.589, best_score: 43.589
13. Current_score: 46.323, best_score: 43.589
14. Current_score: 52.249, best_score: 43.589
15. Current_score: 61.450, best_score: 43.589
16. Current_score: 70.325, best_score: 43.589
17. Current_score: 100.491, best_score: 43.589
18. Current_score: 70.837, best_score: 43.589
19. Current_score: 42.038, best_score: 42.038
20. Current_score: 46.701, best_score: 42.038
21. Current_score: 39.194, best_score: 39.1

In [16]:
episode_1 = next(iter(episode_ds.take(1)))
episode_2  = next(iter(episode_ds.take(7515)))

In [19]:
frames_1 = []
for step in episode_1['steps'].as_numpy_iterator():
    frames_1.append(step['observation']['rgb'])
    
frames_2 = []
for step in episode_2['steps'].as_numpy_iterator():
    frames_2.append(step['observation']['rgb'])

In [20]:
frame = frames_1 + frames_2

In [21]:
mediapy.show_video(frame, title=decode_inst(step['observation']['instruction']), fps=5)

0
slide the yellow star next to the green star  This browser does not support the video tag.
