# ASIMOV datasets
https://asimov-benchmark.github.io/

## Display Code

In [None]:
# Install dependencies. `mediapy` is only needed to render images; without it
# the notebook falls back to printing image sizes.
!pip3 install mediapy
!pip install tfds-nightly   # to get most up-to-date registered datasets
!pip install apache_beam

In [None]:
from IPython.display import display
from IPython.display import Markdown
import tensorflow as tf

def print_fn(x):
  """Renders the string `x` as Markdown in the notebook output."""
  display(Markdown(x))


# mediapy is optional: when it is missing, `mpy` is set to None so callers can
# check it and skip image rendering.
try:
  import mediapy as mpy
except ModuleNotFoundError:
  print('Not displaying images.')
  mpy = None


class Sample():
  """Renders a single dataset example as Markdown (plus images) in the notebook.

  Attributes:
    example: Mapping from feature name to feature value for one example.
    display_one_instruction: When truthy, only the first entry found under the
      'instructions' feature is rendered.
  """

  def __init__(self, example, display_one_instruction=True):
    self.example = example
    self.display_one_instruction = display_one_instruction

  def display(self):
    """Renders every feature of the example, dispatching on the feature name."""
    for name, value in self.example.items():
      if 'image' in name:
        self._display_image(name, value)
      elif name == 'instructions':
        self._display_instructions(value)
      else:
        self._display_field(name, value)

  def _display_image(self, name, value):
    """Prints the image's array shape and shows it when mediapy is available."""
    print_fn(f'**{name}**:')
    pixels = value.numpy()
    print_fn(f'image of size {pixels.shape}')
    if mpy:
      mpy.show_image(pixels)

  def _display_instructions(self, instructions):
    """Renders entries of the 'instructions' feature as nested samples.

    `instructions` maps each sub-feature name to an indexable collection; the
    number of entries is taken from the 'instruction' key.
    """
    for index in range(len(instructions['instruction'])):
      # Gather the index-th element of every sub-feature into one entry.
      entry = Sample(
          {key: column[index] for key, column in instructions.items()}
      )
      print_fn('---')
      print_fn(f'## Sample Entry {index + 1}<br>')
      entry.display()
      if self.display_one_instruction:
        break

  def _display_field(self, name, value):
    """Prints a generic feature, decoding scalar string tensors to text."""
    if isinstance(value, tf.Tensor) and value.dtype == tf.string:
      value = value.numpy()
      # Only a `bytes` result is decoded; anything else is printed as returned
      # by `.numpy()`.
      if isinstance(value, bytes):
        value = value.decode('utf-8')
    print_fn(f'**{name}**: {value}<br><br>')

def get_single_example(dataset_name: str):
  """Returns the first example from the first listed split of a dataset.

  The dataset is read from `gs://gresearch/robotics/<dataset_name>/0.1.0/`.

  Args:
    dataset_name: Name of the dataset directory under the bucket path.

  Returns:
    The first element yielded when iterating the chosen split.

  Raises:
    IndexError: If the dataset lists no splits.
    StopIteration: If the chosen split yields no examples.
  """
  # Imported locally so this helper works even when the later cells that
  # import `tfds` at notebook level have not been run yet.
  import tensorflow_datasets as tfds

  builder = tfds.builder_from_directory(
      f'gs://gresearch/robotics/{dataset_name}/0.1.0/'
  )
  split_names = list(builder.info.splits.keys())
  first_split = split_names[0]
  return next(iter(builder.as_dataset(split=first_split)))

## Loading the datasets from the GCS bucket

A sanity check showing how to load each dataset from the GCS bucket.

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

# Names of the ASIMOV datasets loaded below; each trailing comment describes
# the dataset's content and split.
DATASETS = [
    'asimov_injury_val',  # Situations generated from real hospital injury reports (validation set).
    'asimov_dilemmas_auto_val',  # Binary dilemma questions generated from counterfactual situations used to auto-amend generated constitutions (validation set).
    'asimov_dilemmas_scifi_train',  # Multiple-choice ethical questions (with desirable and undesirable answers) based on situations inspired from Science Fiction literature (training set).
    'asimov_dilemmas_scifi_val',  # Multiple-choice ethical questions (with desirable and undesirable answers) based on situations inspired from Science Fiction literature (validation set).
    'asimov_multimodal_auto_val',  # (Image, context, instruction) triplets generated from real images (from RoboVQA dataset) which are modified to contain undesirable elements, generated instructions can be desirable or undesirable (validation set).
    'asimov_multimodal_manual_val',  # (Image, context, instruction) triplets manually taken and written by humans while ensuring that the instruction desirability can only be determined by looking at the image (validation set).
]

# Sanity check: build each dataset from its GCS directory and confirm that the
# first example of every split can be read.
for ds_name in DATASETS:
  builder = tfds.builder_from_directory(
      f'gs://gresearch/robotics/{ds_name}/0.1.0/'
  )
  for split in builder.info.splits.keys():
    ds = builder.as_dataset(split=split)
    it = iter(ds)
    example = next(it)
    assert example is not None

## Loading the datasets from the TFDS Catalog

A sanity check showing how to load each dataset registered in the TFDS Catalog. This will download and cache the datasets on the local disk for fast access.

In [None]:
import tensorflow_datasets as tfds
DOWNLOAD_DIR = '/tmp/tensorflow_datasets'

# Sanity check: load each dataset by name and confirm that the first example
# of every split can be read.
for ds_name in DATASETS:
  print(f'Loading the dataset {ds_name}')
  # With no `split` argument, `tfds.load` returns a dict mapping each split
  # name to its dataset. Iterate that dict so the check reads what was just
  # loaded rather than a builder left over from an earlier cell.
  splits = tfds.load(ds_name, data_dir=DOWNLOAD_DIR)
  for split, ds in splits.items():
    it = iter(ds)
    example = next(it)
    assert example is not None

## Display `asimov_multimodal_auto_val` dataset

In [None]:
# Fetch one example of the dataset and render it in the notebook.
example = get_single_example('asimov_multimodal_auto_val')
sample = Sample(example)

sample.display()

## Display `asimov_dilemmas_auto_val` dataset

In [None]:
# Fetch one example of the dataset and render it in the notebook.
example = get_single_example('asimov_dilemmas_auto_val')
sample = Sample(example)

sample.display()

## Display `asimov_injury_val` dataset

In [None]:
# Fetch one example of the dataset and render it in the notebook.
example = get_single_example('asimov_injury_val')
sample = Sample(example)

sample.display()