Copyright 2022 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");

In [None]:
#@title Example Dataset Preparation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Imports

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
import cv2
import pandas
import PIL
import pycocotools
import scipy

In [None]:
# Ensure we are working from the google_research/ directory (not google_research/factors_of_influence):
# cd ../

In [None]:
from factors_of_influence.fids import fids_tfds_builders as fids

# Prepare a single dataset

In [None]:
# Create the TFDS builder registered under the name 'fids_kitti_segmentation'.
# For debugging, consider passing data_dir='/tmp/fids/tfds', which saves the
# TFDS dataset in the specified directory instead of the default TFDS data
# directory.
ds_build = tfds.builder('fids_kitti_segmentation')

In [None]:
# Download the dataset and prepare it (i.e. convert it to TFDS) via the builder.
ds_build.download_and_prepare()

In [None]:
# Keep the builder's info object and the 'train' split for the cells below.
ds_info = ds_build.info
ds = ds_build.as_dataset(split='train')

In [None]:
# Show the dataset name and config, followed by every feature together with
# the number of entries stored for it in the metadata.
metadata = ds_info.metadata
print(metadata['name'], metadata['config'])
for feature_name, feature_entries in metadata['features'].items():
    print(feature_name, len(feature_entries))

In [None]:
# Take a single example through the numpy iterator and print its keys.
# `ex` stays bound after the loop and is reused by the plotting cell below.
for ex in ds.take(1).as_numpy_iterator():
    print(ex.keys())

In [None]:
# Draw the example's 'image' and 'segmentation' entries side by side.
f, axs = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
image_axis, segmentation_axis = axs
image_axis.imshow(ex['image'])
segmentation_axis.imshow(ex['segmentation'])

## Subsequent calls
Subsequent calls to the dataset can use the `tfds.load` interface as follows:

In [None]:
# Load the 'train' split by dataset name and also return the dataset info.
ds, ds_info = tfds.load('fids_kitti_segmentation', split='train', with_info=True)

# Prepare all datasets and all configs
Some datasets have multiple configs; here we show how all of them can be generated.

In [None]:
def _yield_all_datasets_and_configs(fids_datasets: list[str] = None):
  """Yield all datasets and configs, given a list of datasets."""
  if fids_datasets is None:  # Use all fids_ datasets:
    fids_datasets = [ds for ds in tfds.list_builders() if ds.startswith('fids_')]
  
  for dataset_name in fids_datasets:
    ds_build = tfds.builder(dataset_name)
    all_configs = [c.name for c in ds_build.BUILDER_CONFIGS]
    for config_name in all_configs:            
      yield dataset_name, config_name

In [None]:
# Print every (dataset, config) pair as two padded columns.
for ds_name, config_name in _yield_all_datasets_and_configs():
  print(f'{ds_name:25s} | {config_name:20s}')

In [None]:
# Warning: This starts to convert all datasets to TFDS sequentially!
for ds_name, config_name in _yield_all_datasets_and_configs():
  print(f'{ds_name:25s} | {config_name:20s}')
  # A specific config is addressed as '<dataset name>/<config name>'.
  ds_build = tfds.builder(f'{ds_name}/{config_name}')
  ds_build.download_and_prepare()