From f4650ca47c5a8730fa6228f213552db39cb37e87 Mon Sep 17 00:00:00 2001
From: Carlos Ezequiel
Date: Wed, 4 Nov 2020 11:05:00 -0500
Subject: [PATCH] Release/2.0 (#56)

* Update check_tfrecords to use new dataset load function.
* Add tfrecord_dir to create_tfrecords output.
* Restructure test image directory to match expected format.
* Feature/dataclass (#44)
* Added data classes for types.
* Checking in progress.
* Checking in more changes.
* Converted types to classes and refactored schema into OO pattern.
* Changed OrderedDict import to support py3.6.
* Changed OrderedDict import to support py3.6.
* Updated setup.py for version.
* fixing setup.py
* Patched requirements and setup.
* Addressed comments in code review.
* Addressed code comments round 2.
* refactored IMAGE_CSV_SCHEMA.
* Merged check_test.py from dev

Co-authored-by: Carlos Ezequiel

* Feature/structured data tutorial (#45)
* Converted types to classes and refactored schema into OO pattern.
* Add tutorial on structured data conversion. This changes types.FloatInput to use tf.float32 for its feature_spec attribute to address potential incompatibility with using tf.float64 type in TensorFlow Transform.

Co-authored-by: Mike Bernico

* Update structured data tutorial to use output dir.
* Clarify need for proper header when using create_tfrecords. Fixes #47.
* Clean up README and update image directory notebook.
* Feature/test image dir (#49)
* Restructure test image directory to match expected format.
* Clean up README and update image directory notebook.
* Fix minor issues
* Add an explicit error message for missing train split
* Configure automated tests for Jupyter notebooks.
* Add convert_and_load function. Also refactor create_tfrecords to convert.
* Refactor check and common modules to utils.
* Add test targets for py files and notebooks.
* Feature/convert and load (#55)
* Add convert_and_load function. Also refactor create_tfrecords to convert.
* Refactor check and common modules to utils.
* Add test targets for py files and notebooks.
* Update version in setup.py and release notes.
* Fix issues with GCS path parsing.
Co-authored-by: Mike Bernico Co-authored-by: Sergii Khomenko --- .github/workflows/python-cicd.yml | 11 +- .gitignore | 3 + Makefile | 13 +- README.md | 189 +-- RELEASE.md | 8 + requirements.txt | 3 + samples/Basic-TFRecorder-Usage.ipynb | 1026 ++++++++++++++++- samples/Convert-image-directory.ipynb | 200 ++++ samples/Convert-structured-data.ipynb | 400 +++++++ samples/Loading-a-TF-Dataset.ipynb | 27 +- ...FRecorder-with-Google-Cloud-Dataflow.ipynb | 4 +- setup.py | 8 +- tfrecorder/__init__.py | 8 +- tfrecorder/accessor.py | 12 +- tfrecorder/beam_image_test.py | 15 +- tfrecorder/beam_pipeline.py | 84 +- tfrecorder/beam_pipeline_test.py | 44 +- tfrecorder/cli.py | 8 +- tfrecorder/common.py | 42 - tfrecorder/common_test.py | 53 - tfrecorder/{client.py => converter.py} | 125 +- .../{client_test.py => converter_test.py} | 159 ++- tfrecorder/{dataset.py => dataset_loader.py} | 11 +- ...dataset_test.py => dataset_loader_test.py} | 34 +- tfrecorder/input_schema.py | 102 ++ .../{schema_test.py => input_schema_test.py} | 37 +- tfrecorder/schema.py | 187 --- tfrecorder/test_data/data.csv | 12 +- .../images/{ => TEST}/cat/cat-800x600-3.jpg | Bin .../images/{ => TEST}/goat/goat-640x427-3.jpg | Bin .../images/{ => TRAIN}/cat/cat-640x853-1.jpg | Bin .../{ => TRAIN}/goat/goat-640x640-1.jpg | Bin .../{ => VALIDATION}/cat/cat-800x600-2.jpg | Bin .../{ => VALIDATION}/goat/goat-320x320-2.jpg | Bin tfrecorder/test_utils.py | 13 +- tfrecorder/types.py | 68 +- tfrecorder/{check.py => utils.py} | 77 +- tfrecorder/{check_test.py => utils_test.py} | 92 +- 38 files changed, 2333 insertions(+), 742 deletions(-) create mode 100644 samples/Convert-image-directory.ipynb create mode 100644 samples/Convert-structured-data.ipynb delete mode 100644 tfrecorder/common.py delete mode 100644 tfrecorder/common_test.py rename tfrecorder/{client.py => converter.py} (76%) rename tfrecorder/{client_test.py => converter_test.py} (66%) rename tfrecorder/{dataset.py => dataset_loader.py} (92%) rename tfrecorder/{dataset_test.py => dataset_loader_test.py} (64%) create mode 100644 tfrecorder/input_schema.py rename tfrecorder/{schema_test.py => input_schema_test.py} (58%) delete mode 100644 tfrecorder/schema.py rename tfrecorder/test_data/images/{ => TEST}/cat/cat-800x600-3.jpg (100%) rename tfrecorder/test_data/images/{ => TEST}/goat/goat-640x427-3.jpg (100%) rename tfrecorder/test_data/images/{ => TRAIN}/cat/cat-640x853-1.jpg (100%) rename tfrecorder/test_data/images/{ => TRAIN}/goat/goat-640x640-1.jpg (100%) rename tfrecorder/test_data/images/{ => VALIDATION}/cat/cat-800x600-2.jpg (100%) rename tfrecorder/test_data/images/{ => VALIDATION}/goat/goat-320x320-2.jpg (100%) rename tfrecorder/{check.py => utils.py} (60%) rename tfrecorder/{check_test.py => utils_test.py} (60%) diff --git a/.github/workflows/python-cicd.yml b/.github/workflows/python-cicd.yml index 2a25bd4..9d80335 100644 --- a/.github/workflows/python-cicd.yml +++ b/.github/workflows/python-cicd.yml @@ -7,7 +7,6 @@ on: [push] jobs: build: - runs-on: ubuntu-latest strategy: matrix: @@ -23,10 +22,14 @@ jobs: run: | python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Run all tests + run: | + export PYTHONPATH="$GITHUB_WORKSPACE" + make test + - name: Lint with pylint run: | make pylint - - name: Run tests - run: | - make test + diff --git a/.gitignore b/.gitignore index 1cda03e..2873027 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +build/ +dist/ +tfrecorder.egg-info .idea/ .ipynb_checkpoints/ .vscode/ 
diff --git a/Makefile b/Makefile index fae2a05..f69cb7e 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,17 @@ -all: init test pylint +all: init testnb test pylint init: pip install -r requirements.txt -test: +test: test-nb test-py + +test-py: nosetests --with-coverage -v --cover-package=tfrecorder +test-nb: + ls -1 samples/*.ipynb | grep -v '^.*Dataflow.ipynb' | xargs py.test --nbval-lax -p no:python + pylint: - pylint tfrecorder + pylint -j 0 tfrecorder -.PHONY: all init test pylint +.PHONY: all init test pylint diff --git a/README.md b/README.md index 72f8048..1ad9f2a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ TFRecorder can convert any Pandas DataFrame or CSV file into TFRecords. If your [Release Notes](RELEASE.md) ## Why TFRecorder? -Using the TFRecord storage format is important for optimal machine learning pipelines and getting the most from your hardware (in cloud or on prem). The TFRecorder project started inside [Google Cloud AI Services](https://cloud.google.com/consulting) when we realized we were writing TFRecord conversion code over and over again. +Using the TFRecord storage format is important for optimal machine learning pipelines and getting the most from your hardware (in cloud or on prem). The TFRecorder project started inside [Google Cloud AI Services](https://cloud.google.com/consulting) when we realized we were writing TFRecord conversion code over and over again. When to use TFRecords: * Your model is input bound (reading data is impacting training time). @@ -71,7 +71,7 @@ df.tensorflow.to_tfr(output_dir='/my/output/path') Google Cloud Platform Dataflow workers need to be supplied with the tfrecorder package that you would like to run remotely. To do so first download or build -the package (a python wheel file) and then specify the path the the file when +the package (a python wheel file) and then specify the path the file when tfrecorder is called. Step 1: Download or create the wheel file. @@ -109,7 +109,7 @@ Using Python interpreter: ```python import tfrecorder -tfrecorder.create_tfrecords( +tfrecorder.convert( source='/path/to/data.csv', output_dir='gs://my/bucket') ``` @@ -126,10 +126,9 @@ tfrecorder create-tfrecords \ ```python import tfrecorder -tfrecorder.create_tfrecords( +tfrecorder.convert( source='/path/to/image_dir', - output_dir='gs://my/bucket', -) + output_dir='gs://my/bucket') ``` The image directory should have the following general structure: @@ -159,7 +158,7 @@ images/ ### Loading a TF Dataset from TFRecord files -You can load a TensorFlow dataset from TFRecord files generated by TFRecorder +You can load a TensorFlow dataset from TFRecord files generated by TFRecorder on your local machine. ```python @@ -175,8 +174,9 @@ Using Python interpreter: ```python import tfrecorder -tfrecorder.check_tfrecords( - file_pattern='/path/to/tfrecords/train*.tfrecord.gz', +tfrecorder.inspect( + tfrecord_dir='/path/to/tfrecords/', + split='TRAIN', num_records=5, output_dir='/tmp/output') ``` @@ -187,16 +187,17 @@ representing the images encoded into TFRecords. Using the command line: ```bash -tfrecorder check-tfrecords \ - --file_pattern=/path/to/tfrecords/train*.tfrecord.gz \ +tfrecorder inspect \ + --tfrecord-dir=/path/to/tfrecords/ \ + --split='TRAIN' \ --num_records=5 \ --output_dir=/tmp/output ``` ## Default Schema -If you don't specify an input schema, TFRecorder expects data to be in the same format as -[AutoML Vision input](https://cloud.google.com/vision/automl/docs/prepare). 
+If you don't specify an input schema, TFRecorder expects data to be in the same format as +[AutoML Vision input](https://cloud.google.com/vision/automl/docs/prepare). This format looks like a Pandas DataFrame or CSV formatted as: | split | image_uri | label | @@ -205,139 +206,139 @@ This format looks like a Pandas DataFrame or CSV formatted as: where: * `split` can take on the values TRAIN, VALIDATION, and TEST -* `image_uri` specifies a local or Google Cloud Storage location for the image file. -* `label` can be either a text based label that will be integerized or integer +* `image_uri` specifies a local or Google Cloud Storage location for the image file. +* `label` can be either a text-based label that will be integerized or an integer ## Flexible Schema -TFRecorder's flexible schema system allows you to use any schema you want for your input data. To support any input data schema, provide a schema map to TFRecorder. A TFRecorder schema_map creates a mapping between your dataframe column names and their types in the resulting -TFRecord. +TFRecorder's flexible schema system allows you to use any schema you want for your input data. -### Creating and using a schema map -A schema map is a Python dictionary that maps DataFrame column names to [supported -TFRecorder types.](#Supported-types) +For example, the default image CSV input schema can be defined like this: +```python +import pandas as pd +import tfrecorder +from tfrecorder import input_schema +from tfrecorder import types -For example, the default image CSV input can be defined like this: +image_csv_schema = input_schema.Schema({ + 'split': types.SplitKey, + 'image_uri': types.ImageUri, + 'label': types.StringLabel +}) -```python -from tfrecorder import schema +# You can then pass the schema to `tfrecorder.convert` or, as below, to `to_tfr`. -image_csv_schema = { - 'split': schema.split_key, - 'image_uri': schema.image_uri, - 'label': schema.string_label -} +df = pd.read_csv(...) +df.tensorflow.to_tfr( + output_dir='gs://my/bucket', + schema_map=image_csv_schema, + runner='DataflowRunner', + project='my-project', + region='us-central1') ``` -Once created a schema_map can be sent to TFRecorder. + +### Flexible Schema Example + +Imagine that you have a dataset that you would like to convert to TFRecords that +looks like this: + +| split | x | y | label | +|-------|-------|------|-------| +| TRAIN | 0.32 | 42 | 1 | + +You can use TFRecorder as shown below: ```python import pandas as pd -from tfrecorder import schema import tfrecorder +from tfrecorder import input_schema +from tfrecorder import types + +# First create a schema map +schema = input_schema.Schema({ + 'split': types.SplitKey, + 'x': types.FloatInput, + 'y': types.IntegerInput, + 'label': types.IntegerLabel, +}) + +# Now call TFRecorder with the specified schema_map df = pd.read_csv(...) df.tensorflow.to_tfr( output_dir='gs://my/bucket', - schema_map=schema.image_csv_schema, + schema=schema, runner='DataflowRunner', project='my-project', region='us-central1') ``` +After calling TFRecorder's `to_tfr()` function, TFRecorder will create an Apache Beam pipeline, either locally or in this case +using Google Cloud's Dataflow runner. This Beam pipeline will use the schema map to identify the types you've associated with +each data column and process your data using [TensorFlow Transform](https://www.tensorflow.org/tfx/transform/get_started) and TFRecorder's image processing functions to convert the data into TFRecords.
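The following sketch is an editorial illustration, not part of this patch: a minimal way to read the train split produced above back with plain `tf.data`. The feature names and dtypes are assumptions taken from the example schema (TensorFlow Transform scales `x` and `y`, while `IntegerLabel` passes through untransformed); the authoritative feature spec is the `transformed_metadata/` directory written next to the TFRecord files, and TFRecorder's own loading utilities (see "Loading a TF Dataset from TFRecord files" above) can do this for you.

```python
# Hypothetical sketch: read the train shards produced by the example above.
# Feature names/dtypes are assumptions; check transformed_metadata/ for the
# real feature spec of your run.
import tensorflow as tf

feature_spec = {
    'x': tf.io.FixedLenFeature([], tf.float32),    # scaled by TF Transform
    'y': tf.io.FixedLenFeature([], tf.float32),    # IntegerInput is scaled too
    'label': tf.io.FixedLenFeature([], tf.int64),  # IntegerLabel is not transformed
}

def parse_fn(serialized_example):
  """Parses one serialized tf.train.Example into a dict of tensors."""
  return tf.io.parse_single_example(serialized_example, feature_spec)

# TFRecorder writes gzipped shards such as train-00000-of-00001.tfrecord.gz
# inside an output directory named tfrecorder-<timestamp>-to-tfr.
files = tf.data.Dataset.list_files(
    'gs://my/bucket/tfrecorder-*-to-tfr/train-*.tfrecord.gz')
dataset = (
    tf.data.TFRecordDataset(files, compression_type='GZIP')
    .map(parse_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(32)
    .prefetch(1))
```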
### Supported types -TFRecorder's schema system supports several types, all listed below. You can use -these types by referencing them in the schema map. Each type informs TFRecorder how -to treat your DataFrame columns. For example, the schema mapping -`my_split_key: schema.SplitKeyType` tells TFRecorder to treat the column `my_split_key` as -type `schema.SplitKeyType` and create dataset splits based on it's contents. -#### schema.ImageUriType -* Specifies the path to an image. When specified, TFRecorder -will load the specified image and store the image as a [base64 encoded](https://docs.python.org/3/library/base64.html) - [tf.string](https://www.tensorflow.org/tutorials/load_data/unicode) in the key 'image' -along with the height, width, and image channels as integers using they keys 'image_height', 'image_width', and 'image_channels'. -* A schema can contain only one imageUriType +TFRecorder's schema system supports several types. +You can use these types by referencing them in the schema map. +Each type informs TFRecorder how to treat your DataFrame columns. + +#### types.SplitKey -#### schema.SplitKeyType * A split key is required for TFRecorder at this time. * Only one split key is allowed. -* Specifies a split key that TFRecorder will use to partition the +* Specifies a split key that TFRecorder will use to partition the input dataset. * Allowed values are 'TRAIN', 'VALIDATION', and 'TEST' -Note: If you do not want your data to be partitioned please include a split_key and -set all rows to TRAIN. +Note: If you do not want your data to be partitioned, include a column with +`types.SplitKey` and set all the elements to `TRAIN`. + +#### types.ImageUri + +* Specifies the path to an image. When specified, TFRecorder +will load the specified image and store the image as a [base64 encoded](https://docs.python.org/3/library/base64.html) + [tf.string](https://www.tensorflow.org/tutorials/load_data/unicode) in the key 'image' +along with the height, width, and image channels as integers using the keys 'image_height', 'image_width', and 'image_channels'. +* A schema can contain only one `ImageUri` column + +#### types.IntegerInput -#### schema.IntegerInputType * Specifies an int input. * Will be scaled to mean 0, variance 1. -#### schema.FloatInputType +#### types.FloatInput + * Specifies a float input. * Will be scaled to mean 0, variance 1. -#### schema.CategoricalInputType +#### types.CategoricalInput + * Specifies a string input. * Vocabulary computed and output integerized. -#### schema.IntegerLabelType +#### types.IntegerLabel + * Specifies an integer target. * Not transformed. -#### schema.StringLabelType +#### types.StringLabel + * Specifies a string target. * Vocabulary computed and *output integerized.* -### Flexible Schema Example - -Imagine that you have a dataset that you would like to convert to TFRecords that -looks like this: - -| split | x | y | label | -|-------|-------|------|-------| -| TRAIN | 0.32 | 42 |1 | - -You can use TFRecorder as shown below: - -```python -import pandas as pd -import tfrecorder -from tfrecorder import schema - -# First create a schema map -schema_map = { - 'split':schema.SplitKeyType, - 'x':schema.FloatInputType, - 'y':schema.IntegerInputType, - 'label':schema.IntegerLabelType -} - -# Now call TFRecorder with the specified schema_map - -df = pd.read_csv(...)
-df.tensorflow.to_tfr( - output_dir='gs://my/bucket', - schema_map=schema_map, - runner='DataflowRunner', - project='my-project', - region='us-central1') -``` -After calling TFRecorder's to_tfr() function, TFRecorder will create an Apache beam pipeline, either locally or in this case -using Google Cloud's Dataflow runner. This beam pipeline will use the schema map to identify the types you've associated with -each data column and process your data using [TensorFlow Transform](https://www.tensorflow.org/tfx/transform/get_started) and TFRecorder's image processing functions to convert the data into into TFRecords. - ## Contributing -Pull requests are welcome. Please see our [code of conduct](docs/code-of-conduct.md) and [contributing guide](docs/contributing.md). +Pull requests are welcome. +Please see our [code of conduct](docs/code-of-conduct.md) and [contributing guide](docs/contributing.md). ## Why TFRecorder? -Using the TFRecord storage format is important for optimal machine learning pipelines and getting the most from your hardware (in cloud or on prem). + +Using the TFRecord storage format is important for optimal machine learning pipelines and getting the most from your hardware (in cloud or on prem). TFRecords help when: * Your model is input bound (reading data is impacting training time). * Anytime you want to use tf.Dataset * When your dataset can't fit into memory - -In our work at [Google Cloud AI Services](https://cloud.google.com/consulting) we wanted to help our users spend their time writing AI/ML applications, and spend less time converting data. - +Need help with using AI in the cloud? +Visit [Google Cloud AI Services](https://cloud.google.com/consulting). diff --git a/RELEASE.md b/RELEASE.md index fedc739..0319e6f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,11 @@ +# Release 2.0 + +* Changes `create_tfrecords` and `check_tfrecords` to `convert` and `inspect` respectively +* Adds `convert_and_load` function +* Changes flexible schema to use `dataclasses` +* Adds automated testing for notebooks +* Minor fixes and usability improvements + # Hotfix 1.1.3 * Adds note regarding DataFrame header specification in README.md. diff --git a/requirements.txt b/requirements.txt index c669ff5..37ec989 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,6 @@ jupyter >= 1.0.0 tensorflow >= 2.3.1 pyarrow <0.18,>=0.17 frozendict >= 1.2 +dataclasses >= 0.5;python_version<"3.7" +nbval >= 0.9.6 +pytest >= 6.1.1 diff --git a/samples/Basic-TFRecorder-Usage.ipynb b/samples/Basic-TFRecorder-Usage.ipynb index 7afbf7b..6d0ebd1 100644 --- a/samples/Basic-TFRecorder-Usage.ipynb +++ b/samples/Basic-TFRecorder-Usage.ipynb @@ -7,75 +7,1049 @@ "# Basic TFRUtil Usage\n", "\n", "This notebook demonstrates the basic usage of TFRUtil. It is meant to be run from the /sample/ path and uses test images included with TFRUtil stored in /tfrutil/test_data.\n", - " \n", + "\n", "Before running this notebook, please install TFUtil with the command `python setup.py` from the repository root." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import pandas as pd \n", + "import os\n", + "import pathlib\n", + "\n", + "import pandas as pd\n", "import tfrecorder" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "OUTPUT_PATH=\"./out\" # YOUR LOCAL OUTPUT PATH HERE" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv(\"data.csv\")" + "input_file = pathlib.Path(os.getcwd())/'../tfrecorder/test_data/data.csv'\n", + "output_dir = './out' # YOUR LOCAL OUTPUT PATH HERE" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitimage_urilabel
0TESTtfrecorder/test_data/images/TEST/cat/cat-800x6...cat
1TESTtfrecorder/test_data/images/TEST/goat/goat-640...goat
2TRAINtfrecorder/test_data/images/TRAIN/cat/cat-640x...cat
3TRAINtfrecorder/test_data/images/TRAIN/goat/goat-64...goat
4VALIDATIONtfrecorder/test_data/images/VALIDATION/cat/cat...cat
\n", + "
" + ], + "text/plain": [ + " split image_uri label\n", + "0 TEST tfrecorder/test_data/images/TEST/cat/cat-800x6... cat\n", + "1 TEST tfrecorder/test_data/images/TEST/goat/goat-640... goat\n", + "2 TRAIN tfrecorder/test_data/images/TRAIN/cat/cat-640x... cat\n", + "3 TRAIN tfrecorder/test_data/images/TRAIN/goat/goat-64... goat\n", + "4 VALIDATION tfrecorder/test_data/images/VALIDATION/cat/cat... cat" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df" + "df = pd.read_csv(input_file)\n", + "df.head()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Logging output to /tmp/tfrecorder-beam.log " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " if (typeof window.interactive_beam_jquery == 'undefined') {\n", + " var jqueryScript = document.createElement('script');\n", + " jqueryScript.src = 'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n", + " jqueryScript.type = 'text/javascript';\n", + " jqueryScript.onload = function() {\n", + " var datatableScript = document.createElement('script');\n", + " datatableScript.src = 'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n", + " datatableScript.type = 'text/javascript';\n", + " datatableScript.onload = function() {\n", + " window.interactive_beam_jquery = jQuery.noConflict(true);\n", + " window.interactive_beam_jquery(document).ready(function($){\n", + " \n", + " });\n", + " }\n", + " document.head.appendChild(datatableScript);\n", + " };\n", + " document.head.appendChild(jqueryScript);\n", + " } else {\n", + " window.interactive_beam_jquery(document).ready(function($){\n", + " \n", + " });\n", + " }" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var import_html = () => {\n", + " ['https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html'].forEach(href => {\n", + " var link = document.createElement('link');\n", + " link.rel = 'import'\n", + " link.href = href;\n", + " document.head.appendChild(link);\n", + " });\n", + " }\n", + " if ('import' in document.createElement('link')) {\n", + " import_html();\n", + " } else {\n", + " var webcomponentScript = document.createElement('script');\n", + " webcomponentScript.src = 'https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js';\n", + " webcomponentScript.type = 'text/javascript';\n", + " webcomponentScript.onload = function(){\n", + " import_html();\n", + " };\n", + " document.head.appendChild(webcomponentScript);\n", + " }" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'job_id': 'DirectRunner',\n", + " 'metrics': {'rows': 6, 'good_images': None, 'bad_images': 6},\n", + " 'tfrecord_dir': './out/tfrecorder-20201028-160301-to-tfr'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df.tensorflow.to_tfr(output_dir=OUTPUT_PATH)" + "results = df.tensorflow.to_tfr(output_dir)\n", + "results" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-121014-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-123151-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-121047-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-132122-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-121052-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-132135-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-122052-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-132406-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-122403-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-132701-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-122505-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-133529-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-122646-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-133624-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-122743-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201027-173444-to-tfr\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtfrecorder-20201022-123126-to-tfr\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47mtfrecorder-20201028-160301-to-tfr\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m4068da78afd34722a84c51ceac547efa\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m64e013b30bc74404802fe2460761f588\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mcdcca595e2b641e3849589a01521bf9e\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp/4068da78afd34722a84c51ceac547efa:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp/4068da78afd34722a84c51ceac547efa/variables:\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp/64e013b30bc74404802fe2460761f588:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp/64e013b30bc74404802fe2460761f588/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp/64e013b30bc74404802fe2460761f588/variables:\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp/cdcca595e2b641e3849589a01521bf9e:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + 
"./out/tfrecorder-20201022-121014-to-tfr/tft_tmp/tftransform_tmp/cdcca595e2b641e3849589a01521bf9e/variables:\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-121014-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m3f3303365f734478af972a71d5ad8f3c\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mae6826008f984da98176a5c3ddcb25c4\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47md6b56908c11648cfa4534371dbb028b0\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp/3f3303365f734478af972a71d5ad8f3c:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp/3f3303365f734478af972a71d5ad8f3c/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp/3f3303365f734478af972a71d5ad8f3c/variables:\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp/ae6826008f984da98176a5c3ddcb25c4:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp/ae6826008f984da98176a5c3ddcb25c4/variables:\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp/d6b56908c11648cfa4534371dbb028b0:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/tft_tmp/tftransform_tmp/d6b56908c11648cfa4534371dbb028b0/variables:\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-121047-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + 
"train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m3698f3931fd14bb9bff583e64c54ffb6\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m38b82a6b79aa407db7893cfe1c77ecb7\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47md4824d2bad824e3ca30a0b1a4c0bf9b4\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp/3698f3931fd14bb9bff583e64c54ffb6:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp/3698f3931fd14bb9bff583e64c54ffb6/variables:\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp/38b82a6b79aa407db7893cfe1c77ecb7:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp/38b82a6b79aa407db7893cfe1c77ecb7/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp/38b82a6b79aa407db7893cfe1c77ecb7/variables:\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp/d4824d2bad824e3ca30a0b1a4c0bf9b4:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/tft_tmp/tftransform_tmp/d4824d2bad824e3ca30a0b1a4c0bf9b4/variables:\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-121052-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m37663f58e2004143b615f97402b28947\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mc96550c0a81b402480820390cde50a79\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mcea75278cf5c4e43b99c8298a541d291\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp/37663f58e2004143b615f97402b28947:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp/37663f58e2004143b615f97402b28947/variables:\n", + "\n", + 
"./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp/c96550c0a81b402480820390cde50a79:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp/c96550c0a81b402480820390cde50a79/variables:\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp/cea75278cf5c4e43b99c8298a541d291:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp/cea75278cf5c4e43b99c8298a541d291/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/tft_tmp/tftransform_tmp/cea75278cf5c4e43b99c8298a541d291/variables:\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-122052-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m3def6fa94a85469fba815ffce7bad60e\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m836430f3087143909ebe6f18152d2ba0\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47ma8e39089895748f3a7f014bf71c766c6\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp/3def6fa94a85469fba815ffce7bad60e:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp/3def6fa94a85469fba815ffce7bad60e/variables:\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp/836430f3087143909ebe6f18152d2ba0:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp/836430f3087143909ebe6f18152d2ba0/variables:\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp/a8e39089895748f3a7f014bf71c766c6:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp/a8e39089895748f3a7f014bf71c766c6/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/tft_tmp/tftransform_tmp/a8e39089895748f3a7f014bf71c766c6/variables:\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m 
saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-122403-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m284f49ccb1ec48f081168bd4b85380d3\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m45d603331df040eb81f19a4cd0d87708\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47ma32121048ec347b1a46632d69345bb72\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp/284f49ccb1ec48f081168bd4b85380d3:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp/284f49ccb1ec48f081168bd4b85380d3/variables:\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp/45d603331df040eb81f19a4cd0d87708:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp/45d603331df040eb81f19a4cd0d87708/variables:\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp/a32121048ec347b1a46632d69345bb72:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp/a32121048ec347b1a46632d69345bb72/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/tft_tmp/tftransform_tmp/a32121048ec347b1a46632d69345bb72/variables:\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-122505-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + 
"./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m20307815fc944228be997a062f893ff4\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m51489082db1646b8a19ca2c21a092ca0\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m8001daeb500344e692ee9cc7d5a37e41\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp/20307815fc944228be997a062f893ff4:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp/20307815fc944228be997a062f893ff4/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp/20307815fc944228be997a062f893ff4/variables:\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp/51489082db1646b8a19ca2c21a092ca0:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp/51489082db1646b8a19ca2c21a092ca0/variables:\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp/8001daeb500344e692ee9cc7d5a37e41:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/tft_tmp/tftransform_tmp/8001daeb500344e692ee9cc7d5a37e41/variables:\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-122646-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m0a7173cf1670405db8519cc86e17c845\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m440c9bef6def42e3acf7b327b6295170\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mf6bc80df88424de69e461e9b8b3cc2ea\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp/0a7173cf1670405db8519cc86e17c845:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp/0a7173cf1670405db8519cc86e17c845/variables:\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp/440c9bef6def42e3acf7b327b6295170:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + 
"./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp/440c9bef6def42e3acf7b327b6295170/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp/440c9bef6def42e3acf7b327b6295170/variables:\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp/f6bc80df88424de69e461e9b8b3cc2ea:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/tft_tmp/tftransform_tmp/f6bc80df88424de69e461e9b8b3cc2ea/variables:\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-122743-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m1cb2ecd7d5ae47c1a6fc2d5bea3ce504\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m3877b46d90094b89a8f95fa1d625525a\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m9a83c37d67f54e1cab8ab824c53926b6\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp/1cb2ecd7d5ae47c1a6fc2d5bea3ce504:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp/1cb2ecd7d5ae47c1a6fc2d5bea3ce504/variables:\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp/3877b46d90094b89a8f95fa1d625525a:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp/3877b46d90094b89a8f95fa1d625525a/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp/3877b46d90094b89a8f95fa1d625525a/variables:\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp/9a83c37d67f54e1cab8ab824c53926b6:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/tft_tmp/tftransform_tmp/9a83c37d67f54e1cab8ab824c53926b6/variables:\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + 
"./out/tfrecorder-20201022-123126-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-123126-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m383f6db81e674c18aa598803b2735b12\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m765e3ac1b7794cc19b90025229929c91\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47ma9c454acb0b94209b42b589953dc40dd\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp/383f6db81e674c18aa598803b2735b12:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp/383f6db81e674c18aa598803b2735b12/variables:\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp/765e3ac1b7794cc19b90025229929c91:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp/765e3ac1b7794cc19b90025229929c91/variables:\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp/a9c454acb0b94209b42b589953dc40dd:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp/a9c454acb0b94209b42b589953dc40dd/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/tft_tmp/tftransform_tmp/a9c454acb0b94209b42b589953dc40dd/variables:\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-123151-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-132122-to-tfr:\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132122-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132122-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m1b7a7957b4c84bb8817a610ae5597254\u001B[m\u001B[m \u001B[1m\u001B[34m\u001B[47m3ab241e4838c47faa1307430f674cba0\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132122-to-tfr/tft_tmp/tftransform_tmp/1b7a7957b4c84bb8817a610ae5597254:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132122-to-tfr/tft_tmp/tftransform_tmp/1b7a7957b4c84bb8817a610ae5597254/variables:\n", + 
"\n", + "./out/tfrecorder-20201022-132122-to-tfr/tft_tmp/tftransform_tmp/3ab241e4838c47faa1307430f674cba0:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132122-to-tfr/tft_tmp/tftransform_tmp/3ab241e4838c47faa1307430f674cba0/variables:\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m617158aef51446619f1193dfa6ee30fa\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m882ecbb5ad6f4e0a8692237ed2af6d72\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47md2a2597fb5764de39563d1e3828b2cd4\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp/617158aef51446619f1193dfa6ee30fa:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp/617158aef51446619f1193dfa6ee30fa/variables:\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp/882ecbb5ad6f4e0a8692237ed2af6d72:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp/882ecbb5ad6f4e0a8692237ed2af6d72/variables:\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp/d2a2597fb5764de39563d1e3828b2cd4:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp/d2a2597fb5764de39563d1e3828b2cd4/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/tft_tmp/tftransform_tmp/d2a2597fb5764de39563d1e3828b2cd4/variables:\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-132135-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp:\n", + 
"\u001B[1m\u001B[34m\u001B[47m4a28588557c1419895114bc6edb695be\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m542d64a62cf74f8cbe9a8916790419ce\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m5cd38819d7c24c23a6ede32bd4547a2a\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp/4a28588557c1419895114bc6edb695be:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp/4a28588557c1419895114bc6edb695be/variables:\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp/542d64a62cf74f8cbe9a8916790419ce:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp/542d64a62cf74f8cbe9a8916790419ce/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp/542d64a62cf74f8cbe9a8916790419ce/variables:\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp/5cd38819d7c24c23a6ede32bd4547a2a:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/tft_tmp/tftransform_tmp/5cd38819d7c24c23a6ede32bd4547a2a/variables:\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-132406-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m201f1f94018a490bbf5877bcf6b5c5db\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m6438afee6e484bbd810a20d437f84a84\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mc338e97fe31b4fc084327897c9a07ecc\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp/201f1f94018a490bbf5877bcf6b5c5db:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp/201f1f94018a490bbf5877bcf6b5c5db/variables:\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp/6438afee6e484bbd810a20d437f84a84:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + 
"./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp/6438afee6e484bbd810a20d437f84a84/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp/6438afee6e484bbd810a20d437f84a84/variables:\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp/c338e97fe31b4fc084327897c9a07ecc:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/tft_tmp/tftransform_tmp/c338e97fe31b4fc084327897c9a07ecc/variables:\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-132701-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m89e69e97f7bb4ab791373df35a281826\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mc957d83ca4f340b6882ce01e84e225c7\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mec546d0b3388408b90d460a19ec2e16b\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp/89e69e97f7bb4ab791373df35a281826:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp/89e69e97f7bb4ab791373df35a281826/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp/89e69e97f7bb4ab791373df35a281826/variables:\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp/c957d83ca4f340b6882ce01e84e225c7:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp/c957d83ca4f340b6882ce01e84e225c7/variables:\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp/ec546d0b3388408b90d460a19ec2e16b:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/tft_tmp/tftransform_tmp/ec546d0b3388408b90d460a19ec2e16b/variables:\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + 
"./out/tfrecorder-20201022-133529-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-133529-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m220e294d5b9a44ba90e84f357b24ec28\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m91b0706c6a334575b676aa43a21b29ea\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mac1204e611dd42228c5a4a04d5dd92ef\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp/220e294d5b9a44ba90e84f357b24ec28:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp/220e294d5b9a44ba90e84f357b24ec28/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp/220e294d5b9a44ba90e84f357b24ec28/variables:\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp/91b0706c6a334575b676aa43a21b29ea:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp/91b0706c6a334575b676aa43a21b29ea/variables:\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp/ac1204e611dd42228c5a4a04d5dd92ef:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/tft_tmp/tftransform_tmp/ac1204e611dd42228c5a4a04d5dd92ef/variables:\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201022-133624-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m4c3803300da547198d92c67e7e22b276\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47m6a1eb8662ccc4409874be42eac9c8819\u001B[m\u001B[m\n", + 
"\u001B[1m\u001B[34m\u001B[47m7a7ae073f1924e0e9e25fbf07adc03bb\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp/4c3803300da547198d92c67e7e22b276:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp/4c3803300da547198d92c67e7e22b276/variables:\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp/6a1eb8662ccc4409874be42eac9c8819:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp/6a1eb8662ccc4409874be42eac9c8819/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp/6a1eb8662ccc4409874be42eac9c8819/variables:\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp/7a7ae073f1924e0e9e25fbf07adc03bb:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/tft_tmp/tftransform_tmp/7a7ae073f1924e0e9e25fbf07adc03bb/variables:\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201027-173444-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr:\n", + "discarded-data-00000-of-00001 \u001B[1m\u001B[34m\u001B[47mtransform_fn\u001B[m\u001B[m\n", + "test-00000-of-00001.tfrecord.gz \u001B[1m\u001B[34m\u001B[47mtransformed_metadata\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mtft_tmp\u001B[m\u001B[m validation-00000-of-00001.tfrecord.gz\n", + "train-00000-of-00001.tfrecord.gz\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47mtftransform_tmp\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp:\n", + "\u001B[1m\u001B[34m\u001B[47m70019930e8eb4d1a8fe3d6b076f38e00\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mb6f679e2f2784489a9b8dfe767fc702b\u001B[m\u001B[m\n", + "\u001B[1m\u001B[34m\u001B[47mbb0deba40f3e448289ca27fc9abdcf6a\u001B[m\u001B[m\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp/70019930e8eb4d1a8fe3d6b076f38e00:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp/70019930e8eb4d1a8fe3d6b076f38e00/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp/70019930e8eb4d1a8fe3d6b076f38e00/variables:\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp/b6f679e2f2784489a9b8dfe767fc702b:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + 
"./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp/b6f679e2f2784489a9b8dfe767fc702b/variables:\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp/bb0deba40f3e448289ca27fc9abdcf6a:\n", + "saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/tft_tmp/tftransform_tmp/bb0deba40f3e448289ca27fc9abdcf6a/variables:\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/transform_fn:\n", + "\u001B[1m\u001B[34m\u001B[47massets\u001B[m\u001B[m saved_model.pb \u001B[1m\u001B[34m\u001B[47mvariables\u001B[m\u001B[m\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/transform_fn/assets:\n", + "vocab_compute_and_apply_vocabulary_vocabulary\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/transform_fn/variables:\n", + "\n", + "./out/tfrecorder-20201028-160301-to-tfr/transformed_metadata:\n", + "schema.pbtxt\n" + ] + } + ], "source": [ - "!ls -R ./out" + "!ls -R $output_dir" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# That's it!\n", "\n", "As you can see, TFRUtil has taken the supplied CSV and transformed it into TFRecords, ready for consumption, along with the transform function" ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -94,9 +1068,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/samples/Convert-image-directory.ipynb b/samples/Convert-image-directory.ipynb new file mode 100644 index 0000000..71a06b7 --- /dev/null +++ b/samples/Convert-image-directory.ipynb @@ -0,0 +1,200 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert image directory to TFRecord files" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error importing tfx_bsl_extension.arrow.array_util. 
Some tfx_bsl functionalities are not available" + ] + } + ], + "source": [ + "import os\n", + "import pathlib\n", + "import requests\n", + "import shutil\n", + "import tempfile\n", + "\n", + "import pandas as pd\n", + "import tensorflow as tf\n", + "import tfrecorder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert sample image directory" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "image_dir = pathlib.Path(os.getcwd())/'../tfrecorder/test_data/images'\n", + "assert image_dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " if (typeof window.interactive_beam_jquery == 'undefined') {\n", + " var jqueryScript = document.createElement('script');\n", + " jqueryScript.src = 'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n", + " jqueryScript.type = 'text/javascript';\n", + " jqueryScript.onload = function() {\n", + " var datatableScript = document.createElement('script');\n", + " datatableScript.src = 'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n", + " datatableScript.type = 'text/javascript';\n", + " datatableScript.onload = function() {\n", + " window.interactive_beam_jquery = jQuery.noConflict(true);\n", + " window.interactive_beam_jquery(document).ready(function($){\n", + " \n", + " });\n", + " }\n", + " document.head.appendChild(datatableScript);\n", + " };\n", + " document.head.appendChild(jqueryScript);\n", + " } else {\n", + " window.interactive_beam_jquery(document).ready(function($){\n", + " \n", + " });\n", + " }" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var import_html = () => {\n", + " ['https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html'].forEach(href => {\n", + " var link = document.createElement('link');\n", + " link.rel = 'import'\n", + " link.href = href;\n", + " document.head.appendChild(link);\n", + " });\n", + " }\n", + " if ('import' in document.createElement('link')) {\n", + " import_html();\n", + " } else {\n", + " var webcomponentScript = document.createElement('script');\n", + " webcomponentScript.src = 'https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js';\n", + " webcomponentScript.type = 'text/javascript';\n", + " webcomponentScript.onload = function(){\n", + " import_html();\n", + " };\n", + " document.head.appendChild(webcomponentScript);\n", + " }" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'job_id': 'DirectRunner', 'metrics': {'rows': 6, 'good_images': 6, 'bad_images': None}, 'tfrecord_dir': '/tmp/tfrecords/tfrecorder-20201027-173455-create-tfrecords'}\n" + ] + } + ], + "source": [ + "output_dir = pathlib.Path('/tmp/tfrecords')\n", + "results = tfrecorder.convert(str(image_dir), output_dir)\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load a TF dataset from generated TFRecords" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "datasets = tfrecorder.load(results['tfrecord_dir'])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"dict_keys(['image', 'image_channels', 'image_height', 'image_name', 'image_width', 'label', 'split'])\n" + ] + } + ], + "source": [ + "for x in datasets['TRAIN'].take(1):\n", + " print(x.keys())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/samples/Convert-structured-data.ipynb b/samples/Convert-structured-data.ipynb new file mode 100644 index 0000000..0df4e36 --- /dev/null +++ b/samples/Convert-structured-data.ipynb @@ -0,0 +1,400 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert structured data to TFRecords " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error importing tfx_bsl_extension.arrow.array_util. Some tfx_bsl functionalities are not available" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import pathlib\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tfrecorder\n", + "from tfrecorder import input_schema\n", + "from tfrecorder import types" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load [Titanic](https://www.openml.org/d/40945) dataset " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading data from https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv\n", + "49152/44225 [=================================] - 1s 12us/step\n" + ] + } + ], + "source": [ + "data_path = pathlib.Path('/tmp/datasets/titanic.csv')\n", + "if not data_path.exists():\n", + " tf.keras.utils.get_file(\n", + " 'titanic.csv',\n", + " origin='https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv',\n", + " extract=False,\n", + " cache_dir='/tmp', cache_subdir='datasets')\n", + " \n", + "assert data_path.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(str(data_path))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add `split` column " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SurvivedPclassNameSexAgeSiblings/Spouses AboardParents/Children AboardFaresplit
003Mr. Owen Harris Braundmale22.0107.2500TRAIN
111Mrs. John Bradley (Florence Briggs Thayer) Cum...female38.01071.2833TRAIN
213Miss. Laina Heikkinenfemale26.0007.9250TRAIN
311Mrs. Jacques Heath (Lily May Peel) Futrellefemale35.01053.1000TRAIN
403Mr. William Henry Allenmale35.0008.0500TRAIN
\n", + "
" + ], + "text/plain": [ + " Survived Pclass Name \\\n", + "0 0 3 Mr. Owen Harris Braund \n", + "1 1 1 Mrs. John Bradley (Florence Briggs Thayer) Cum... \n", + "2 1 3 Miss. Laina Heikkinen \n", + "3 1 1 Mrs. Jacques Heath (Lily May Peel) Futrelle \n", + "4 0 3 Mr. William Henry Allen \n", + "\n", + " Sex Age Siblings/Spouses Aboard Parents/Children Aboard Fare \\\n", + "0 male 22.0 1 0 7.2500 \n", + "1 female 38.0 1 0 71.2833 \n", + "2 female 26.0 0 0 7.9250 \n", + "3 female 35.0 1 0 53.1000 \n", + "4 male 35.0 0 0 8.0500 \n", + "\n", + " split \n", + "0 TRAIN \n", + "1 TRAIN \n", + "2 TRAIN \n", + "3 TRAIN \n", + "4 TRAIN " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['split'] = 'TRAIN'\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert to TFRecords " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " if (typeof window.interactive_beam_jquery == 'undefined') {\n", + " var jqueryScript = document.createElement('script');\n", + " jqueryScript.src = 'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n", + " jqueryScript.type = 'text/javascript';\n", + " jqueryScript.onload = function() {\n", + " var datatableScript = document.createElement('script');\n", + " datatableScript.src = 'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n", + " datatableScript.type = 'text/javascript';\n", + " datatableScript.onload = function() {\n", + " window.interactive_beam_jquery = jQuery.noConflict(true);\n", + " window.interactive_beam_jquery(document).ready(function($){\n", + " \n", + " });\n", + " }\n", + " document.head.appendChild(datatableScript);\n", + " };\n", + " document.head.appendChild(jqueryScript);\n", + " } else {\n", + " window.interactive_beam_jquery(document).ready(function($){\n", + " \n", + " });\n", + " }" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var import_html = () => {\n", + " ['https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html'].forEach(href => {\n", + " var link = document.createElement('link');\n", + " link.rel = 'import'\n", + " link.href = href;\n", + " document.head.appendChild(link);\n", + " });\n", + " }\n", + " if ('import' in document.createElement('link')) {\n", + " import_html();\n", + " } else {\n", + " var webcomponentScript = document.createElement('script');\n", + " webcomponentScript.src = 'https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js';\n", + " webcomponentScript.type = 'text/javascript';\n", + " webcomponentScript.onload = function(){\n", + " import_html();\n", + " };\n", + " document.head.appendChild(webcomponentScript);\n", + " }" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = tfrecorder.convert(\n", + " df, \n", + " './tfrecords', \n", + " schema=input_schema.Schema({\n", + " 'Survived': types.IntegerInput,\n", + " 'Pclass': types.IntegerInput,\n", + " 'Name': types.StringInput,\n", + " 'Sex': types.StringInput,\n", + " 'Age': types.FloatInput,\n", + " 'Siblings/Spouses Aboard': types.IntegerInput,\n", + " 'Parents/Children Aboard': types.IntegerInput,\n", + " 'Fare': types.FloatInput,\n", + " 'split': types.SplitKey,\n", + " })\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": 
[ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'job_id': 'DirectRunner', 'metrics': {'rows': 887, 'good_images': None, 'bad_images': None}, 'tfrecord_dir': './tfrecords/tfrecorder-20201027-173544-create-tfrecords'}\n" + ] + } + ], + "source": [ + "print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load a dataset from the generated TFRecord files " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "datasets = tfrecorder.load(results['tfrecord_dir'])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Age: \n", + "Fare: \n", + "Name: \n", + "Parents/Children Aboard: \n", + "Pclass: \n", + "Sex: \n", + "Siblings/Spouses Aboard: \n", + "Survived: \n", + "split: \n" + ] + } + ], + "source": [ + "for x in datasets['TRAIN'].take(1):\n", + " for k, v in x.items():\n", + " print(f'{k}: {v.dtype}')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/samples/Loading-a-TF-Dataset.ipynb b/samples/Loading-a-TF-Dataset.ipynb index ef640fe..d73c147 100644 --- a/samples/Loading-a-TF-Dataset.ipynb +++ b/samples/Loading-a-TF-Dataset.ipynb @@ -8,7 +8,7 @@ "\n", "This notebook briefly demonstrates how to load a TF Dataset from TFRecord files generated by TFRecorder.\n", "Note that currently, the TFRecord files must be in a directory on your local machine.\n", - "The directory is expected to have the following structure, based on TFRecorder's `create_tfrecords` default output:\n", + "The directory is expected to have the following structure, based on TFRecorder's `convert` default output:\n", "```\n", "tfrecord_dir/\n", " train-*.tfrecord.gz\n", @@ -79,7 +79,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's examine the contains of the training set " + "Let's examine the contains of the training set" ] }, { @@ -109,6 +109,25 @@ "for d in train:\n", " print(d['image_name'])" ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -127,9 +146,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/samples/Using-TFRecorder-with-Google-Cloud-Dataflow.ipynb b/samples/Using-TFRecorder-with-Google-Cloud-Dataflow.ipynb index 15c8f3e..3ee3d71 100644 --- a/samples/Using-TFRecorder-with-Google-Cloud-Dataflow.ipynb +++ b/samples/Using-TFRecorder-with-Google-Cloud-Dataflow.ipynb @@ -133,9 +133,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/setup.py b/setup.py index 83517a6..fa50856 100644 --- a/setup.py +++ b/setup.py @@ -15,13 +15,13 
@@ # limitations under the License. """Package setup.""" - +import sys from setuptools import find_packages from setuptools import setup # Semantic versioning (PEP 440) -VERSION = '1.1.2' +VERSION = '2.0' REQUIRED_PACKAGES = [ "apache-beam[gcp] >= 2.22.0", @@ -42,6 +42,10 @@ "tensorflow_transform >= 0.22", ] +if sys.version_info < (3,7,0,0,0): + print("Version less than 3.7, appending dataclasses") + REQUIRED_PACKAGES.append("dataclasses >= 0.5") + setup( name='tfrecorder', diff --git a/tfrecorder/__init__.py b/tfrecorder/__init__.py index 391b2b9..df89039 100644 --- a/tfrecorder/__init__.py +++ b/tfrecorder/__init__.py @@ -15,7 +15,9 @@ # limitations under the License. """Imports.""" + from tfrecorder import accessor -from tfrecorder.client import create_tfrecords -from tfrecorder.check import check_tfrecords -from tfrecorder.dataset import load +from tfrecorder.converter import convert +from tfrecorder.dataset_loader import load +from tfrecorder.converter import convert_and_load +from tfrecorder.utils import inspect diff --git a/tfrecorder/accessor.py b/tfrecorder/accessor.py index f2a13b5..7ae67a6 100644 --- a/tfrecorder/accessor.py +++ b/tfrecorder/accessor.py @@ -25,9 +25,9 @@ import pandas as pd from IPython.core import display -from tfrecorder import client +from tfrecorder import converter from tfrecorder import constants -from tfrecorder import schema +from tfrecorder import input_schema @pd.api.extensions.register_dataframe_accessor('tensorflow') @@ -41,7 +41,7 @@ def __init__(self, pandas_obj): def to_tfr( self, output_dir: str, - schema_map: Dict[str, schema.SchemaMap] = schema.image_csv_schema, + schema: input_schema.Schema = input_schema.IMAGE_CSV_SCHEMA, runner: str = 'DirectRunner', project: Optional[str] = None, region: Optional[str] = None, @@ -65,7 +65,7 @@ def to_tfr( num_shards=10) Args: - schema_map: A dict mapping column names to supported types. + schema: An instance of input_schema.Schema that describes the schema. output_dir: Local directory or GCS Location to save TFRecords to. Note: GCS required for DataflowRunner runner: Beam runner. Can be DirectRunner or DataflowRunner. 
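For reference, a minimal sketch of how the updated accessor might be called with the new `schema` argument, assuming the default IMAGE_CSV_SCHEMA; the CSV path and output directory below are hypothetical:

import pandas as pd
import tfrecorder  # importing tfrecorder registers the `.tensorflow` DataFrame accessor
from tfrecorder import input_schema

df = pd.read_csv('data.csv')  # hypothetical CSV with split, image_uri and label columns
df.tensorflow.to_tfr(
    output_dir='/tmp/tfrecords',
    schema=input_schema.IMAGE_CSV_SCHEMA,  # Schema object replacing the old schema_map dict
    runner='DirectRunner')
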
@@ -86,10 +86,10 @@ def to_tfr( display.HTML( 'Logging output to /tmp/{} '.format(constants.LOGFILE))) - r = client.create_tfrecords( + r = converter.convert( self._df, output_dir=output_dir, - schema_map=schema_map, + schema=schema, runner=runner, project=project, region=region, diff --git a/tfrecorder/beam_image_test.py b/tfrecorder/beam_image_test.py index 4da6d76..aaebc82 100644 --- a/tfrecorder/beam_image_test.py +++ b/tfrecorder/beam_image_test.py @@ -27,7 +27,9 @@ from tfrecorder import beam_image from tfrecorder import test_utils -from tfrecorder import schema +from tfrecorder import input_schema + +RANDOM_SEED = 42 class BeamImageTests(unittest.TestCase): @@ -36,7 +38,8 @@ class BeamImageTests(unittest.TestCase): def setUp(self): self.pipeline = test_utils.get_test_pipeline() self.df = test_utils.get_test_df() - self.image_file = 'tfrecorder/test_data/images/cat/cat-640x853-1.jpg' + self.image_file = self.df.image_uri.sample( + random_state=RANDOM_SEED).values[0] def test_load(self): """Tests the image loading function.""" @@ -46,7 +49,7 @@ def test_load(self): def test_file_not_found_load(self): """Test loading an image that doesn't exist.""" with self.assertRaises(OSError): - _ = beam_image.load('tfrecorder/test_data/images/cat/food.jpg') + _ = beam_image.load('/some/image/dir/food.jpg') def test_mode_to_channel(self): """Tests `mode_to_channel`.""" @@ -83,10 +86,8 @@ def test_extract_image_dofn(self): with self.pipeline as p: - converter = schema.get_tft_coder(['split', 'image_uri', 'label'], - schema.image_csv_schema) - - + schema = input_schema.IMAGE_CSV_SCHEMA + converter = schema.get_input_coder() extract_images_fn = beam_image.ExtractImagesDoFn('image_uri') data = ( diff --git a/tfrecorder/beam_pipeline.py b/tfrecorder/beam_pipeline.py index 94157b7..7f77527 100644 --- a/tfrecorder/beam_pipeline.py +++ b/tfrecorder/beam_pipeline.py @@ -33,36 +33,10 @@ from tensorflow_transform import beam as tft_beam from tfrecorder import beam_image -from tfrecorder import common -from tfrecorder import schema +from tfrecorder import input_schema from tfrecorder import types -def _get_job_name(job_label: str = None) -> str: - """Returns Beam runner job name. - - Args: - job_label: A user defined string that helps define the job. - - Returns: - A job name compatible with apache beam runners, including a time stamp to - insure uniqueness. 
- """ - - job_name = 'tfrecorder-' + common.get_timestamp() - if job_label: - job_label = job_label.replace('_', '-') - job_name += '-' + job_label - - return job_name - - -def _get_job_dir(output_path: str, job_name: str) -> str: - """Returns Beam processing job directory.""" - - return os.path.join(output_path, job_name) - - def _get_pipeline_options( runner: str, job_name: str, @@ -102,13 +76,15 @@ def _partition_fn( split_key: str = 'split') -> int: """Returns index used to partition an element from a PCollection.""" del unused_num_partitions - dataset_type = element[split_key].decode('utf-8') + dataset_type = element[split_key] + if isinstance(dataset_type, bytes): + dataset_type = element[split_key].decode('utf-8') try: - index = schema.SplitKeyType.allowed_values.index(dataset_type) + index = types.SplitKey.allowed_values.index(dataset_type) except ValueError as e: logging.warning('Unable to index dataset type %s: %s.', dataset_type, str(e)) - index = schema.SplitKeyType.allowed_values.index('DISCARD') + index = types.SplitKey.allowed_values.index('DISCARD') return index def _get_write_to_tfrecord(output_dir: str, @@ -147,7 +123,7 @@ def _preprocessing_fn(inputs: Dict[str, Any], outputs = {} for name, supported_type in schema_map.items(): - if supported_type.type_name == 'string_label': + if supported_type == types.StringLabel: outputs[name] = tft.compute_and_apply_vocabulary(inputs[name]) else: outputs[name] = inputs[name] @@ -188,13 +164,13 @@ def get_split_counts(df: pd.DataFrame, split_key: str): def _transform_and_write_tfr( dataset: pvalue.PCollection, tfr_writer: Callable[[], beam.io.tfrecordio.WriteToTFRecord], - raw_metadata: types.BeamDatasetMetadata, + metadata: types.BeamDatasetMetadata, preprocessing_fn: Optional[Callable] = None, transform_fn: Optional[types.TransformFn] = None, label: str = 'data'): """Applies TF Transform to dataset and outputs it as TFRecords.""" - dataset_metadata = (dataset, raw_metadata) + dataset_metadata = (dataset, metadata) if transform_fn: transformed_dataset, transformed_metadata = ( @@ -222,29 +198,26 @@ def _transform_and_write_tfr( # pylint: disable=too-many-locals def build_pipeline( df: pd.DataFrame, - job_label: str, + job_dir: str, runner: str, project: str, region: str, - output_dir: str, compression: str, num_shards: int, - schema_map: Dict[str, collections.namedtuple], + schema: input_schema.Schema, tfrecorder_wheel: str, dataflow_options: Dict[str, Any]) -> beam.Pipeline: """Runs TFRecorder Beam Pipeline. Args: df: Pandas DataFrame - job_label: User description for the beam job. + job_dir: GCS or Local Path for output. runner: Beam Runner: (e.g. DataflowRunner, DirectRunner). project: GCP project ID (if DataflowRunner) region: GCP compute region (if DataflowRunner) - output_dir: GCS or Local Path for output. compression: gzip or None. num_shards: Number of shards. - schema_map: A schema map (Dictionary mapping Dataframe columns to types) - used to derive the input and target schema. + schema: A Schema object defining the input schema. 
tfrecorder_wheel: Path to TFRecorder wheel for DataFlow dataflow_options: Dataflow Runner Options (optional) @@ -254,8 +227,7 @@ def build_pipeline( Note: These inputs must be validated upstream (by client.create_tfrecord()) """ - job_name = _get_job_name(job_label) - job_dir = _get_job_dir(output_dir, job_name) + _, job_name = os.path.split(job_dir) options = _get_pipeline_options( runner, job_name, @@ -268,7 +240,7 @@ def build_pipeline( p = beam.Pipeline(options=options) with tft_beam.Context(temp_dir=os.path.join(job_dir, 'tft_tmp')): - converter = schema.get_tft_coder(df.columns, schema_map) + converter = schema.get_input_coder() flatten_rows = ToCSVRows() # Each element in the data PCollection will be a dict @@ -282,7 +254,7 @@ def build_pipeline( ) # Extract images if an image_uri key exists. - image_uri_key = schema.get_key(schema.ImageUriType, schema_map) + image_uri_key = schema.image_uri_key if image_uri_key: extract_images_fn = beam_image.ExtractImagesDoFn(image_uri_key) @@ -291,8 +263,8 @@ def build_pipeline( | 'ReadImage' >> beam.ParDo(extract_images_fn) ) - # If the schema contains a valid split key, partition the dataset. - split_key = schema.get_key(schema.SplitKeyType, schema_map) + # Get the split key from schema. + split_key = schema.split_key # Note: This will not always reflect actual number of samples per dataset # written as TFRecords. The succeeding `Partition` operation may mark @@ -301,45 +273,43 @@ def build_pipeline( # file for that split, albeit empty. split_counts = get_split_counts(df, split_key) - # Raw metadata is the TFT metadata after image insertion but before TFT - # e.g Image columns have been added if necessary. - raw_metadata = schema.get_raw_metadata(df.columns, schema_map) - # Require training set to be available in the input data. The transform_fn # and transformed_metadata will be generated from the training set and # applied to the other datasets, if any - assert 'TRAIN' in split_counts + if 'TRAIN' not in split_counts: + raise AttributeError('`TRAIN` set expected to be present in splits') # Split dataset into train, validation, test sets. 
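  # Each row is routed by beam.Partition to one of the four outputs below
  # (TRAIN, VALIDATION, TEST, DISCARD): _partition_fn looks up the row's split
  # value in types.SplitKey.allowed_values and falls back to DISCARD for any
  # unrecognized value, so discarded rows are still written out separately.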
partition_fn = functools.partial(_partition_fn, split_key=split_key) train_data, val_data, test_data, discard_data = ( data | 'SplitDataset' >> beam.Partition( - partition_fn, len(schema.SplitKeyType.allowed_values))) + partition_fn, len(types.SplitKey.allowed_values))) - raw_schema_map = schema.get_raw_schema_map(schema_map=schema_map) preprocessing_fn = functools.partial( _preprocessing_fn, - schema_map=raw_schema_map) + schema_map=schema.pre_tft_schema_map) tfr_writer = functools.partial( _get_write_to_tfrecord, output_dir=job_dir, compress=compression, num_shards=num_shards) + pre_tft_metadata = schema.get_pre_tft_metadata() + transform_fn = _transform_and_write_tfr( train_data, tfr_writer, preprocessing_fn=preprocessing_fn, - raw_metadata=raw_metadata, + metadata=pre_tft_metadata, label='Train') if 'VALIDATION' in split_counts: _transform_and_write_tfr( val_data, tfr_writer, transform_fn=transform_fn, - raw_metadata=raw_metadata, + metadata=pre_tft_metadata, label='Validation') if 'TEST' in split_counts: _transform_and_write_tfr( test_data, tfr_writer, transform_fn=transform_fn, - raw_metadata=raw_metadata, + metadata=pre_tft_metadata, label='Test') _ = ( diff --git a/tfrecorder/beam_pipeline_test.py b/tfrecorder/beam_pipeline_test.py index 29d9ad2..3885485 100644 --- a/tfrecorder/beam_pipeline_test.py +++ b/tfrecorder/beam_pipeline_test.py @@ -26,11 +26,13 @@ import apache_beam as beam import frozendict import tensorflow as tf +import tensorflow_transform as tft from tensorflow_transform import beam as tft_beam from tfrecorder import beam_pipeline -from tfrecorder import schema +from tfrecorder import input_schema from tfrecorder import test_utils +from tfrecorder import types # pylint: disable=protected-access @@ -45,9 +47,9 @@ def test_processing_fn_with_int_label(self): 'image_uri': 'gs://foo/bar.jpg', 'label': 1} my_schema = frozendict.FrozenOrderedDict({ - 'split': schema.SplitKeyType, - 'image_uri': schema.ImageUriType, - 'label': schema.IntegerLabelType}) + 'split': types.SplitKey, + 'image_uri': types.ImageUri, + 'label': types.IntegerLabel}) result = beam_pipeline._preprocessing_fn(element, schema_map=my_schema) self.assertEqual(element, result) @@ -61,8 +63,8 @@ def test_processing_fn_with_string_label(self, mock_transform): 'split': 'TRAIN', 'image_uri': 'gs://foo/bar.jpg', 'label': tf.constant('cat', dtype=tf.string)} - result = beam_pipeline._preprocessing_fn(element, - schema_map=schema.image_csv_schema) + result = beam_pipeline._preprocessing_fn( + element, schema_map=input_schema.IMAGE_CSV_SCHEMA.input_schema_map) result['label'] = result['label'].numpy() self.assertEqual(0, result['label']) @@ -97,8 +99,9 @@ class GetSplitCountsTest(unittest.TestCase): def setUp(self): self.df = test_utils.get_test_df() - self.schema_map = schema.image_csv_schema - self.split_key = schema.get_key(schema.SplitKeyType, self.schema_map) + self.schema = input_schema.IMAGE_CSV_SCHEMA + self.schema_map = self.schema.input_schema_map + self.split_key = self.schema.split_key def test_all_splits(self): """Tests case where train, validation and test data exists""" @@ -126,16 +129,18 @@ class TransformAndWriteTfrTest(unittest.TestCase): def setUp(self): self.pipeline = test_utils.get_test_pipeline() - self.raw_df = test_utils.get_raw_feature_df() + self.pre_tft_df = test_utils.get_pre_tft_feature_df() self.temp_dir_obj = tempfile.TemporaryDirectory(dir='/tmp', prefix='test-') self.test_dir = self.temp_dir_obj.name self.tfr_writer = functools.partial( beam_pipeline._get_write_to_tfrecord, 
output_dir=self.test_dir, compress='gzip', num_shards=2) - self.raw_schema = schema.get_raw_schema_map(schema.image_csv_schema) - self.raw_metadata = schema.get_raw_metadata(self.raw_df.columns, - self.raw_schema) - self.converter = schema.get_tft_coder(self.raw_df.columns, self.raw_schema) + self.schema = input_schema.Schema( + input_schema.IMAGE_CSV_SCHEMA.input_schema_map) + self.pre_tft_metadata = self.schema.get_pre_tft_metadata() + self.converter = tft.coders.CsvCoder( + list(self.schema.pre_tft_schema_map.keys()), + self.pre_tft_metadata.schema) self.transform_fn_path = ('./tfrecorder/test_data/sample_tfrecords') def tearDown(self): @@ -153,15 +158,16 @@ def test_train(self): with self.pipeline as p: with tft_beam.Context(temp_dir=os.path.join(self.test_dir, 'tmp')): - df = self.raw_df[self.raw_df.split == 'TRAIN'] + df = self.pre_tft_df[self.pre_tft_df.split == 'TRAIN'] dataset = self._get_dataset(p, df) - preprocessing_fn = functools.partial(beam_pipeline._preprocessing_fn, - schema_map=self.raw_schema) + preprocessing_fn = functools.partial( + beam_pipeline._preprocessing_fn, + schema_map=self.schema.pre_tft_schema_map) transform_fn = ( beam_pipeline._transform_and_write_tfr( dataset, self.tfr_writer, preprocessing_fn=preprocessing_fn, - raw_metadata=self.raw_metadata, + metadata=self.pre_tft_metadata, label='Train')) _ = transform_fn | tft_beam.WriteTransformFn(self.test_dir) @@ -179,12 +185,12 @@ def test_non_training(self): with self.pipeline as p: with tft_beam.Context(temp_dir=os.path.join(self.test_dir, 'tmp')): - df = self.raw_df[self.raw_df.split == 'TEST'] + df = self.pre_tft_df[self.pre_tft_df.split == 'TEST'] dataset = self._get_dataset(p, df) transform_fn = p | tft_beam.ReadTransformFn(self.transform_fn_path) beam_pipeline._transform_and_write_tfr( dataset, self.tfr_writer, transform_fn=transform_fn, - raw_metadata=self.raw_metadata, label='Test') + metadata=self.pre_tft_metadata, label='Test') self.assertFalse(glob.glob(os.path.join(self.test_dir, 'train*.gz'))) self.assertFalse(glob.glob(os.path.join(self.test_dir, 'validation*.gz'))) diff --git a/tfrecorder/cli.py b/tfrecorder/cli.py index 7afde2b..493e310 100644 --- a/tfrecorder/cli.py +++ b/tfrecorder/cli.py @@ -18,16 +18,16 @@ import fire -from tfrecorder import client -from tfrecorder import check +from tfrecorder import converter +from tfrecorder import utils def main(): """Entry point for command-line interface.""" fire.Fire({ - 'create-tfrecords': client.create_tfrecords, - 'check-tfrecords': check.check_tfrecords, + 'convert': converter.convert, + 'inspect': utils.inspect, }) diff --git a/tfrecorder/common.py b/tfrecorder/common.py deleted file mode 100644 index 09df3e8..0000000 --- a/tfrecorder/common.py +++ /dev/null @@ -1,42 +0,0 @@ -# Lint as: python3 - -# Copyright 2020 Google LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Common utility functions.""" - -from datetime import datetime -import os - -import tensorflow as tf - -from tfrecorder import constants - - -def get_timestamp() -> str: - """Returns current date and time as formatted string.""" - return datetime.now().strftime('%Y%m%d-%H%M%S') - - -def copy_logfile_to_gcs(logfile: str, output_dir: str): - """Copies a logfile from local to gcs storage.""" - try: - with open(logfile, 'r') as log_reader: - out_log = os.path.join(output_dir, constants.LOGFILE) - with tf.io.gfile.GFile(out_log, 'w') as gcs_logfile: - log = log_reader.read() - gcs_logfile.write(log) - except FileNotFoundError as e: - raise FileNotFoundError("Unable to copy log file {} to gcs.".format( - e.filename)) from e diff --git a/tfrecorder/common_test.py b/tfrecorder/common_test.py deleted file mode 100644 index ea3fda5..0000000 --- a/tfrecorder/common_test.py +++ /dev/null @@ -1,53 +0,0 @@ -# Lint as: python3 - -# Copyright 2020 Google LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for common.""" - -import os -import unittest -import tempfile - -from tfrecorder import common -from tfrecorder import constants - - -class CopyLogTest(unittest.TestCase): - """Misc tests for _copy_logfile_to_gcs.""" - - def test_valid_copy(self): - """Test valid file copy.""" - with tempfile.TemporaryDirectory() as tmpdirname: - text = 'log test log test' - infile = os.path.join(tmpdirname, 'foo.log') - with open(infile, 'w') as f: - f.write(text) - common.copy_logfile_to_gcs(infile, tmpdirname) - - outfile = os.path.join(tmpdirname, constants.LOGFILE) - with open(outfile, 'r') as f: - data = f.read() - self.assertEqual(text, data) - - def test_invalid_copy(self): - """Test invalid file copy.""" - with tempfile.TemporaryDirectory() as tmpdirname: - infile = os.path.join(tmpdirname, 'foo.txt') - with self.assertRaises(FileNotFoundError): - common.copy_logfile_to_gcs(infile, tmpdirname) - -# pylint: disable=protected-access -if __name__ == '__main__': - unittest.main() diff --git a/tfrecorder/client.py b/tfrecorder/converter.py similarity index 76% rename from tfrecorder/client.py rename to tfrecorder/converter.py index a321005..5d46eea 100644 --- a/tfrecorder/client.py +++ b/tfrecorder/converter.py @@ -16,10 +16,10 @@ """Provides a common interface for TFRecorder to DF Accessor and CLI. -client.py provides create_tfrecords() to upstream clients including +converter.py provides create_tfrecords() to upstream clients including the Pandas DataFrame Accessor (accessor.py) and the CLI (cli.py). """ -import collections + import logging import os from typing import Any, Dict, Optional, Sequence, Tuple, Union @@ -29,21 +29,25 @@ import tensorflow as tf from tfrecorder import beam_pipeline -from tfrecorder import common +from tfrecorder import dataset_loader from tfrecorder import constants -from tfrecorder import schema +from tfrecorder import input_schema +from tfrecorder import types +from tfrecorder import utils + # TODO(mikebernico) Add test for only one split_key. 
-def _validate_data(df: pd.DataFrame, - schema_map: Dict[str, collections.namedtuple]): +def _validate_data(df: pd.DataFrame, schema: input_schema.Schema): """Verifies data is consistent with schema.""" - for key, value in schema_map.items(): + for key, value in schema.input_schema_map.items(): _ = value # TODO(mikebernico) Implement type checking. if key not in df.columns: + schema_keys = list(schema.input_schema_map.keys()) raise AttributeError( f'DataFrame does not contain expected column: {key}. ' - f'Ensure header matches schema keys: {list(schema_map.keys())}.') + f'Ensure header matches schema keys: {schema_keys}.') + def _validate_runner( runner: str, @@ -79,7 +83,7 @@ def _path_split(filepath: str) -> Tuple[str, str]: if filepath.startswith(constants.GCS_PREFIX): _, path = filepath.split(constants.GCS_PREFIX) - head, tail = os.path.split(path) + head, tail = os.path.split(os.path.normpath(path)) return constants.GCS_PREFIX + head, tail return os.path.split(filepath) @@ -113,7 +117,7 @@ def _read_image_directory(image_dir: str) -> pd.DataFrame: """ rows = [] - split_values = schema.allowed_split_values + split_values = types.SplitKey.allowed_values for root, _, files in tf.io.gfile.walk(image_dir): if files: root_, label = _path_split(root) @@ -127,13 +131,47 @@ def _read_image_directory(image_dir: str) -> pd.DataFrame: row = [split, image_uri, label] rows.append(row) - return pd.DataFrame(rows, columns=schema.image_csv_schema.keys()) + return pd.DataFrame( + rows, columns=input_schema.IMAGE_CSV_SCHEMA.get_input_keys()) def _is_directory(input_data) -> bool: """Returns True if `input_data` is a directory; False otherwise.""" - return tf.io.gfile.isdir(input_data) + # Note: First check will flag if user has the necessary credentials + # to access the directory (if it is in GCS) + return tf.io.gfile.exists(input_data) and tf.io.gfile.isdir(input_data) + + +def _get_job_name(job_label: str = None) -> str: + """Returns Beam runner job name. + + Args: + job_label: A user defined string that helps define the job. + + Returns: + A job name compatible with apache beam runners, including a time stamp to + insure uniqueness. 
+ """ + + job_name = 'tfrecorder-' + utils.get_timestamp() + if job_label: + job_label = job_label.replace('_', '-') + job_name += '-' + job_label + + return job_name + + +def _get_job_dir(output_path: str, job_name: str) -> str: + """Returns Beam processing job directory.""" + + return os.path.join(output_path, job_name) + + +def _get_dataflow_url(job_id: str, project: str, region: str) -> str: + """Returns Cloud DataFlow URL for Apache Beam job.""" + + return f'{constants.CONSOLE_DATAFLOW_URI}{region}/{job_id}?=project={project}' def read_csv( @@ -143,7 +181,7 @@ def read_csv( """Returns a a Pandas DataFrame from a CSV file.""" if header is None and not names: - names = list(schema.image_csv_schema.keys()) + names = list(input_schema.IMAGE_CSV_SCHEMA.get_input_keys()) with tf.io.gfile.GFile(csv_file) as f: return pd.read_csv(f, names=names, header=header) @@ -203,13 +241,14 @@ def _configure_logging(logfile): tf_logger.handlers = [] tf_logger.addHandler(handler) + # pylint: disable=too-many-arguments # pylint: disable=too-many-locals -def create_tfrecords( +def convert( source: Union[str, pd.DataFrame], - output_dir: str, - schema_map: Dict[str, collections.namedtuple] = schema.image_csv_schema, + output_dir: str = './tfrecords', + schema: input_schema.Schema = input_schema.IMAGE_CSV_SCHEMA, header: Optional[Union[str, int, Sequence]] = 'infer', names: Optional[Sequence] = None, runner: str = 'DirectRunner', @@ -217,7 +256,7 @@ def create_tfrecords( region: Optional[str] = None, tfrecorder_wheel: Optional[str] = None, dataflow_options: Optional[Dict[str, Any]] = None, - job_label: str = 'create-tfrecords', + job_label: str = 'convert', compression: Optional[str] = 'gzip', num_shards: int = 0) -> Dict[str, Any]: """Generates TFRecord files from given input data. @@ -228,18 +267,19 @@ def create_tfrecords( Usage: import tfrecorder - job_id = tfrecorder.client.create_tfrecords( + job_id = tfrecorder.convert( train_df, output_dir='gcs://foo/bar/train', - runner='DirectFlowRunner) + runner='DirectRunner) Args: source: Pandas DataFrame, CSV file or image directory path. output_dir: Local directory or GCS Location to save TFRecords to. - schema_map: A dict mapping column names to supported types. + schema: An instance of input_schema.Schema. header: Indicates row/s to use as a header. Not used when `input_data` is a Pandas DataFrame. If 'infer' (default), header is taken from the first line of a CSV + names: List of column names to use for CSV or DataFrame input. runner: Beam runner. 
Can be 'DirectRunner' or 'DataFlowRunner' project: GCP project name (Required if DataflowRunner) region: GCP region name (Required if DataflowRunner) @@ -259,24 +299,27 @@ def create_tfrecords( df = to_dataframe(source, header, names) - _validate_data(df, schema_map) + _validate_data(df, schema) _validate_runner(runner, project, region, tfrecorder_wheel) logfile = os.path.join('/tmp', constants.LOGFILE) _configure_logging(logfile) + job_name = _get_job_name(job_label) + job_dir = _get_job_dir(output_dir, job_name) + p = beam_pipeline.build_pipeline( df, - job_label=job_label, + job_dir=job_dir, runner=runner, project=project, region=region, - output_dir=output_dir, compression=compression, num_shards=num_shards, - schema_map=schema_map, + schema=schema, tfrecorder_wheel=tfrecorder_wheel, - dataflow_options=dataflow_options) + dataflow_options=dataflow_options, + ) result = p.run() @@ -292,7 +335,6 @@ def create_tfrecords( good_image_count = _get_beam_metric(good_image_filter, result) bad_image_count = _get_beam_metric(bad_image_filter, result) - # TODO(mikebernico): Profile metric impact with larger dataset. metrics = { 'rows': row_count, 'good_images': good_image_count, @@ -305,28 +347,27 @@ def create_tfrecords( } logging.info("Job Complete.") - else: + elif runner == 'DataflowRunner': logging.info("Using Dataflow Runner.") - # Construct Dataflow URL - job_id = result.job_id() - - url = ( - constants.CONSOLE_DATAFLOW_URI + - region + - '/' + - job_id + - '?project=' + - project) + url = _get_dataflow_url(job_id, project, region) job_result = { 'job_id': job_id, - 'dataflow_url': url + 'dataflow_url': url, } + # Copy the logfile to GCS output dir + utils.copy_logfile_to_gcs(logfile, output_dir) - logging.shutdown() + else: + raise ValueError(f'Unsupported runner: {runner}') - if runner == 'DataflowRunner': - # if this is a Dataflow job, copy the logfile to GCS - common.copy_logfile_to_gcs(logfile, output_dir) + job_result['tfrecord_dir'] = job_dir return job_result + + +def convert_and_load(*args, **kwargs): + """Converts data into TFRecords and loads them as TF Datasets.""" + + job_result = convert(*args, **kwargs) + return dataset_loader.load(job_result['tfrecord_dir']) diff --git a/tfrecorder/client_test.py b/tfrecorder/converter_test.py similarity index 66% rename from tfrecorder/client_test.py rename to tfrecorder/converter_test.py index c00c6e6..64d8bb4 100644 --- a/tfrecorder/client_test.py +++ b/tfrecorder/converter_test.py @@ -26,16 +26,36 @@ import mock import pandas as pd +import tensorflow as tf -from tfrecorder import client +from tfrecorder import beam_pipeline +from tfrecorder import converter +from tfrecorder import dataset_loader from tfrecorder import test_utils -from tfrecorder import schema +from tfrecorder import input_schema # pylint: disable=protected-access -class ClientTest(unittest.TestCase): +class IsDirectoryTest(unittest.TestCase): + """Tests `_is_directory`.""" + + def test_local_ok(self): + """Test function returns True on local directory.""" + + with tempfile.TemporaryDirectory() as dirname: + self.assertTrue(converter._is_directory(dirname)) + + def test_local_exists_but_not_dir(self): + """Test function returns False on local (non-directory) file.""" + + with tempfile.NamedTemporaryFile(prefix='test_', dir='/tmp') as f: + self.assertFalse(converter._is_directory(f.name)) + + +# TODO(cezequiel): Refactor to per-function test case classes +class MiscTest(unittest.TestCase): """Misc tests for `client` module.""" def setUp(self): @@ -44,51 +64,52 @@ def 
setUp(self): self.test_project = 'foo' self.test_wheel = '/my/path/wheel.whl' - @mock.patch('tfrecorder.client.beam_pipeline') - def test_create_tfrecords_direct_runner(self, mock_beam): + @mock.patch.object(beam_pipeline, 'build_pipeline', autospec=True) + def test_create_tfrecords_direct_runner(self, _): """Tests `create_tfrecords` Direct case.""" - mock_beam.build_pipeline().run().wait_until_finished.return_value = { - 'rows':6} - r = client.create_tfrecords( + r = converter.convert( self.test_df, runner='DirectRunner', output_dir='/tmp/direct_runner') - self.assertTrue('metrics' in r) + self.assertCountEqual(r.keys(), ['job_id', 'metrics', 'tfrecord_dir']) + self.assertCountEqual( + r['metrics'].keys(), ['rows', 'good_images', 'bad_images']) - @mock.patch('tfrecorder.client.beam_pipeline') - def test_create_tfrecords_dataflow_runner(self, mock_beam): + @mock.patch.object(converter, '_get_dataflow_url') + @mock.patch.object(beam_pipeline, 'build_pipeline') + def test_create_tfrecords_dataflow_runner(self, mock_pipeline, mock_url): """Tests `create_tfrecords` Dataflow case.""" - mock_beam.build_pipeline().run().job_id.return_value = 'foo_id' - + job_id = 'foo_id' + dataflow_url = 'http://some/job/url' + mock_pipeline().run().job_id.return_value = job_id + mock_url.return_value = dataflow_url df2 = self.test_df.copy() df2['image_uri'] = 'gs://' + df2['image_uri'] outdir = '/tmp/dataflow_runner' - - expected = { - 'job_id': 'foo_id', - 'dataflow_url': 'https://console.cloud.google.com/dataflow/jobs/' + - 'us-central1/foo_id?project=foo'} - os.makedirs(outdir, exist_ok=True) - r = client.create_tfrecords( + r = converter.convert( df2, runner='DataflowRunner', output_dir=outdir, region=self.test_region, project=self.test_project, tfrecorder_wheel=self.test_wheel) - self.assertEqual(r, expected) + + self.assertCountEqual(r.keys(), ['job_id', 'dataflow_url', 'tfrecord_dir']) + self.assertEqual(r['job_id'], job_id) + self.assertEqual(r['dataflow_url'], dataflow_url) + self.assertRegex(r['tfrecord_dir'], fr'{outdir}/tfrecorder-.+-?.*') def test_path_split(self): """Tests `_path_split`.""" filename = 'image_file.jpg' - dirpaths = ['/path/to/image/dir', 'gs://path/to/image/dir'] + dirpaths = ['/path/to/image/dir/', 'gs://path/to/image/dir/'] for dir_ in dirpaths: filepath = os.path.join(dir_, filename) - act_dirpath, act_filename = client._path_split(filepath) - self.assertEqual(act_dirpath, dir_) + act_dirpath, act_filename = converter._path_split(filepath) + self.assertEqual(act_dirpath, dir_.rsplit('/', 1)[0]) self.assertEqual(act_filename, filename) @@ -100,42 +121,40 @@ def setUp(self): self.test_region = 'us-central1' self.test_project = 'foo' self.test_wheel = '/my/path/wheel.whl' - self.test_schema_map = schema.image_csv_schema + self.test_schema = input_schema.IMAGE_CSV_SCHEMA def test_valid_dataframe(self): """Tests valid DataFrame input.""" - self.assertIsNone( - client._validate_data( - self.test_df, - schema.image_csv_schema)) + self.assertIsNone(converter._validate_data(self.test_df, self.test_schema)) def test_missing_image(self): """Tests missing image column.""" with self.assertRaises(AttributeError): df2 = self.test_df.copy() df2.drop('image_uri', inplace=True, axis=1) - client._validate_data(df2, schema.image_csv_schema) + converter._validate_data(df2, self.test_schema) def test_missing_label(self): """Tests missing label column.""" with self.assertRaises(AttributeError): df2 = self.test_df.copy() df2.drop('label', inplace=True, axis=1) - client._validate_data(df2, 
schema.image_csv_schema) + converter._validate_data(df2, self.test_schema) def test_missing_split(self): """Tests missing split column.""" split_key = 'split' - schema_keys = re.escape(str(list(self.test_schema_map.keys()))) + schema_keys = re.escape( + str(list(self.test_schema.input_schema_map.keys()))) regex = fr'^.+column: {split_key}.+keys: {schema_keys}.$' with self.assertRaisesRegex(AttributeError, regex): df2 = self.test_df.copy() df2.drop(split_key, inplace=True, axis=1) - client._validate_data(df2, schema.image_csv_schema) + converter._validate_data(df2, self.test_schema) def test_valid_runner(self): """Tests valid runner.""" - self.assertIsNone(client._validate_runner( + self.assertIsNone(converter._validate_runner( runner='DirectRunner', project=self.test_project, region=self.test_region, @@ -144,7 +163,7 @@ def test_valid_runner(self): def test_invalid_runner(self): """Tests invalid runner.""" with self.assertRaises(AttributeError): - client._validate_runner( + converter._validate_runner( runner='FooRunner', project=self.test_project, region=self.test_region, @@ -156,7 +175,7 @@ def test_gcs_path_with_dataflow_runner_missing_param(self): for p, r in [ (None, self.test_region), (self.test_project, None), (None, None)]: with self.assertRaises(AttributeError) as context: - client._validate_runner( + converter._validate_runner( runner='DataflowRunner', project=p, region=r, @@ -168,7 +187,7 @@ def test_gcs_path_with_dataflow_runner_missing_param(self): def test_gcs_path_with_dataflow_runner_missing_wheel(self): """Tests DataflowRunner with missing required whl path.""" with self.assertRaises(AttributeError) as context: - client._validate_runner( + converter._validate_runner( runner='DataflowRunner', project=self.test_project, region=self.test_region, @@ -202,40 +221,35 @@ class ReadImageDirectoryTest(unittest.TestCase): def setUp(self): self.image_data = test_utils.get_test_df() - self.split_key = schema.get_key( - schema.SplitKeyType, schema.image_csv_schema) - self.label_key = schema.get_key( - schema.StringLabelType, schema.image_csv_schema) - self.image_uri_key = schema.get_key( - schema.ImageUriType, schema.image_csv_schema) self.tempfiles = [] self.tempdir = None + self.schema = input_schema.Schema( + input_schema.IMAGE_CSV_SCHEMA.input_schema_map) def tearDown(self): for fp in self.tempfiles: fp.close() - self.tempdir.cleanup() def test_normal(self): """Tests conversion of expected directory structure on local machine.""" - g = self.image_data.groupby([self.split_key, self.label_key]) + g = self.image_data.groupby([self.schema.split_key, self.schema.label_key]) self.tempdir = tempfile.TemporaryDirectory() rows = [] for (split, label), indices in g.groups.items(): dir_ = os.path.join(self.tempdir.name, split, label) os.makedirs(dir_) - for f in list(self.image_data.loc[indices, self.image_uri_key]): + for f in list(self.image_data.loc[indices, self.schema.image_uri_key]): _, name = os.path.split(f) fp = tempfile.NamedTemporaryFile( dir=dir_, suffix='.jpg', prefix=name) self.tempfiles.append(fp) rows.append([split, fp.name, label]) - columns = list(schema.image_csv_schema.keys()) - actual = client._read_image_directory(self.tempdir.name) + columns = list(input_schema.IMAGE_CSV_SCHEMA.get_input_keys()) + actual = converter._read_image_directory(self.tempdir.name) actual.sort_values(by=columns, inplace=True) actual.reset_index(drop=True, inplace=True) expected = pd.DataFrame(rows, columns=columns) @@ -255,14 +269,16 @@ def setUp(self): def 
test_valid_csv_no_header_no_names_specified(self): """Tests a valid CSV without a header and no header names given.""" f = _make_csv_tempfile(self.sample_data) - actual = client.read_csv(f.name, header=None) - self.assertEqual(list(actual.columns), list(schema.image_csv_schema.keys())) + actual = converter.read_csv(f.name, header=None) + self.assertEqual( + list(actual.columns), + list(input_schema.IMAGE_CSV_SCHEMA.get_input_keys())) self.assertEqual(actual.values.tolist(), self.sample_data) def test_valid_csv_no_header_names_specified(self): """Tests valid CSV without a header, but header names are given.""" f = _make_csv_tempfile(self.sample_data) - actual = client.read_csv(f.name, header=None, names=self.header) + actual = converter.read_csv(f.name, header=None, names=self.header) self.assertEqual(list(actual.columns), self.header) self.assertEqual(actual.values.tolist(), self.sample_data) @@ -270,7 +286,7 @@ def test_valid_csv_with_header_no_names_specified(self): """Tests valid CSV with header, and no header names given (inferred).""" f = _make_csv_tempfile([self.header] + self.sample_data) - actual = client.read_csv(f.name) + actual = converter.read_csv(f.name) self.assertEqual(list(actual.columns), self.header) self.assertEqual(actual.values.tolist(), self.sample_data) @@ -278,7 +294,7 @@ def test_valid_csv_with_header_names_specified(self): """Tests valid CSV with header, and header names given (override).""" f = _make_csv_tempfile([self.header] + self.sample_data) - actual = client.read_csv(f.name, names=self.header, header=0) + actual = converter.read_csv(f.name, names=self.header, header=0) self.assertEqual(list(actual.columns), self.header) self.assertEqual(actual.values.tolist(), self.sample_data) @@ -291,34 +307,34 @@ def setUp(self) -> None: columns = sample_data.pop(0) self.input_df = pd.DataFrame(sample_data, columns=columns) - @mock.patch.object(client, 'read_csv', autospec=True) + @mock.patch.object(converter, 'read_csv', autospec=True) def test_input_csv(self, read_csv): """Tests valid input CSV file.""" expected = self.input_df read_csv.return_value = expected f = _make_csv_tempfile(get_sample_image_csv_data()) - actual = client.to_dataframe(f.name) + actual = converter.to_dataframe(f.name) pd.testing.assert_frame_equal(actual, expected) def test_input_dataframe_no_names_specified(self): """Tests valid input dataframe with no header names specified.""" - actual = client.to_dataframe(self.input_df) + actual = converter.to_dataframe(self.input_df) pd.testing.assert_frame_equal(actual, self.input_df) def test_input_dataframe_with_header(self): """Tests valid input dataframe with header specified.""" names = list(self.input_df.columns[0:-1]) - actual = client.to_dataframe(self.input_df, names=names) + actual = converter.to_dataframe(self.input_df, names=names) pd.testing.assert_frame_equal(actual, self.input_df[names]) - @mock.patch.object(client, '_read_image_directory') + @mock.patch.object(converter, '_read_image_directory') def test_input_image_dir(self, mock_fn): """Tests valid input image directory.""" mock_fn.return_value = self.input_df with tempfile.TemporaryDirectory() as input_data: - actual = client.to_dataframe(input_data) + actual = converter.to_dataframe(input_data) pd.testing.assert_frame_equal(actual, self.input_df) def test_error_invalid_inputs(self): @@ -326,7 +342,30 @@ def test_error_invalid_inputs(self): inputs = [0, 'not_a_csv_file', list(), dict()] for input_data in inputs: with self.assertRaises(ValueError): - client.to_dataframe(input_data) + 
converter.to_dataframe(input_data) + + +class ConvertAndLoadTest(unittest.TestCase): + """Tests `convert_and_load`.""" + + def setUp(self): + self.tfrecord_dir = '/path/to/tfrecords' + self.dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]) + self.datasets = { + 'TRAIN': self.dataset, + 'VALIDATION': self.dataset, + 'TEST': self.dataset, + } + + @mock.patch.object(dataset_loader, 'load', autospec=True) + @mock.patch.object(converter, 'convert', autospec=True) + def test_convert_and_load_normal(self, convert_fn, load_fn): + """Tests normal case.""" + convert_fn.return_value = dict(tfrecord_dir=self.tfrecord_dir) + load_fn.return_value = self.datasets + source = '/path/to/data.csv' + datasets = converter.convert_and_load(source) + self.assertEqual(datasets, self.datasets) if __name__ == '__main__': diff --git a/tfrecorder/dataset.py b/tfrecorder/dataset_loader.py similarity index 92% rename from tfrecorder/dataset.py rename to tfrecorder/dataset_loader.py index 7185a14..1fd0821 100644 --- a/tfrecorder/dataset.py +++ b/tfrecorder/dataset_loader.py @@ -23,7 +23,7 @@ import tensorflow as tf import tensorflow_transform as tft -from tfrecorder import schema +from tfrecorder import types TRANSFORMED_METADATA_DIR = tft.TFTransformOutput.TRANSFORMED_METADATA_DIR @@ -38,6 +38,10 @@ def _validate_tfrecord_dir(tfrecord_dir: str): """Verifies that the TFRecord directory contains expected files.""" + # Check that input is a valid directory. + if not os.path.isdir(tfrecord_dir): + raise ValueError(f'Not a directory: {tfrecord_dir}') + # Check that TensorFlow Transform directories are present. for dirname in [TRANSFORMED_METADATA_DIR, TRANSFORM_FN_DIR]: if not os.path.isdir(os.path.join(tfrecord_dir, dirname)): @@ -49,10 +53,11 @@ def _get_tfrecord_files_per_split(tfrecord_dir: str): """Returns TFRecord files for each split. The TFRecord filenames should have a prefix based on lowercase versions of - items in `schema.allowed_split_values`. DISCARD split is not checked. + items in `types.SplitKey.allowed_split_values`. DISCARD split is + not checked. """ split_to_files = {} - for split in schema.allowed_split_values[:-1]: + for split in types.SplitKey.allowed_values[:-1]: prefix = split.lower() files = glob.glob(os.path.join(tfrecord_dir, prefix + '*')) if files: diff --git a/tfrecorder/dataset_test.py b/tfrecorder/dataset_loader_test.py similarity index 64% rename from tfrecorder/dataset_test.py rename to tfrecorder/dataset_loader_test.py index 55195b7..28f189f 100644 --- a/tfrecorder/dataset_test.py +++ b/tfrecorder/dataset_loader_test.py @@ -14,15 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Tests for `dataset.py`.""" +"""Tests for `dataset_loader.py`.""" import os import tempfile import unittest -from tfrecorder import dataset -from tfrecorder import schema +from tfrecorder import dataset_loader from tfrecorder import test_utils +from tfrecorder import types # pylint: disable=protected-access @@ -38,28 +38,36 @@ def tearDown(self): def test_ok(self): """Checks that function works as expected when TFT dirs are present.""" - os.makedirs(os.path.join(self.temp_dir, dataset.TRANSFORMED_METADATA_DIR)) - os.makedirs(os.path.join(self.temp_dir, dataset.TRANSFORM_FN_DIR)) - dataset._validate_tfrecord_dir(self.temp_dir) + os.makedirs( + os.path.join(self.temp_dir, dataset_loader.TRANSFORMED_METADATA_DIR)) + os.makedirs(os.path.join(self.temp_dir, dataset_loader.TRANSFORM_FN_DIR)) + dataset_loader._validate_tfrecord_dir(self.temp_dir) def test_missing_metadata_dir(self): """Check exception raised when metadata directory missing.""" with self.assertRaises(FileNotFoundError): - os.makedirs(os.path.join(self.temp_dir, dataset.TRANSFORM_FN_DIR)) - dataset._validate_tfrecord_dir(self.temp_dir) + os.makedirs(os.path.join(self.temp_dir, dataset_loader.TRANSFORM_FN_DIR)) + dataset_loader._validate_tfrecord_dir(self.temp_dir) def test_missing_transform_fn_dir(self): """Check exception raised when transform_fn directory missing.""" with self.assertRaises(FileNotFoundError): os.makedirs( - os.path.join(self.temp_dir, dataset.TRANSFORMED_METADATA_DIR)) - dataset._validate_tfrecord_dir(self.temp_dir) + os.path.join(self.temp_dir, dataset_loader.TRANSFORMED_METADATA_DIR)) + dataset_loader._validate_tfrecord_dir(self.temp_dir) def test_missing_tf_transform_dirs(self): """Check exception raised when both TFT transform directories missing.""" with self.assertRaises(FileNotFoundError): - dataset._validate_tfrecord_dir(self.temp_dir) + dataset_loader._validate_tfrecord_dir(self.temp_dir) + + def test_not_dir(self): + """Check exception raised when input is not a valid directory.""" + + input_dir = '/some/non-existent/dir' + with self.assertRaisesRegex(ValueError, 'Not a directory:'): + dataset_loader._validate_tfrecord_dir(input_dir) class LoadTest(unittest.TestCase): @@ -70,10 +78,10 @@ def setUp(self): def test_load_all_splits(self): """Test case where all TFRecord splits can be loaded.""" - dataset_dict = dataset.load(self.tfrecord_dir) + dataset_dict = dataset_loader.load(self.tfrecord_dir) self.assertEqual(len(dataset_dict), 3) self.assertCountEqual( - list(dataset_dict.keys()), schema.allowed_split_values[:-1]) + list(dataset_dict.keys()), types.SplitKey.allowed_values[:-1]) if __name__ == '__main__': diff --git a/tfrecorder/input_schema.py b/tfrecorder/input_schema.py new file mode 100644 index 0000000..b32bccf --- /dev/null +++ b/tfrecorder/input_schema.py @@ -0,0 +1,102 @@ +# Lint as: python3 + +# Copyright 2020 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Defines input types for TFRecorder's input schema.""" + +from typing import Dict + +import tensorflow as tf +import tensorflow_transform as tft +from tensorflow_transform.tf_metadata import dataset_metadata +from tensorflow_transform.tf_metadata import schema_utils + +from tfrecorder import types + + + +class Schema: + """Defines a TFRecorder input schema.""" + def __init__(self, schema_map: Dict[str, types.SupportedType]) -> None: + """Defines TFRecorder input schema. + + Args: + schema_map: An ordered dictionary that maps input columns to + TFRecorder supported types. + """ + self.split_key = None + self.image_uri_key = None + self.label_key = None + self.input_schema_map = schema_map + self.pre_tft_schema_map = {} + + for k, v in schema_map.items(): + if v == types.SplitKey: + self.split_key = k + if 'Label' in v.__name__: # Matches any label type + self.label_key = k + + if v == types.ImageUri: + self.image_uri_key = k + # if an image key is present, add image features to pre tft schema + self.pre_tft_schema_map['image_name'] = types.ImageSupportString + self.pre_tft_schema_map['image'] = types.ImageSupportString + self.pre_tft_schema_map['image_height'] = types.ImageDim + self.pre_tft_schema_map['image_width'] = types.ImageDim + self.pre_tft_schema_map['image_channels'] = types.ImageDim + else: + self.pre_tft_schema_map[k] = schema_map[k] + + if not self.split_key: + raise AttributeError("Schema must contain a split key.") + + @staticmethod + def _get_feature_spec( + schema_map: Dict[str, types.SupportedType] + ) -> Dict[str, tf.io.FixedLenFeature]: + """Gets map of column names to tf.io.FixedLenFeatures for TFT.""" + return {k: v.feature_spec for k, v in schema_map.items()} + + @staticmethod + def _get_metadata( + feature_spec: Dict[str, tf.io.FixedLenFeature] + ) -> types.BeamDatasetMetadata: + """Gets DatasetMetadata.""" + return dataset_metadata.DatasetMetadata( + schema_utils.schema_from_feature_spec(feature_spec)) + + def get_pre_tft_metadata(self) -> types.BeamDatasetMetadata: + """Gets pre TFT metadata, used by TFT external to this class.""" + feature_spec = self._get_feature_spec(self.pre_tft_schema_map) + return self._get_metadata(feature_spec) + + def get_input_coder(self) -> tft.coders.CsvCoder: + """Gets input schema TFT CSV Coder.""" + feature_spec = self._get_feature_spec(self.input_schema_map) + metadata = self._get_metadata(feature_spec) + return tft.coders.CsvCoder(list(self.input_schema_map.keys()), + metadata.schema) + + def get_input_keys(self): + """Returns keys for input_schema_map as list.""" + return self.input_schema_map.keys() + +# Built in / Default schema map. 
+image_csv_schema_map = { + 'split': types.SplitKey, + 'image_uri': types.ImageUri, + 'label': types.StringLabel} + +IMAGE_CSV_SCHEMA = Schema(image_csv_schema_map) diff --git a/tfrecorder/schema_test.py b/tfrecorder/input_schema_test.py similarity index 58% rename from tfrecorder/schema_test.py rename to tfrecorder/input_schema_test.py index c0d2d5a..f2b9eec 100644 --- a/tfrecorder/schema_test.py +++ b/tfrecorder/input_schema_test.py @@ -19,42 +19,47 @@ import unittest import tensorflow_transform as tft -from tfrecorder import schema +from tfrecorder import input_schema -class SchemaTest(unittest.TestCase): +class InputSchemaTest(unittest.TestCase): """Tests for type module.""" - def test_valid_get_tft_coder(self): - """Tests a valid call on get_tft_coder.""" - columns = ['split', 'image_uri', 'label'] - converter = schema.get_tft_coder(columns, schema.image_csv_schema) + def setUp(self): + self.schema = input_schema.Schema(input_schema.image_csv_schema_map) + + def test_valid_get_input_coder(self): + """Tests a valid call on get_input_coder.""" + converter = self.schema.get_input_coder() self.assertIsInstance(converter, tft.coders.CsvCoder) def test_valid_get_key(self): """Tests a valid split key.""" - key = schema.get_key(schema.SplitKeyType, schema.image_csv_schema) - self.assertEqual(key, 'split') + self.assertEqual(self.schema.split_key, 'split') def test_no_get_split_key(self): """Tests no split key present.""" - test_schema = dict() - for k, v in schema.image_csv_schema.items(): + test_schema_map = dict() + for k, v in input_schema.IMAGE_CSV_SCHEMA.input_schema_map.items(): # Brute force copy because OG is a FrozenOrderedDict. if k != 'split': - test_schema[k] = v + test_schema_map[k] = v - key = schema.get_key(schema.SplitKeyType, test_schema) - self.assertIsNone(key) + with self.assertRaises(AttributeError): + _ = input_schema.Schema(test_schema_map) def test_get_raw_metadata(self): """Tests a valid call to get_raw_metadata.""" - columns = ['split', 'image_uri', 'label'] - raw_metadata = schema.get_raw_metadata(columns, schema.image_csv_schema) + pre_tft_metadata = self.schema.get_pre_tft_metadata() self.assertIsInstance( - raw_metadata, + pre_tft_metadata, tft.tf_metadata.dataset_metadata.DatasetMetadata) + def test_get_input_keys(self): + """"Tests get_input_keys() function.""" + schema = input_schema.IMAGE_CSV_SCHEMA + self.assertEqual(schema.input_schema_map.keys(), schema.get_input_keys()) + if __name__ == '__main__': unittest.main() diff --git a/tfrecorder/schema.py b/tfrecorder/schema.py deleted file mode 100644 index 13ad3fd..0000000 --- a/tfrecorder/schema.py +++ /dev/null @@ -1,187 +0,0 @@ -# Lint as: python3 - -# Copyright 2020 Google LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Defines input types for TFRecorder's input schema.""" - -import collections -from typing import Dict, List, Union - -import frozendict -import tensorflow as tf -import tensorflow_transform as tft -from tensorflow_transform.tf_metadata import dataset_metadata -from tensorflow_transform.tf_metadata import schema_utils - -# TODO(mikebernico): Refactor types into data classes -# All supported types will be based on SupportedType. -SupportedType = collections.namedtuple( - 'tfrecordInputType', - ['type_name', 'feature_spec', 'allowed_values']) - -# Supported type definitions here. -ImageUriType = SupportedType( - type_name='image_uri', - feature_spec=tf.io.FixedLenFeature([], tf.string), - allowed_values=None) - -# Note: split_key is an immutable type and these allowed values cannot change. -allowed_split_values = ['TRAIN', 'VALIDATION', 'TEST', 'DISCARD'] -SplitKeyType = SupportedType( - type_name='split_key', - feature_spec=tf.io.FixedLenFeature([], tf.string), - allowed_values=allowed_split_values) - -#TODO(mikebernico): Implement in preprocess_fn -IntegerInputType = SupportedType( - type_name='integer_input', - feature_spec=tf.io.FixedLenFeature([], tf.int64), - allowed_values=None) - -#TODO(mikebernico): Implement in preprocess_fn -FloatInputType = SupportedType( - type_name='float_input', - feature_spec=tf.io.FixedLenFeature([], tf.float64), - allowed_values=None) - -#TODO(mikebernico): Implement in preprocess_fn -CategoricalInputType = SupportedType( - type_name='categorical_input', - feature_spec=tf.io.FixedLenFeature([], tf.string), - allowed_values=None) - -IntegerLabelType = SupportedType( - type_name='integer_label', - feature_spec=tf.io.FixedLenFeature([], tf.int64), - allowed_values=None) - -StringLabelType = SupportedType( - type_name='string_label', - feature_spec=tf.io.FixedLenFeature([], tf.string), - allowed_values=None) - -ImageSupportStringType = SupportedType( - type_name='image_support_string', - feature_spec=tf.io.FixedLenFeature([], tf.string), - allowed_values=None) - -ImageSupportIntType = SupportedType( - type_name='image_support_int', - feature_spec=tf.io.FixedLenFeature([], tf.int64), - allowed_values=None) - -# TODO(mikebernico): Refactor schema_map to a container class. -# Default schema supports the legacy image_csv format. -SchemaMap = Dict[str, SupportedType] - -image_csv_schema = frozendict.FrozenOrderedDict({ - 'split': SplitKeyType, - 'image_uri': ImageUriType, - 'label': StringLabelType}) - - -def get_raw_schema_map( - schema_map: Dict[str, collections.namedtuple] - ) -> Dict[str, collections.namedtuple]: - """Converts a schema to a raw (pre TFT / post image extraction) schema.""" - raw_schema = {} - for k, v in schema_map.items(): - if v.type_name == 'image_uri': - raw_schema['image_name'] = ImageSupportStringType - raw_schema['image'] = ImageSupportStringType - raw_schema['image_height'] = ImageSupportIntType - raw_schema['image_width'] = ImageSupportIntType - raw_schema['image_channels'] = ImageSupportIntType - else: - raw_schema[k] = schema_map[k] - return raw_schema - - -def get_tft_coder( - columns: List[str], - schema_map: Dict[str, collections.namedtuple] - ) -> tft.coders.CsvCoder: - """Gets a TFT CSV Coder. - - Args: - columns: Ordered DataFrame column names, from df.column. - schema_map: Schema map used to infer the schema. 
- - Returns: - tft.coders.CsvCoder - """ - feature_spec = {} - # Because the DF column name order may not match the feature_spec order - # This maps existing column names to their feature spec (required part of - # namedtuple) - for col in columns: - feature_spec[col] = schema_map[col].feature_spec - - metadata = dataset_metadata.DatasetMetadata( - schema_utils.schema_from_feature_spec(feature_spec)) - - return tft.coders.CsvCoder(columns, metadata.schema) - - -def get_key( - type_: SupportedType, - schema_map: Dict[str, collections.namedtuple]) -> Union[str, None]: - """Gets first instance of key of type 'type_name' from schema map. - - Returns key name if present, otherwise returns None. - """ - #TODO(mikebernico): Fix so that multiples of a key type work in future. - for k, v in schema_map.items(): - if v.type_name == type_.type_name: - return k - return None - - -def get_raw_feature_spec(columns: List[str], - schema_map: Dict[str, collections.namedtuple] - ) -> Dict[str, tf.io.FixedLenFeature]: - """Gets RAW (pre TFT) feature spec.""" - - feature_spec = dict() - - # Because the DF column name order may not match the feature_spec order - # this maps existing column names to their feature spec (req part of - # namedtuple) - for col in columns: - if schema_map[col].type_name == 'image_uri': - # Modify feature_spec for extracted image, don't include image_uri. - # TODO(mikebernico) This only works in the case where the input has - # ONLY 1 image. Generalize to multiple images someday? - feature_spec['image_name'] = tf.io.FixedLenFeature([], tf.string) - feature_spec['image'] = tf.io.FixedLenFeature([], tf.string) - feature_spec['image_height'] = tf.io.FixedLenFeature([], tf.int64) - feature_spec['image_width'] = tf.io.FixedLenFeature([], tf.int64) - feature_spec['image_channels'] = tf.io.FixedLenFeature([], tf.int64) - else: - # Copy feature as-is. - feature_spec[col] = schema_map[col].feature_spec - return feature_spec - - -def get_raw_metadata(columns: List[str], - schema_map: Dict[str, collections.namedtuple] - ) -> dataset_metadata.DatasetMetadata: - """Returns metadata prior to TF Transform preprocessing - - Note: takes base schema_map as input, not raw_schema_map. 
- """ - feature_spec = get_raw_feature_spec(columns, schema_map) - return dataset_metadata.DatasetMetadata( - schema_utils.schema_from_feature_spec(feature_spec)) diff --git a/tfrecorder/test_data/data.csv b/tfrecorder/test_data/data.csv index dfaf143..9991050 100644 --- a/tfrecorder/test_data/data.csv +++ b/tfrecorder/test_data/data.csv @@ -1,7 +1,7 @@ split,image_uri,label -TRAIN,tfrecorder/test_data/images/cat/cat-640x853-1.jpg,cat -VALIDATION,tfrecorder/test_data/images/cat/cat-800x600-2.jpg,cat -TEST,tfrecorder/test_data/images/cat/cat-800x600-3.jpg,cat -TRAIN,tfrecorder/test_data/images/goat/goat-640x640-1.jpg,goat -VALIDATION,tfrecorder/test_data/images/goat/goat-320x320-2.jpg,goat -TEST,tfrecorder/test_data/images/goat/goat-640x427-3.jpg,goat \ No newline at end of file +TEST,tfrecorder/test_data/images/TEST/cat/cat-800x600-3.jpg,cat +TEST,tfrecorder/test_data/images/TEST/goat/goat-640x427-3.jpg,goat +TRAIN,tfrecorder/test_data/images/TRAIN/cat/cat-640x853-1.jpg,cat +TRAIN,tfrecorder/test_data/images/TRAIN/goat/goat-640x640-1.jpg,goat +VALIDATION,tfrecorder/test_data/images/VALIDATION/cat/cat-800x600-2.jpg,cat +VALIDATION,tfrecorder/test_data/images/VALIDATION/goat/goat-320x320-2.jpg,goat diff --git a/tfrecorder/test_data/images/cat/cat-800x600-3.jpg b/tfrecorder/test_data/images/TEST/cat/cat-800x600-3.jpg similarity index 100% rename from tfrecorder/test_data/images/cat/cat-800x600-3.jpg rename to tfrecorder/test_data/images/TEST/cat/cat-800x600-3.jpg diff --git a/tfrecorder/test_data/images/goat/goat-640x427-3.jpg b/tfrecorder/test_data/images/TEST/goat/goat-640x427-3.jpg similarity index 100% rename from tfrecorder/test_data/images/goat/goat-640x427-3.jpg rename to tfrecorder/test_data/images/TEST/goat/goat-640x427-3.jpg diff --git a/tfrecorder/test_data/images/cat/cat-640x853-1.jpg b/tfrecorder/test_data/images/TRAIN/cat/cat-640x853-1.jpg similarity index 100% rename from tfrecorder/test_data/images/cat/cat-640x853-1.jpg rename to tfrecorder/test_data/images/TRAIN/cat/cat-640x853-1.jpg diff --git a/tfrecorder/test_data/images/goat/goat-640x640-1.jpg b/tfrecorder/test_data/images/TRAIN/goat/goat-640x640-1.jpg similarity index 100% rename from tfrecorder/test_data/images/goat/goat-640x640-1.jpg rename to tfrecorder/test_data/images/TRAIN/goat/goat-640x640-1.jpg diff --git a/tfrecorder/test_data/images/cat/cat-800x600-2.jpg b/tfrecorder/test_data/images/VALIDATION/cat/cat-800x600-2.jpg similarity index 100% rename from tfrecorder/test_data/images/cat/cat-800x600-2.jpg rename to tfrecorder/test_data/images/VALIDATION/cat/cat-800x600-2.jpg diff --git a/tfrecorder/test_data/images/goat/goat-320x320-2.jpg b/tfrecorder/test_data/images/VALIDATION/goat/goat-320x320-2.jpg similarity index 100% rename from tfrecorder/test_data/images/goat/goat-320x320-2.jpg rename to tfrecorder/test_data/images/VALIDATION/goat/goat-320x320-2.jpg diff --git a/tfrecorder/test_utils.py b/tfrecorder/test_utils.py index 88b8f8f..8472c49 100644 --- a/tfrecorder/test_utils.py +++ b/tfrecorder/test_utils.py @@ -26,7 +26,7 @@ from apache_beam.testing import test_pipeline import pandas as pd -from tfrecorder import schema +from tfrecorder import input_schema TEST_DIR = 'tfrecorder/test_data' @@ -44,12 +44,12 @@ def get_test_data() -> Dict[str, List[Any]]: return get_test_df().to_dict(orient='list') -def get_raw_feature_df() -> pd.DataFrame: - """Returns test dataframe having raw feature spec schema.""" +def get_pre_tft_feature_df() -> pd.DataFrame: + """Returns test dataframe having pre-TF Transform feature spec 
schema.""" df = get_test_df() - my_raw_schema = schema.get_raw_schema_map(schema.image_csv_schema) - image_key = schema.get_key(schema.ImageUriType, schema.image_csv_schema) + schema = input_schema.Schema(input_schema.image_csv_schema_map) + image_key = schema.image_uri_key df.drop([image_key], axis=1, inplace=True) df['image_name'] = 'image_name' df['image'] = 'image' @@ -59,8 +59,7 @@ def get_raw_feature_df() -> pd.DataFrame: df['image_height'] = '48' df['image_width'] = '48' df['image_channels'] = '3' - df = df[my_raw_schema.keys()] - + df = df[schema.pre_tft_schema_map.keys()] return df diff --git a/tfrecorder/types.py b/tfrecorder/types.py index e203d5d..17b8d36 100644 --- a/tfrecorder/types.py +++ b/tfrecorder/types.py @@ -16,12 +16,76 @@ """Custom types.""" -from typing import Tuple +import dataclasses +from typing import Tuple, List, Any +import tensorflow as tf from apache_beam.pvalue import PCollection from tensorflow_transform import beam as tft_beam - BeamDatasetMetadata = tft_beam.tft_beam_io.beam_metadata_io.BeamDatasetMetadata TransformedMetadata = BeamDatasetMetadata TransformFn = Tuple[PCollection, TransformedMetadata] + + +@dataclasses.dataclass +class SupportedType: + """Base type for TFRecorder Types.""" + feature_spec: tf.io.FixedLenFeature + allowed_values: List[Any] + + +@dataclasses.dataclass +class ImageUri(SupportedType): + """Supports image uri columns.""" + feature_spec = tf.io.FixedLenFeature([], tf.string) + allowed_values = [] + + +@dataclasses.dataclass +class SplitKey(SupportedType): + """Supports split key columns.""" + feature_spec = tf.io.FixedLenFeature([], tf.string) + allowed_values = ['TRAIN', 'VALIDATION', 'TEST', 'DISCARD'] + + +@dataclasses.dataclass +class IntegerInput(SupportedType): + """Supports integer columns.""" + feature_spec = tf.io.FixedLenFeature([], tf.int64) + allowed_values = [] + + +@dataclasses.dataclass +class FloatInput(SupportedType): + """Supports float columns.""" + feature_spec = tf.io.FixedLenFeature([], tf.float32) + allowed_values = [] + + +#TODO(mikebernico): Implement in preprocess_fn +@dataclasses.dataclass +class StringInput(SupportedType): + """Supports string input columns.""" + feature_spec = tf.io.FixedLenFeature([], tf.string) + allowed_values = [] + + +@dataclasses.dataclass +class IntegerLabel(IntegerInput): + """Supports integer labels.""" + + +@dataclasses.dataclass +class StringLabel(StringInput): + """Supports string labels.""" + + +@dataclasses.dataclass +class ImageSupportString(StringInput): + """Supports generated image bytestrings.""" + + +@dataclasses.dataclass +class ImageDim(IntegerInput): + """Supports generated image ints (height, width, channels).""" diff --git a/tfrecorder/check.py b/tfrecorder/utils.py similarity index 60% rename from tfrecorder/check.py rename to tfrecorder/utils.py index ff9f715..0e032f1 100644 --- a/tfrecorder/check.py +++ b/tfrecorder/utils.py @@ -14,18 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Utilities for checking content of TFRecord files.""" +"""Miscellaneous utility functions.""" -from typing import Dict, Optional, Sequence, Union +from datetime import datetime +from typing import Dict import csv import os import tensorflow as tf -import tensorflow_transform as tft from tfrecorder import beam_image -from tfrecorder import common +from tfrecorder import constants +from tfrecorder import dataset_loader _OUT_IMAGE_TEMPLATE = 'image_{:0>3d}.png' @@ -36,27 +37,6 @@ def _stringify(scalar: tf.Tensor) -> str: val = scalar.numpy() return val.decode('utf-8') if isinstance(val, bytes) else str(val) -# pylint: disable=too-many-locals -# TODO(cezequiel): deprecate in favor of `dataset.load`. -def _read_tfrecords( - file_pattern: Union[str, Sequence[str]], - tft_output_dir: Optional[str] = None, - compression_type: str = 'GZIP') -> tf.data.Dataset: - """Reads TFRecords files and outputs a TensorFlow Dataset. - - Currently supports Image CSV format only. - """ - - files = tf.io.gfile.glob(file_pattern) - - if not tft_output_dir: - tft_output_dir = os.path.dirname(file_pattern) - tf_transform_output = tft.TFTransformOutput(tft_output_dir) - feature_spec = tf_transform_output.transformed_feature_spec() - dataset = tf.data.TFRecordDataset(files, compression_type) - return dataset.map(lambda x: tf.io.parse_single_example( - x, feature_spec)) - def _save_image_from_record(record: Dict[str, tf.Tensor], outfile: str): """Extracts image data from parsed TFRecord and saves it to a file.""" @@ -68,21 +48,32 @@ def _save_image_from_record(record: Dict[str, tf.Tensor], outfile: str): image.save(outfile) -def check_tfrecords( - file_pattern: str, +def inspect( + tfrecord_dir: str, + split: str = 'TRAIN', num_records: int = 1, - output_dir: str = 'output', - compression_type: str = 'GZIP'): - """Reads TFRecord files and outputs decoded contents to a temp directory.""" + output_dir: str = 'output'): + """Prints contents of TFRecord files generated by TFRecorder. + + Args: + tfrecord_dir: TFRecord directory. + split: Dataset split (see `schema.allowed_split_values`). + num_records: Number of records to output. + output_dir: Directory to dump read records. - dataset = _read_tfrecords(file_pattern, compression_type=compression_type) + Raises: + `ValueError` when data for a given `split` could not be loaded. 
+ """ + + dataset = dataset_loader.load(tfrecord_dir).get(split) + if not dataset: + raise ValueError(f'Could not load data for {split}') data_dir = os.path.join( - output_dir, 'check-tfrecords-' + common.get_timestamp()) + output_dir, 'check-tfrecords-' + get_timestamp()) os.makedirs(data_dir) - csv_file = os.path.join(data_dir, 'data.csv') - with open(csv_file, 'wt') as f: + with open(os.path.join(data_dir, 'data.csv'), 'wt') as f: writer = csv.writer(f) # Write CSV header @@ -108,3 +99,21 @@ def check_tfrecords( print('Output written to {}'.format(data_dir)) return data_dir + + +def get_timestamp() -> str: + """Returns current date and time as formatted string.""" + return datetime.now().strftime('%Y%m%d-%H%M%S') + + +def copy_logfile_to_gcs(logfile: str, output_dir: str): + """Copies a logfile from local to gcs storage.""" + try: + with open(logfile, 'r') as log_reader: + out_log = os.path.join(output_dir, constants.LOGFILE) + with tf.io.gfile.GFile(out_log, 'w') as gcs_logfile: + log = log_reader.read() + gcs_logfile.write(log) + except FileNotFoundError as e: + raise FileNotFoundError("Unable to copy log file {} to gcs.".format( + e.filename)) from e diff --git a/tfrecorder/check_test.py b/tfrecorder/utils_test.py similarity index 60% rename from tfrecorder/check_test.py rename to tfrecorder/utils_test.py index feeeb6e..77331e4 100644 --- a/tfrecorder/check_test.py +++ b/tfrecorder/utils_test.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests `check.py`.""" +"""Tests `utils.py`.""" import functools import os @@ -27,49 +27,15 @@ import tensorflow as tf from tfrecorder import beam_image -from tfrecorder import check +from tfrecorder import constants +from tfrecorder import utils from tfrecorder import test_utils -from tfrecorder import schema +from tfrecorder import input_schema +from tfrecorder import dataset_loader # pylint: disable=protected-access -class ReadTFRecordsTest(unittest.TestCase): - """Tests `_read_tfrecords`.""" - - def setUp(self): - self.tfrecords_dir = os.path.join(test_utils.TEST_DIR, 'sample_tfrecords') - - def test_valid_compressed_gzip(self): - """Tests valid case using GZIP compression.""" - - # Use list of file pattern strings to maintain train, validation, test - # order. 
- file_pattern = [ - os.path.join(self.tfrecords_dir, '{}*.tfrecord.gz'.format(f)) - for f in ['train, validation, test']] - - compression_type = 'GZIP' - actual = check._read_tfrecords( - file_pattern, self.tfrecords_dir, compression_type) - - expected_csv = os.path.join(test_utils.TEST_DIR, 'data.csv') - expected = tf.data.experimental.make_csv_dataset( - expected_csv, batch_size=1, label_name=None, num_epochs=1, - shuffle=False) - - for a, e in zip(actual, expected): - self.assertCountEqual(a.keys(), schema.image_csv_schema) - for key in schema.image_csv_schema: - self.assertEqual(a[key], e[key]) - - def test_error_invalid_file_pattern(self): - """Tests error case where file pattern is invalid.""" - - file_pattern = 'gs://path/to/memes/folder' - with self.assertRaises(tf.errors.OpError): - check._read_tfrecords(file_pattern) - class CheckTFRecordsTest(unittest.TestCase): """Tests `check_tfrecords`.""" @@ -85,7 +51,8 @@ def setUp(self): image_channels) data = test_utils.get_test_data() - image_uri_key = schema.get_key(schema.ImageUriType, schema.image_csv_schema) + schema = input_schema.IMAGE_CSV_SCHEMA + image_uri_key = schema.image_uri_key num_records = len(data[image_uri_key]) image_uris = data.pop(image_uri_key) data['image_name'] = [os.path.split(uri)[-1] for uri in image_uris] @@ -96,21 +63,23 @@ def setUp(self): 'image_width': [image_width] * num_records, 'image_channels': [image_channels] * num_records, }) + self.tfrecord_dir = 'gs://path/to/tfrecords/dir' + self.split = 'TRAIN' self.num_records = num_records self.data = data self.dataset = tf.data.Dataset.from_tensor_slices(self.data) - @mock.patch.object(check, '_read_tfrecords', autospec=True) + @mock.patch.object(dataset_loader, 'load', autospec=True) def test_valid_records(self, mock_fn): """Tests valid case on reading multiple records.""" - file_pattern = 'gs://path/to/tfrecords/*' - mock_fn.return_value = self.dataset + mock_fn.return_value = {self.split: self.dataset} num_records = len(self.data['image']) with tempfile.TemporaryDirectory(dir='/tmp') as dir_: - actual_dir = check.check_tfrecords( - file_pattern, num_records=num_records, output_dir=dir_) + actual_dir = utils.inspect( + self.tfrecord_dir, split=self.split, num_records=num_records, + output_dir=dir_) self.assertTrue('check-tfrecords-' in actual_dir) actual_csv = os.path.join(actual_dir, 'data.csv') @@ -129,6 +98,39 @@ def test_valid_records(self, mock_fn): expected_image_files = self.data['image_name'] self.assertCountEqual(actual_image_files, expected_image_files) + @mock.patch.object(dataset_loader, 'load', autospec=True) + def test_no_data_for_split(self, mock_fn): + """Check exception raised when data could not be loaded given `split`.""" + + mock_fn.return_value = {} + with self.assertRaisesRegex(ValueError, 'Could not load data for'): + utils.inspect(self.tfrecord_dir, split='UNSUPPORTED') + if __name__ == '__main__': unittest.main() + + +class CopyLogTest(unittest.TestCase): + """Misc tests for _copy_logfile_to_gcs.""" + + def test_valid_copy(self): + """Test valid file copy.""" + with tempfile.TemporaryDirectory() as tmpdirname: + text = 'log test log test' + infile = os.path.join(tmpdirname, 'foo.log') + with open(infile, 'w') as f: + f.write(text) + utils.copy_logfile_to_gcs(infile, tmpdirname) + + outfile = os.path.join(tmpdirname, constants.LOGFILE) + with open(outfile, 'r') as f: + data = f.read() + self.assertEqual(text, data) + + def test_invalid_copy(self): + """Test invalid file copy.""" + with tempfile.TemporaryDirectory() as tmpdirname: + 
infile = os.path.join(tmpdirname, 'foo.txt') + with self.assertRaises(FileNotFoundError): + utils.copy_logfile_to_gcs(infile, tmpdirname)
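
The hunks above rename `client` to `converter`, move dataset loading into `dataset_loader`, replace the dict-based schema with the `input_schema.Schema` class built on the `types` dataclasses, and fold the old `check` module into `utils.inspect`. The snippet below is a minimal usage sketch pieced together from the signatures and tests in this patch, not a definitive reference: the CSV path and output directories are placeholders, the modules are imported directly because the updated `tfrecorder/__init__.py` re-exports are not shown here, and the `schema` keyword argument is inferred from the `schema_map` -> `schema` rename visible in the `convert` hunk.

    from tfrecorder import converter
    from tfrecorder import dataset_loader
    from tfrecorder import input_schema
    from tfrecorder import types
    from tfrecorder import utils

    # Optional custom input schema: maps column names to the dataclasses in
    # `types` and must contain a split column. When `schema` is omitted, the
    # default appears to be input_schema.IMAGE_CSV_SCHEMA (split, image_uri,
    # label).
    schema = input_schema.Schema({
        'split': types.SplitKey,
        'image_uri': types.ImageUri,
        'label': types.IntegerLabel,
    })

    # Convert a CSV (or DataFrame, or image directory) to TFRecords locally.
    job_result = converter.convert(
        '/path/to/data.csv',      # placeholder source
        schema=schema,            # keyword name inferred from this patch
        runner='DirectRunner',
        output_dir='/tmp/tfrecords')
    tfrecord_dir = job_result['tfrecord_dir']

    # Load the written splits as tf.data.Datasets, or do both steps in one call.
    datasets = dataset_loader.load(tfrecord_dir)
    datasets = converter.convert_and_load(
        '/path/to/data.csv',
        runner='DirectRunner',
        output_dir='/tmp/tfrecords')
    train_ds = datasets['TRAIN']

    # Dump a few decoded records (and any images) to a directory for inspection.
    utils.inspect(tfrecord_dir, split='TRAIN', num_records=2,
                  output_dir='/tmp/inspect')

With `DataflowRunner`, the same `convert` call additionally requires `project`, `region`, and `tfrecorder_wheel`, and the returned dict carries `dataflow_url` instead of `metrics`; in both cases it includes `job_id` and `tfrecord_dir`.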