# **Part B - TFX InteractiveContext pipeline on Chicago taxi data**

In [1]:
try:
  import colab
  !pip install --upgrade pip
except:
  pass

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/ac/cf/0cc542fc93de2f3b9b53cb979c7d1118cffb93204afb46299a9f858e113f/pip-21.1-py3-none-any.whl (1.5MB)
[K     |▏                               | 10kB 15.7MB/s eta 0:00:01[K     |▍                               | 20kB 20.7MB/s eta 0:00:01[K     |▋                               | 30kB 25.4MB/s eta 0:00:01[K     |▉                               | 40kB 19.8MB/s eta 0:00:01[K     |█                               | 51kB 18.9MB/s eta 0:00:01[K     |█▎                              | 61kB 16.3MB/s eta 0:00:01[K     |█▌                              | 71kB 17.0MB/s eta 0:00:01[K     |█▊                              | 81kB 14.4MB/s eta 0:00:01[K     |██                              | 92kB 15.0MB/s eta 0:00:01[K     |██▏                             | 102kB 15.0MB/s eta 0:00:01[K     |██▎                             | 112kB 15.0MB/s eta 0:00:01[K     |██▌                             | 122kB 15.0MB/s eta 

In [None]:
!pip install -q -U tfx

In [1]:
#importing necessary libraries

%load_ext tensorboard

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import folium

from folium import plugins

import tensorflow as tf
import tensorflow.keras as keras
print(tf.__version__)

2.4.1


In [2]:
import os
import pprint
import tempfile
import urllib

import absl
import tensorflow as tf
import tensorflow_model_analysis as tfma
tf.get_logger().propagate = False
pp = pprint.PrettyPrinter()

import tfx
from tfx.components import CsvExampleGen
from tfx.components import Evaluator
from tfx.components import ExampleValidator
from tfx.components import Pusher
from tfx.components import ResolverNode
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Trainer
from tfx.components import Transform
from tfx.components.base import executor_spec
from tfx.components.trainer.executor import GenericExecutor
from tfx.dsl.experimental import latest_blessed_model_resolver
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model
from tfx.types.standard_artifacts import ModelBlessing
from tfx.utils.dsl_utils import external_input

%load_ext tfx.orchestration.experimental.interactive.notebook_extensions.skip
#%reload_ext tfx.orchestration.experimental.interactive.notebook_extensions.skip



In [3]:
# Report the library versions used by this run.
print(f'TensorFlow version: {tf.__version__}')
print(f'TFX version: {tfx.__version__}')

TensorFlow version: 2.4.1
TFX version: 0.29.0


In [4]:
#tensorflow directories in content
!cd /content/
!mkdir /content/tfx/
!mkdir /content/tfx/pipelines
!mkdir /content/tfx/metadata
!mkdir /content/tfx/logs
!mkdir /content/tfx/data
!mkdir /content/tfx/serving_model

In [5]:
#downloading the data
!wget https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv

--2021-04-26 04:46:57--  https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1922812 (1.8M) [text/plain]
Saving to: ‘data.csv’


2021-04-26 04:46:57 (66.0 MB/s) - ‘data.csv’ saved [1922812/1922812]



In [6]:
# Load the downloaded CSV into a DataFrame, then summarise its columns
# (non-null counts and dtypes).
df = pd.read_csv(filepath_or_buffer='/content/data.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15002 entries, 0 to 15001
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   pickup_community_area   15000 non-null  float64
 1   fare                    15002 non-null  float64
 2   trip_start_month        15002 non-null  int64  
 3   trip_start_hour         15002 non-null  int64  
 4   trip_start_day          15002 non-null  int64  
 5   trip_start_timestamp    15002 non-null  int64  
 6   pickup_latitude         15000 non-null  float64
 7   pickup_longitude        15000 non-null  float64
 8   dropoff_latitude        14519 non-null  float64
 9   dropoff_longitude       14519 non-null  float64
 10  trip_miles              15002 non-null  float64
 11  pickup_census_tract     1 non-null      float64
 12  dropoff_census_tract    10761 non-null  float64
 13  payment_type            15002 non-null  object 
 14  company                 9862 non-null 

In [7]:
# Preprocessing, mirroring part A of the assignment.

# Remove the columns that are not needed for the rest of the notebook.
df = df.drop(
    columns=[
        'trip_start_timestamp',
        'trip_miles',
        'pickup_census_tract',
        'dropoff_census_tract',
        'trip_seconds',
        'payment_type',
        'tips',
        'company',
        'dropoff_community_area',
        'pickup_community_area',
    ],
)

In [8]:
# Discard every row that still contains at least one missing value.
df = df.dropna(axis=0, how='any')

**Splitting to train val and saving them as csv files**

In [9]:
# Random row-wise split (each row goes to the training part with probability
# 0.85); both parts are then written out as CSV files.
np.random.seed(2)  # fixed seed so the split is reproducible
msk = np.random.rand(len(df)) < 0.85
traindf, evaldf = df[msk], df[~msk]

print(len(traindf), len(evaldf), sep='\n')

# The training file keeps its header row; the evaluation file is written
# without one.
traindf.to_csv("/content/tfx/data/data_trans.csv", header=True, index=False)
evaldf.to_csv("eval.csv", header=False, index=False)

12348
2171


**Defining pipeline paths for TFX**

In [10]:
# Filesystem locations used by the pipeline.
# Everything lives under <current working directory>/tfx.
_tfx_root = os.path.join(os.getcwd(), 'tfx')

# Sub-locations of the TFX root.
_pipeline_root = os.path.join(_tfx_root, 'pipelines')
_metadata_db_root = os.path.join(_tfx_root, 'metadata.db')
_log_root = os.path.join(_tfx_root, 'logs')
_model_root = os.path.join(_tfx_root, 'model')
_serving_model_dir = os.path.join(_tfx_root, 'serving_model')

# Input data directory and the training CSV stored inside it.
_data_root = os.path.join(_tfx_root, 'data')
_data_filepath = os.path.join(_data_root, "data_trans.csv")

# File names of the trainer-related modules.
_input_fn_module_file = 'inputfn_trainer.py'
_constants_module_file = 'constants_trainer.py'
_model_trainer_module_file = 'model_trainer.py'

**InteractiveContext component initialization**

In [11]:
# Create the interactive TFX context used to run components one at a time.
# Component artifacts are written directly under `_tfx_root`.
# NOTE(review): `_pipeline_root` is defined with the other paths but is not
# what is passed here - confirm which root is intended.
context = InteractiveContext(pipeline_root=_tfx_root)



## **Running TFX components interactively with examples**

In [12]:
# Ingest the CSV data found in `_data_root` into TFX Examples.
# The directory is passed directly through `input_base`: the `external_input`
# wrapper is deprecated ("directly pass the uri to ExampleGen").
example_gen = CsvExampleGen(input_base=_data_root)
context.run(example_gen)

Instructions for updating:
external_input is deprecated, directly pass the uri to ExampleGen.




0,1
.execution_id,1
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x7fc303c6db50.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0.exec_properties['input_base']/content/tfx/data['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:856594,xor_checksum:1619412421,sum_checksum:1619412421"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']/content/tfx/data['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:856594,xor_checksum:1619412421,sum_checksum:1619412421"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/content/tfx/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],/content/tfx/data
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:single_split,num_files:1,total_bytes:856594,xor_checksum:1619412421,sum_checksum:1619412421"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/content/tfx/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0


This component produces a single `Examples` artifact that contains two splits: training examples and evaluation examples.

In [13]:
# Take the first artifact of the 'examples' output and show its split names
# together with the location it is stored at.
artifact = example_gen.outputs['examples'].get()[0]
print(f'{artifact.split_names} {artifact.uri}')

["train", "eval"] /content/tfx/CsvExampleGen/examples/1


Looking at the first three training examples

In [14]:
# Peek at the first three training examples.

# The output artifact's URI is a directory; the training split is stored in
# its 'Split-train' sub-directory.
examples_artifact_uri = example_gen.outputs['examples'].get()[0].uri
train_uri = os.path.join(examples_artifact_uri, 'Split-train')

# Collect the full path of every file in that directory.
tfrecord_filenames = []
for name in os.listdir(train_uri):
  tfrecord_filenames.append(os.path.join(train_uri, name))

# Read the files as GZIP-compressed TFRecords.
dataset = tf.data.TFRecordDataset(tfrecord_filenames, compression_type="GZIP")

# Decode the first 3 records into tf.train.Example protos and pretty-print them.
for tfrecord in dataset.take(3):
  serialized_example = tfrecord.numpy()
  example = tf.train.Example.FromString(serialized_example)
  pp.pprint(example)

features {
  feature {
    key: "dropoff_latitude"
    value {
      float_list {
        value: 41.92045211791992
      }
    }
  }
  feature {
    key: "dropoff_longitude"
    value {
      float_list {
        value: -87.6799545288086
      }
    }
  }
  feature {
    key: "fare"
    value {
      float_list {
        value: 3.8499999046325684
      }
    }
  }
  feature {
    key: "pickup_latitude"
    value {
      float_list {
        value: 41.8996696472168
      }
    }
  }
  feature {
    key: "pickup_longitude"
    value {
      float_list {
        value: -87.66983795166016
      }
    }
  }
  feature {
    key: "trip_start_day"
    value {
      int64_list {
        value: 6
      }
    }
  }
  feature {
    key: "trip_start_hour"
    value {
      int64_list {
        value: 15
      }
    }
  }
  feature {
    key: "trip_start_month"
    value {
      int64_list {
        value: 3
      }
    }
  }
}

features {
  feature {
    key: "dropoff_latitude"
    value {
      fl

**StatisticsGen**
This takes as input the dataset ingested using ExampleGen

In [15]:
# Compute statistics over the examples produced by ExampleGen and run the
# component in the interactive context.
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)

0,1
.execution_id,2
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } StatisticsGen at 0x7fc301da6050.inputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0.outputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  
objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""].exec_properties['stats_options_json']None['exclude_splits'][]"
.component.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.component.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.exec_properties,['stats_options_json']None['exclude_splits'][]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/content/tfx/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/tfx/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['stats_options_json'],
['exclude_splits'],[]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fc3033af050.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/tfx/CsvExampleGen/examples/1) at 0x7fc303ef3690.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/tfx/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/content/tfx/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/tfx/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"


In [16]:
# Visualize the 'statistics' output channel produced by StatisticsGen.
statistics_output = statistics_gen.outputs['statistics']
context.show(statistics_output)

The output above shows the summary statistics that StatisticsGen computed for the data.

**SchemaGen** This component takes the output of StatisticsGen as its input and generates a schema for the data.

In [17]:
# SchemaGen consumes the 'statistics' channel emitted by StatisticsGen.
statistics_channel = statistics_gen.outputs['statistics']

# NOTE(review): infer_feature_shape=False presumably keeps feature shapes out
# of the generated schema -- confirm against the SchemaGen documentation.
schema_gen = SchemaGen(statistics=statistics_channel, infer_feature_shape=False)

# Run the component in the interactive context; as the cell's last expression,
# the execution result is rendered below the cell.
context.run(schema_gen)

0,1
.execution_id,3
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } SchemaGen at 0x7fc2fcec7610.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""].outputs['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  
objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3.exec_properties['infer_feature_shape']0['exclude_splits'][]"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.component.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3
.exec_properties,['infer_feature_shape']0['exclude_splits'][]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/tfx/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/tfx/SchemaGen/schema/3

0,1
['infer_feature_shape'],0
['exclude_splits'],[]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/tfx/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/tfx/SchemaGen/schema/3


In [18]:
# Display the 'schema' output channel produced by SchemaGen.
schema_output = schema_gen.outputs['schema']
context.show(schema_output)

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'dropoff_latitude',FLOAT,required,single,-
'dropoff_longitude',FLOAT,required,single,-
'fare',FLOAT,required,single,-
'pickup_latitude',FLOAT,required,single,-
'pickup_longitude',FLOAT,required,single,-
'trip_start_day',INT,required,single,-
'trip_start_hour',INT,required,single,-
'trip_start_month',INT,required,single,-


The schema above lists, for each feature in our data, its type, presence, valency, and domain.

**ExampleValidator** 
This component detects anomalies in the data based on the expectations defined by the schema. It takes the statistics from StatisticsGen and the schema from SchemaGen as inputs.

In [19]:
# ExampleValidator takes two inputs: the statistics from StatisticsGen and
# the schema from SchemaGen.
statistics_channel = statistics_gen.outputs['statistics']
schema_channel = schema_gen.outputs['schema']

example_validator = ExampleValidator(statistics=statistics_channel, schema=schema_channel)

# Run the component in the interactive context; as the cell's last expression,
# the execution result is rendered below the cell.
context.run(example_validator)

0,1
.execution_id,4
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } ExampleValidator at 0x7fc2fcf09710.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  
objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3.outputs['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fc2fcf09f90.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""].exec_properties['exclude_splits'][]"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3"
.component.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fc2fcf09f90.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3"
.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fc2fcf09f90.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"
.exec_properties,['exclude_splits'][]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/tfx/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/tfx/SchemaGen/schema/3

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fc2fcf09f90.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/content/tfx/ExampleValidator/anomalies/4
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['exclude_splits'],[]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fc301da60d0.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fc2fcec76d0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/tfx/StatisticsGen/statistics/2) at 0x7fc301b0ef10.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/tfx/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/tfx/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/tfx/SchemaGen/schema/3) at 0x7fc2fce9cf50.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/tfx/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/tfx/SchemaGen/schema/3

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fc2fcf09f90.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/tfx/ExampleValidator/anomalies/4) at 0x7fc3016a7950.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/tfx/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/content/tfx/ExampleValidator/anomalies/4
.span,0
.split_names,"[""train"", ""eval""]"


In [20]:
context.show(example_validator.outputs['anomalies'])

  pd.set_option('max_colwidth', -1)


From the output above, ExampleValidator found no anomalies in the data.

In [21]:
# Locate the 'Split-train' directory inside the ExampleGen output artifact
examples_artifact = example_gen.outputs['examples'].get()[0]
train_uri = os.path.join(examples_artifact.uri, 'Split-train')

# Every entry in that directory is treated as a GZIP-compressed TFRecord shard
shard_names = os.listdir(train_uri)
tfrecord_filenames = [os.path.join(train_uri, name) for name in shard_names]

# Stream the shards through a single `TFRecordDataset`
dataset = tf.data.TFRecordDataset(
    tfrecord_filenames,
    compression_type="GZIP",
)

# Decode and pretty-print only the first record as a sanity check
for tfrecord in dataset.take(1):
  serialized_example = tfrecord.numpy()
  example = tf.train.Example()
  example.ParseFromString(serialized_example)
  pp.pprint(example)

features {
  feature {
    key: "dropoff_latitude"
    value {
      float_list {
        value: 41.92045211791992
      }
    }
  }
  feature {
    key: "dropoff_longitude"
    value {
      float_list {
        value: -87.6799545288086
      }
    }
  }
  feature {
    key: "fare"
    value {
      float_list {
        value: 3.8499999046325684
      }
    }
  }
  feature {
    key: "pickup_latitude"
    value {
      float_list {
        value: 41.8996696472168
      }
    }
  }
  feature {
    key: "pickup_longitude"
    value {
      float_list {
        value: -87.66983795166016
      }
    }
  }
  feature {
    key: "trip_start_day"
    value {
      int64_list {
        value: 6
      }
    }
  }
  feature {
    key: "trip_start_hour"
    value {
      int64_list {
        value: 15
      }
    }
  }
  feature {
    key: "trip_start_month"
    value {
      int64_list {
        value: 3
      }
    }
  }
}



In [22]:
# Quantile bin edges for latitude / longitude, computed over pickup and
# dropoff coordinates together so both ends of a trip share the same buckets.
qcut_options = dict(q=20, duplicates='drop', retbins=True)

all_latitudes = list(df['dropoff_latitude'].values) + list(df['pickup_latitude'].values)
all_longitudes = list(df['dropoff_longitude'].values) + list(df['pickup_longitude'].values)

# With retbins=True, qcut returns (binned_values, bin_edges); only the edges are kept.
bins_lat = pd.qcut(all_latitudes, **qcut_options)[1]
bins_lon = pd.qcut(all_longitudes, **qcut_options)[1]

In [23]:
# Render the bin edges as Python source so the trainer module can import them.
code = '\nbins_lat = {}\nbins_lon = {}\n'.format(
    str(list(bins_lat)),
    str(list(bins_lon)),
)

# Persist the generated source to the constants module file.
with open(_constants_module_file, 'w') as constants_file:
    constants_file.write(code)

## **Trainer**

In [29]:
%%writefile {_input_fn_module_file}

import os
import tensorflow as tf
import geo.sphere

#Feature engineering functions
def feature_engg_features(features):
  """Add the engineered `distance` feature and stringify the calendar fields.

  Args:
    features: dict-like of feature name -> value/tensor. Must contain
      'pickup_latitude', 'pickup_longitude', 'dropoff_latitude',
      'dropoff_longitude', 'trip_start_month', 'trip_start_hour' and
      'trip_start_day'.

  Returns:
    The same `features` object (it is updated in place) with a new
    'distance' entry and the three 'trip_start_*' entries converted with
    `tf.strings.as_string`.
  """
  #Straight-line (Euclidean) distance between pickup and dropoff, measured in
  #coordinate degrees. This is NOT a great-circle distance.
  lat_delta = features['pickup_latitude'] - features['dropoff_latitude']
  lon_delta = features['pickup_longitude'] - features['dropoff_longitude']
  features['distance'] = (lat_delta**2 + lon_delta**2)**0.5

  #The calendar fields are parsed as integers; convert them to strings
  for key in ('trip_start_month', 'trip_start_hour', 'trip_start_day'):
    features[key] = tf.strings.as_string(features[key])

  return features

#Dataset-level wrapper: takes and returns a (features, label) pair
def feature_engg(features, label):
  """Run `feature_engg_features` on `features` and pass `label` through unchanged."""
  return feature_engg_features(features), label

def make_input_fn(dir_uri, mode, vnum_epochs = None, batch_size = 512):
    """Build the dataset input function for the trainer.

    Args:
      dir_uri: file pattern handed to `tf.io.gfile.glob` to find TFRecord files.
      mode: compared against `tf.estimator.ModeKeys.TRAIN` to pick the epoch count.
      vnum_epochs: epoch count used in TRAIN mode (default None).
      batch_size: not referenced in the part of the function visible here.

    NOTE(review): the cell text captured here stops right after `num_epochs`
    is chosen; the rest of `_input_fn` and the return of this factory appear
    to be cut off. Confirm against the full inputfn_trainer.py before relying
    on this description.
    """
    def decode_tfr(serialized_example):
      """Parse serialized tf.Example protos into (features, label), label being 'fare'."""
      # 1. define a parser
      features = tf.io.parse_example(
        serialized_example,
        # Keys without a default_value are required; the pickup_* keys and
        # 'great_circle_distance' fall back to 0.0 when absent.
        features={
            'dropoff_latitude': tf.io.FixedLenFeature([], tf.float32),
            'dropoff_longitude': tf.io.FixedLenFeature([], tf.float32),
            'fare': tf.io.FixedLenFeature([], tf.float32),
            'pickup_latitude': tf.io.FixedLenFeature([], tf.float32, default_value = 0.0),
            'pickup_longitude': tf.io.FixedLenFeature([], tf.float32, default_value = 0.0),
            'trip_start_day': tf.io.FixedLenFeature([], tf.int64),
            'trip_start_hour': tf.io.FixedLenFeature([], tf.int64),
            'trip_start_month': tf.io.FixedLenFeature([], tf.int64),
            'great_circle_distance': tf.io.FixedLenFeature([], tf.float32, default_value = 0.0)
        })

      # 'fare' is returned as the label and also remains inside `features`.
      return features, features['fare']

    def _input_fn(v_test=False):
      """Open the matching TFRecord files as a dataset (v_test is unused in the visible part)."""
      # Expand the file pattern into the list of TFRecord files
      tfrecord_filenames = tf.io.gfile.glob(dir_uri)

      # Create a `TFRecordDataset` to read these GZIP-compressed files
      dataset = tf.data.TFRecordDataset(tfrecord_filenames, compression_type="GZIP")

      if mode == tf.estimator.ModeKeys.TRAIN:
        num_epochs = vnum_epochs # presumably None means repeat indefinitely - TODO confirm
      else:
        num_epochs = 1 # single pass: end-of-input after this


Overwriting inputfn_trainer.py


In [None]:
import inputfn_trainer as ift

# Smoke-test the input function against the eval split with a tiny batch size.
examples_uri = example_gen.outputs['examples'].get()[0].uri
eval_file = os.path.join(examples_uri, 'Split-eval/*')

fn_d = ift.make_input_fn(
    dir_uri=eval_file,
    mode=tf.estimator.ModeKeys.EVAL,
    batch_size=10,
)

fn_d(v_test=True)

In [32]:
%%writefile {_model_trainer_module_file}

import tensorflow as tf
import tensorflow.keras as keras
import inputfn_trainer as ift
import constants_trainer as ct

from tfx.components.trainer.fn_args_utils import FnArgs
print(tf.__version__)

# Accelerator selection: "tpu" connects to the Colab TPU; any other value
# falls back to the multi-worker mirrored strategy.
# NOTE(review): `strategy` is not referenced again in the visible part of this
# module, so the model is not built under `strategy.scope()` - confirm intent.
device = "gpu"

if device == "tpu":
  # This generated module does not import `os` at the top, so bring it into
  # scope here; otherwise the TPU branch fails with a NameError.
  import os
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
  tf.config.experimental_connect_to_cluster(resolver)
  # This is the TPU initialization code that has to be at the beginning.
  tf.tpu.experimental.initialize_tpu_system(resolver)
  strategy = tf.distribute.experimental.TPUStrategy(resolver)
else:
  strategy = tf.distribute.MultiWorkerMirroredStrategy()

# Default hyper-parameters for create_keras_model: Adam settings plus the
# number of hidden Dense blocks.
# NOTE(review): 'decay' is not read anywhere in the visible module code.
params_default = {
    'lr' : 0.001,
    'beta_1' : 0.99,
    'beta_2' : 0.999,
    'epsilon' : 1e-08,
    'decay' : 0.01,
    'hidden_layers' : 1
}

# Keras Input placeholders for every feature delivered by the tf.data
# pipeline, including the engineered `distance` column.
def create_feature_cols():
    """Create one `tf.keras.Input` of shape (1,) per model feature.

    Returns:
      {'K': inputs} where `inputs` maps feature name -> Keras Input. The three
      'trip_start_*' inputs are tf.string; coordinates and 'distance' are
      tf.float32.
    """
    string_features = ('trip_start_month', 'trip_start_hour', 'trip_start_day')
    float_features = ('pickup_latitude', 'pickup_longitude',
                      'dropoff_latitude', 'dropoff_longitude', 'distance')

    keras_dict_input = {}
    for feature_name in string_features:
        keras_dict_input[feature_name] = tf.keras.Input(
            name=feature_name, shape=(1,), dtype=tf.string)
    for feature_name in float_features:
        keras_dict_input[feature_name] = tf.keras.Input(
            name=feature_name, shape=(1,), dtype=tf.float32)

    return {'K': keras_dict_input}

def create_keras_model(feature_cols, bins_lat, bins_lon,  params = params_default):
    """Build and compile the wide-and-deep Keras regression model.

    Args:
      feature_cols: {'K': {feature_name: tf.keras.Input}}. Must provide the
        three 'trip_start_*' string inputs, the four coordinate inputs and
        'distance'.
      bins_lat: bucket boundaries used to discretize both latitude inputs.
      bins_lon: bucket boundaries used to discretize both longitude inputs.
      params: hyper-parameter dict; reads 'hidden_layers', 'lr', 'beta_1',
        'beta_2' and 'epsilon'.

    Returns:
      A compiled `tf.keras.Model` (mean-squared-error loss, RMSE metric) with a
      single output unit.
    """
    preprocessing = tf.keras.layers.experimental.preprocessing
    inputs = feature_cols['K']

    METRICS = [
            keras.metrics.RootMeanSquaredError(name='rmse')
    ]

    #Input layers
    input_feats = list(inputs.values())

    ##Handle categorical attributes( One-hot encoding )
    # Vocabularies are generated instead of typed out: the hand-written lists
    # were missing a comma after '8', which silently fused '8' and '9' into
    # the single token '89'.
    vocab_day = [str(day) for day in range(1, 8)]
    vocab_hour = [str(hour) for hour in range(1, 24)] + ['0']
    vocab_month = [str(month) for month in range(1, 13)]

    def _one_hot(vocabulary, tensor):
        # With mask_token=None, index 0 is the out-of-vocabulary slot and the
        # vocabulary occupies 1..len(vocabulary), so the encoding needs
        # len(vocabulary) + 1 slots to represent the last token.
        token_ids = preprocessing.StringLookup(vocabulary=vocabulary, mask_token=None)(tensor)
        return preprocessing.CategoryEncoding(max_tokens=len(vocabulary) + 1)(token_ids)

    cat_day = _one_hot(vocab_day, inputs['trip_start_day'])
    cat_hour = _one_hot(vocab_hour, inputs['trip_start_hour'])
    cat_month = _one_hot(vocab_month, inputs['trip_start_month'])

    ##Binning
    def _bucketize(boundaries, tensor):
        # N boundaries produce N + 1 buckets.
        bucket_ids = preprocessing.Discretization(bins = boundaries)(tensor)
        return preprocessing.CategoryEncoding(max_tokens=len(boundaries) + 1)(bucket_ids)

    cat_pickup_lat = _bucketize(bins_lat, inputs['pickup_latitude'])
    cat_pickup_lon = _bucketize(bins_lon, inputs['pickup_longitude'])
    cat_drop_lat = _bucketize(bins_lat, inputs['dropoff_latitude'])
    cat_drop_lon = _bucketize(bins_lon, inputs['dropoff_longitude'])

    ##Categorical cross
    cross_day_hour = preprocessing.CategoryCrossing()([cat_day, cat_hour])
    hash_cross_day_hour = preprocessing.Hashing(num_bins=24 * 7)(cross_day_hour)
    cat_cross_day_hour = preprocessing.CategoryEncoding(max_tokens = 24 * 7)(hash_cross_day_hour)

    # Hash-bucket count shared by both location crosses and their embeddings.
    geo_hash_bins = (len(bins_lat) + 1) ** 2

    cross_pick_lon_lat = preprocessing.CategoryCrossing()([cat_pickup_lat, cat_pickup_lon])
    hash_cross_pick_lon_lat = preprocessing.Hashing(num_bins=geo_hash_bins)(cross_pick_lon_lat)

    cross_drop_lon_lat = preprocessing.CategoryCrossing()([cat_drop_lat, cat_drop_lon])
    hash_cross_drop_lon_lat = preprocessing.Hashing(num_bins=geo_hash_bins)(cross_drop_lon_lat)

    # Embed each hashed location cross and sum over the crossed positions.
    embed_cross_pick_lon_lat = tf.keras.layers.Embedding(geo_hash_bins, 4)(hash_cross_pick_lon_lat)
    embed_cross_pick_lon_lat = tf.reduce_sum(embed_cross_pick_lon_lat, axis=-2)

    embed_cross_drop_lon_lat = tf.keras.layers.Embedding(geo_hash_bins, 4)(hash_cross_drop_lon_lat)
    embed_cross_drop_lon_lat = tf.reduce_sum(embed_cross_drop_lon_lat, axis=-2)

    # Numeric views of the calendar fields for the deep branch.
    int_trip_start_day = tf.strings.to_number(inputs['trip_start_day'], tf.float32)
    int_trip_start_hour = tf.strings.to_number(inputs['trip_start_hour'], tf.float32)
    int_trip_start_month = tf.strings.to_number(inputs['trip_start_month'], tf.float32)

    ###Create MODEL
    ####Concatenate all features( Numerical input )
    x_input_numeric = tf.keras.layers.concatenate([
                    inputs['pickup_latitude'], inputs['pickup_longitude'],
                    inputs['dropoff_latitude'], inputs['dropoff_longitude'],
                    inputs['distance'], embed_cross_pick_lon_lat, embed_cross_drop_lon_lat,
                    int_trip_start_day, int_trip_start_hour, int_trip_start_month
                    ])

    #DEEP - This Dense layer connects to input layer - Numeric Data
    x_numeric = tf.keras.layers.Dense(32, activation='relu', kernel_initializer="he_uniform")(x_input_numeric)
    x_numeric = tf.keras.layers.BatchNormalization()(x_numeric)

    ####Concatenate all Categorical features( Categorical converted )
    x_input_categ = tf.keras.layers.concatenate([
                    cat_month, cat_cross_day_hour, cat_pickup_lat, cat_pickup_lon,
                    cat_drop_lat, cat_drop_lon
                    ])

    #WIDE - This Dense layer connects to input layer - Categorical Data
    x_categ = tf.keras.layers.Dense(32, activation='relu', kernel_initializer="he_uniform")(x_input_categ)

    ####Concatenate both Wide and Deep layers
    x = tf.keras.layers.concatenate([x_categ, x_numeric])

    for _ in range(params['hidden_layers']):
        x = tf.keras.layers.Dense(32, activation='relu', kernel_initializer="he_uniform",
                                  activity_regularizer=tf.keras.regularizers.l2(0.00001))(x)
        x = tf.keras.layers.BatchNormalization()(x)

    #Final Layer
    out = tf.keras.layers.Dense(1, activation='relu')(x)
    model = tf.keras.Model(input_feats, out)

    #Set optimizer (`learning_rate` replaces the deprecated `lr` alias)
    opt = tf.keras.optimizers.Adam(learning_rate=params['lr'], beta_1=params['beta_1'],
                                   beta_2=params['beta_2'], epsilon=params['epsilon'])

    #Compile model
    model.compile(loss='mean_squared_error',  optimizer=opt, metrics = METRICS)

    #Print Summary (summary() prints by itself; wrapping it in print() adds a stray "None")
    model.summary()
    return model

def keras_train_and_evaluate(model, train_dataset, validation_dataset, epochs=100, steps_per_epoch=100):
  """Fit `model` on `train_dataset`, validating on `validation_dataset`.

  Args:
    model: compiled Keras model; trained in place.
    train_dataset: training data passed to `model.fit`.
    validation_dataset: evaluated in full after every epoch (no
      `validation_steps` is set).
    epochs: maximum number of epochs.
    steps_per_epoch: number of training batches that make up one epoch.
      Defaults to 100, the value that used to be hard-coded.

  Returns:
    The same `model` object after training.
  """
  #Callbacks: shrink the learning rate when val_loss plateaus, and stop early
  #while restoring the best weights. TensorBoard logging stays disabled, as before.
  callbacks = [
      keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                        patience=5, min_lr=0.00001, verbose = 1),
      keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True, verbose=True),
  ]

  #Train and Evaluate
  model.fit(train_dataset,
            validation_data = validation_dataset,
            epochs=epochs,
            steps_per_epoch = steps_per_epoch,
            callbacks=callbacks
            )

  return model

def save_model(model, model_save_path):
  """Export `model` with a serving signature that takes the raw trip fields.

  The signature accepts the four coordinates (float32) and the three
  'trip_start_*' fields (string), derives `distance` itself, and forwards
  everything to `model`. The export is written to `model_save_path + "/"`.
  """
  @tf.function
  def serving(dropoff_latitude, dropoff_longitude, pickup_latitude, pickup_longitude, trip_start_day, trip_start_hour, trip_start_month):
      # Callers do not send `distance`, so compute it from the coordinates.
      lat_sq = tf.abs(dropoff_latitude - pickup_latitude)**2
      lon_sq = tf.abs(dropoff_longitude - pickup_longitude)**2
      distance = tf.cast(tf.sqrt(lat_sq + lon_sq), tf.float32)

      # A mismatch in the number or dtype of these entries surfaces as
      # "COULDN'T COMPUTE OUTPUT TENSOR" when the model is called.
      return model({
          'dropoff_latitude': dropoff_latitude,
          'dropoff_longitude': dropoff_longitude,
          'pickup_latitude': pickup_latitude,
          'pickup_longitude': pickup_longitude,
          'trip_start_day': trip_start_day,
          'trip_start_hour': trip_start_hour,
          'trip_start_month': trip_start_month,
          'distance': distance
      })

  # One rank-1, batch-sized TensorSpec per signature argument.
  string_fields = ('trip_start_day', 'trip_start_hour', 'trip_start_month')
  float_fields = ('dropoff_latitude', 'dropoff_longitude',
                  'pickup_latitude', 'pickup_longitude')
  input_specs = {}
  for field in string_fields:
      input_specs[field] = tf.TensorSpec([None], dtype=tf.string, name=field)
  for field in float_fields:
      input_specs[field] = tf.TensorSpec([None], dtype=tf.float32, name=field)

  serving_signature = serving.get_concrete_function(**input_specs)

  tf.saved_model.save(
      model,
      model_save_path + "/",
      signatures=serving_signature
  )

##Entry point invoked by the TFX Trainer component
def run_fn(fn_args: FnArgs):
  """Build the datasets, create and train the model, then export it.

  Reads `train_files`, `eval_files`, `custom_config['epochs']` and
  `serving_model_dir` from `fn_args`.
  """
  #Training dataset
  train_input_fn = ift.make_input_fn(
      dir_uri = fn_args.train_files,
      mode = tf.estimator.ModeKeys.TRAIN,
      batch_size = 128)
  train_dataset = train_input_fn()

  #Validation dataset
  eval_input_fn = ift.make_input_fn(
      dir_uri = fn_args.eval_files,
      mode = tf.estimator.ModeKeys.EVAL,
      batch_size = 512)
  validation_dataset = eval_input_fn()

  #Model built from the Keras inputs and the bin edges stored in the constants module
  model = create_keras_model(
      params = params_default,
      feature_cols = create_feature_cols(),
      bins_lat = ct.bins_lat,
      bins_lon = ct.bins_lon)
  tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

  #Train for the number of epochs requested through custom_config
  num_epochs = fn_args.custom_config['epochs']
  model = keras_train_and_evaluate(model, train_dataset, validation_dataset, num_epochs)

  #Export with the custom serving signature
  save_model(model, fn_args.serving_model_dir)

Writing model_trainer.py


In [None]:
# Train the model with the TFX Trainer component, using the generic executor
# so that the module file's own training code is what runs.
trainer = Trainer(
    examples=example_gen.outputs['examples'],
    module_file=os.path.abspath(_model_trainer_module_file),
    custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
    train_args=trainer_pb2.TrainArgs(),
    eval_args=trainer_pb2.EvalArgs(),
    custom_config={"epochs": 1},
)

context.run(trainer)

**Pusher**

In [34]:
# Push the trained model to a filesystem destination rooted at
# `_serving_model_dir`.
pusher = Pusher(
    model=trainer.outputs['model'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=_serving_model_dir,
        ),
    ),
)
context.run(pusher)



0,1
.execution_id,6
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Pusher at 0x7fc301bee4d0.inputs['model'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Model' (1 artifact) at 0x7fc301bd9350.type_nameModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5.outputs['pushed_model'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'PushedModel' (1 artifact) at 0x7fc301bee810.type_namePushedModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: 
/content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6.exec_properties['push_destination']{  ""filesystem"": {  ""base_directory"": ""/content/tfx/serving_model""  } }['custom_config']null"
.component.inputs,['model'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Model' (1 artifact) at 0x7fc301bd9350.type_nameModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5
.component.outputs,['pushed_model'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'PushedModel' (1 artifact) at 0x7fc301bee810.type_namePushedModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6

0,1
.inputs,['model'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Model' (1 artifact) at 0x7fc301bd9350.type_nameModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5
.outputs,['pushed_model'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'PushedModel' (1 artifact) at 0x7fc301bee810.type_namePushedModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6
.exec_properties,"['push_destination']{  ""filesystem"": {  ""base_directory"": ""/content/tfx/serving_model""  } }['custom_config']null"

0,1
['model'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Model' (1 artifact) at 0x7fc301bd9350.type_nameModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5

0,1
.type_name,Model
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5

0,1
.type,<class 'tfx.types.standard_artifacts.Model'>
.uri,/content/tfx/Trainer/model/5

0,1
['pushed_model'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'PushedModel' (1 artifact) at 0x7fc301bee810.type_namePushedModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6

0,1
.type_name,PushedModel
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6

0,1
.type,<class 'tfx.types.standard_artifacts.PushedModel'>
.uri,/content/tfx/Pusher/pushed_model/6

0,1
['push_destination'],"{  ""filesystem"": {  ""base_directory"": ""/content/tfx/serving_model""  } }"
['custom_config'],

0,1
['model'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Model' (1 artifact) at 0x7fc301bd9350.type_nameModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5

0,1
.type_name,Model
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Model' (uri: /content/tfx/Trainer/model/5) at 0x7fc301721550.type<class 'tfx.types.standard_artifacts.Model'>.uri/content/tfx/Trainer/model/5

0,1
.type,<class 'tfx.types.standard_artifacts.Model'>
.uri,/content/tfx/Trainer/model/5

0,1
['pushed_model'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'PushedModel' (1 artifact) at 0x7fc301bee810.type_namePushedModel._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6

0,1
.type_name,PushedModel
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'PushedModel' (uri: /content/tfx/Pusher/pushed_model/6) at 0x7fc30168f250.type<class 'tfx.types.standard_artifacts.PushedModel'>.uri/content/tfx/Pusher/pushed_model/6

0,1
.type,<class 'tfx.types.standard_artifacts.PushedModel'>
.uri,/content/tfx/Pusher/pushed_model/6


In [35]:
# Display the Pusher's output channels (shown as this cell's result).
pusher.outputs

{'pushed_model': Channel(
    type_name: PushedModel
    artifacts: [Artifact(artifact: id: 7
type_id: 16
uri: "/content/tfx/Pusher/pushed_model/6"
custom_properties {
  key: "name"
  value {
    string_value: "pushed_model"
  }
}
custom_properties {
  key: "producer_component"
  value {
    string_value: "Pusher"
  }
}
custom_properties {
  key: "pushed"
  value {
    int_value: 1
  }
}
custom_properties {
  key: "pushed_destination"
  value {
    string_value: "/content/tfx/serving_model/1619413569"
  }
}
custom_properties {
  key: "pushed_version"
  value {
    string_value: "1619413569"
  }
}
custom_properties {
  key: "state"
  value {
    string_value: "published"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "0.29.0"
  }
}
state: LIVE
, artifact_type: id: 16
name: "PushedModel"
)]
    additional_properties: {}
    additional_custom_properties: {}
)}

In [None]:
# Load the pushed SavedModel and print each of its serving signatures.
# Component outputs are read by key here, the same way the other cells read
# them (e.g. trainer.outputs['model']); attribute-style access only works
# while `outputs` is a wrapper object rather than a plain dict.
push_uri = pusher.outputs['pushed_model'].get()[0].uri
model = tf.saved_model.load(push_uri)

for item in model.signatures.items():
  pp.pprint(item)

**End-to-end pipeline**

In [37]:
!rm -rf data.*
# !rm -rf *trainer.py ##EDIT: Python files have to be retained
!rm -rf *.csv
!sudo rm -r /content/tfx

! cd /content/
! mkdir /content/tfx/
! mkdir /content/tfx/pipelines
! mkdir /content/tfx/metadata
! mkdir /content/tfx/logs
! mkdir /content/tfx/data
! mkdir /content/tfx/serving_model

! mkdir /content/train_data/
! mkdir /content/eval_data/

!wget https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv

--2021-04-26 05:07:27--  https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1922812 (1.8M) [text/plain]
Saving to: ‘data.csv’


2021-04-26 05:07:27 (48.4 MB/s) - ‘data.csv’ saved [1922812/1922812]



In [39]:


# Read the downloaded CSV, remove the columns listed below, and discard any
# row that still contains a missing value.
df = (
    pd.read_csv('/content/data.csv')
    .drop(columns=['trip_start_timestamp', 'trip_miles', 'pickup_census_tract',
                   'dropoff_census_tract', 'trip_seconds', 'payment_type', 'tips',
                   'company', 'dropoff_community_area', 'pickup_community_area'])
    .dropna()
)

# Seeded random mask: rows whose draw is below 0.9 go to training, the rest
# to evaluation.
np.random.seed(seed=2)
msk = np.random.rand(len(df)) < 0.9
traindf, evaldf = df[msk], df[~msk]

print(len(traindf))
print(len(evaldf))

# NOTE(review): the training CSV is written with a header row while the
# evaluation CSV is written without one - confirm the eval consumer expects that.
traindf.to_csv("/content/train_data/data.csv", index=False, header=True)
evaldf.to_csv("/content/eval_data/eval.csv", index=False, header=False)



13077
1442


In [43]:
from typing import Dict, List, Text

# https://github.com/tensorflow/tfx/blob/master/tfx/examples/chicago_taxi_pipeline/
def create_final_pipeline(
    pipeline_name: Text,
    root_path: Text,
    data_path: Text,
    training_params: Dict[Text, Text],
    # beam_pipeline_args: List[Text],
) -> pipeline.Pipeline:
  """Assemble the end-to-end TFX pipeline definition.

  The components are, in order: CsvExampleGen, StatisticsGen, SchemaGen,
  ExampleValidator, Trainer and Pusher.

  Args:
    pipeline_name: Name given to the resulting pipeline.
    root_path: Used as the pipeline root; the SQLite metadata file
      (`metadata.db`) and the `serving_model` push directory are placed
      directly under it.
    data_path: Input location handed to CsvExampleGen via `external_input`.
    training_params: Forwarded unchanged to the Trainer as `custom_config`.

  Returns:
    A `pipeline.Pipeline` carrying the pipeline logic only; no runner-specific
    information is included.
  """
  metadata_db_path = os.path.join(root_path, 'metadata.db')      # <root>/metadata.db
  serving_model_dir = os.path.join(root_path, 'serving_model')  # <root>/serving_model

  # Ingest the CSV data.
  example_gen = CsvExampleGen(input=external_input(data_path))

  # Compute statistics, infer a schema from them, and validate the statistics
  # against that schema.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])

  # Train with the user module file through the generic executor.
  trainer = Trainer(
      module_file=os.path.abspath(_model_trainer_module_file),
      custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
      examples=example_gen.outputs['examples'],
      train_args=trainer_pb2.TrainArgs(),
      eval_args=trainer_pb2.EvalArgs(),
      custom_config=training_params,
  )

  # Push the trained model to the filesystem serving directory.
  pusher = Pusher(
      model=trainer.outputs['model'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  # NOTE(review): artifacts are rooted at `root_path` itself, not at a
  # `pipelines` subdirectory - confirm this is intended.
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=root_path,
      components=[
          example_gen, statistics_gen, infer_schema, validate_stats,
          trainer, pusher,
      ],
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_db_path),
      enable_cache=True,
      beam_pipeline_args=['--direct_num_workers=%d' % 0],
  )

In [None]:
# Run the assembled pipeline with the local DAG runner.
from tfx.orchestration.local.local_dag_runner import LocalDagRunner

# Root for everything the pipeline writes: <current working directory>/tfx
_tfx_root = os.path.join(os.getcwd(), 'tfx')

# Handed through to the Trainer as its custom_config.
training_params = {"epochs": 50}

# Build the pipeline definition, then execute it.
p_ = create_final_pipeline(
    pipeline_name="local_pipeline",
    root_path=_tfx_root,
    data_path="/content/train_data",
    training_params=training_params,
)

LocalDagRunner().run(p_)