# TF Transform Example


### First, install the prerequisites. 

In [None]:
try:
  import colab
  !pip install --upgrade pip
except:
  pass

In [None]:
# Install the `tfx` package with pip, upgrading it if already present (-U).
!pip install -U tfx

## Restart the runtime?

__Warning:__ If you are using Google Colab, the first time that you run
the cell above, you must restart the runtime by clicking the
"RESTART RUNTIME" button above or by using the "Runtime > Restart
runtime ..." menu. This is because of the way that Colab
loads packages.

In [None]:
import pprint
import tempfile

import tensorflow as tf
import tensorflow_transform as tft
import tensorflow_transform.beam as tft_beam
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import schema_utils

### Create a Schema 

The schema describes the RAW data fields that will be passed into the processing function. 

In [None]:
# (feature name, dtype) pairs for the raw records. Each feature is declared
# below as a FixedLenFeature with an empty shape.
_RAW_FEATURE_DTYPES = (
    ('species', tf.string),
    ('weight', tf.float32),
    ('speed', tf.float32),
)

# Schema derived from the feature spec, then wrapped as dataset metadata.
_RAW_SCHEMA = schema_utils.schema_from_feature_spec({
    feature_name: tf.io.FixedLenFeature([], feature_dtype)
    for feature_name, feature_dtype in _RAW_FEATURE_DTYPES
})
_RAW_DATA_METADATA = dataset_metadata.DatasetMetadata(_RAW_SCHEMA)

### Just make up some raw data that will be transformed. 

In [None]:
# Made-up sample records. Each row of the table below is
# (weight, species, speed) and becomes one dict with those three keys.
_RAW_DATA = [
    {'weight': weight, 'species': species, 'speed': speed}
    for weight, species, speed in (
        (235.43, 'elephant', 15),
        (2.7, 'chicken', 25),
        (325.432, 'pig', 10),
        (21.34, 'dog', 15),
        (10.5, 'cat', 7),
        (40, 'dog', 30),
        (200, 'human', 17),
    )
]

### Define a processing Function

This function takes the raw data as input, uses TF Transform to transform it, and then returns the processed data. 

In [None]:
def _preprocessing_fn(inputs):
  """Map the raw input columns to their transformed counterparts.

  Args:
    inputs: mapping that provides the 'weight', 'species' and 'speed' columns.

  Returns:
    A dict with three entries:
      'weight_centered': weight minus `tft.mean(weight)`.
      'species_integerized': result of `tft.compute_and_apply_vocabulary`
        applied to species.
      'speed_normalized': result of `tft.scale_to_0_1` applied to speed.
  """
  raw_weight = inputs['weight']
  raw_species = inputs['species']
  raw_speed = inputs['speed']

  # The analyzers are invoked in the order: mean, scale_to_0_1, vocabulary.
  centered = raw_weight - tft.mean(raw_weight)
  scaled = tft.scale_to_0_1(raw_speed)
  vocab_ids = tft.compute_and_apply_vocabulary(raw_species)

  return dict(
      weight_centered=centered,
      species_integerized=vocab_ids,
      speed_normalized=scaled,
  )

### Process the data using TFT Beam

In [None]:
# Directory handed to the Beam context as its temp_dir.
_scratch_dir = tempfile.mkdtemp()

with tft_beam.Context(temp_dir=_scratch_dir):
  # Pair the raw records with their metadata, then pipe the pair through the
  # combined analyze-and-transform step driven by _preprocessing_fn.
  _raw_dataset = (_RAW_DATA, _RAW_DATA_METADATA)
  _analyze_and_transform = tft_beam.AnalyzeAndTransformDataset(
      _preprocessing_fn)
  transformed_dataset, transform_fn = _raw_dataset | _analyze_and_transform

# The transformed dataset unpacks into (data, metadata).
transformed_data, transformed_metadata = transformed_dataset

# Show the transformed data.
pprint.pprint(transformed_data)