In [2]:
from __future__ import print_function, division, absolute_import
import tensorflow as tf
import tensorflow_model_analysis as tfma
from tensorflow_model_analysis.eval_saved_model.post_export_metrics import post_export_metrics
from tensorflow_model_analysis.slicer import slicer
from tensorflow_transform.coders import example_proto_coder
from tensorflow_transform.tf_metadata import dataset_schema
from tensorflow_transform import coders as tft_coders
import apache_beam as beam
from config import REGION, BUCKET, PROJECT, DELIM, RENAMED_COLS, STRING_COLS, NUMERIC_COLS, LABEL_COL, TOKENIZE_COL, NGRAM_RANGE, MAX_TOKENS
# Emit INFO-level TensorFlow log messages (e.g. the "Restoring parameters" lines) in the notebook output.
tf.logging.set_verbosity(tf.logging.INFO)

  from ._conv import register_converters as _register_converters
  from .murmurhash import murmurhash3_32
  from ._trlib import TRLIBQuadraticSubproblem
  from ._group_columns import group_dense, group_sparse
  from .lbfgsb import _minimize_lbfgsb
  from . import _stats
  from ._logistic_sigmoid import _log_logistic_sigmoid
  from .expected_mutual_info_fast import expected_mutual_information
  from .pairwise_fast import _chi2_kernel_fast, _sparse_manhattan


In [9]:
# Slice specifications consumed by process_tfma(); a single default-constructed
# SingleSliceSpec is the only slice requested.
slice_spec = [slicer.SingleSliceSpec()]


def process_tfma(on_cloud=False):
    """Evaluate the exported eval saved model with TFMA through an Apache Beam pipeline.

    Reads the TFRecord files matching ``eval*.gz``, applies
    ``tfma.EvaluateAndWriteResults`` with the module-level ``slice_spec`` and
    writes the results to the output directory. Any previous output directory
    is deleted before the pipeline runs.

    Args:
        on_cloud: If True, run with ``DataflowRunner`` and use the
            ``gs://{BUCKET}/{PROJECT}/...`` input and output locations.
            Otherwise run with ``DirectRunner`` against the local ``./data``
            and ``./model_trained`` directories.

    Raises:
        IOError: If ``model_trained/eval/tfma`` contains no numerically named
            export directory to evaluate.
    """
    # Function-local imports mirror the notebook's import cell. Several of them
    # are not referenced below; they are kept so import-time behaviour (and any
    # early ImportError) stays exactly as before.
    import tensorflow_model_analysis as tfma
    from tensorflow_model_analysis.eval_saved_model.post_export_metrics import post_export_metrics
    from tensorflow_model_analysis.slicer import slicer
    from tensorflow_transform.coders import example_proto_coder
    from tensorflow_transform.tf_metadata import dataset_schema
    from tensorflow_transform import coders as tft_coders
    from config import PROJECT, BUCKET
    import os
    import datetime
    from glob import glob
    import snappy

    # Resolve the model to evaluate before touching any previous output.
    # glob() makes no ordering guarantee, so taking "[-1]" of its raw result can
    # return an arbitrary entry -- including the sibling 'evaluated' output
    # directory. Keep only the numerically named export directories and pick
    # the one with the largest number.
    # NOTE(review): export directory names are assumed to be numeric timestamps
    # (as in 'model_trained/eval/tfma/1534067348') -- confirm for other exporters.
    export_dirs = [
        path for path in glob('model_trained/eval/tfma/*')
        if os.path.isdir(path) and os.path.basename(path).isdigit()
    ]
    if not export_dirs:
        raise IOError(
            'No exported eval saved model found under model_trained/eval/tfma')
    eval_saved_model_path = max(
        export_dirs, key=lambda path: int(os.path.basename(path)))
    # NOTE(review): this path is resolved on the local filesystem even when
    # on_cloud=True -- confirm that Dataflow workers can actually read it.

    job_name = 'preprocess-for-tfma-{project}-'.format(project=PROJECT) + \
        datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

    if on_cloud:
        import subprocess
        print('Launching Dataflow job {} ... hang on'.format(job_name))
        runner = 'DataflowRunner'
        output_dir = 'gs://{bucket}/{project}/model_trained/eval/tfma/evaluated'.format(
            bucket=BUCKET, project=PROJECT)
        input_tfrecords_pattern = 'gs://{bucket}/{project}/data/tft/eval*.gz'.format(
            bucket=BUCKET, project=PROJECT)
        # Argument list instead of a formatted-then-split string, so the
        # target path is always passed to gsutil as exactly one argument.
        subprocess.call(['gsutil', 'rm', '-r', output_dir])
    else:
        import shutil
        print('Launching local job ... hang on')
        runner = 'DirectRunner'
        output_dir = './model_trained/eval/tfma/evaluated'
        input_tfrecords_pattern = './data/tft/eval*.gz'
        shutil.rmtree(output_dir, ignore_errors=True)

    options = {
        'staging_location': os.path.join(output_dir, 'tmp', 'staging'),
        'temp_location': os.path.join(output_dir, 'tmp'),
        'job_name': job_name,
        'project': PROJECT,
        'max_num_workers': 24,
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True,
        'requirements_file': 'requirements.txt'
    }
    opts = beam.pipeline.PipelineOptions(flags=[], **options)

    with beam.Pipeline(runner, options=opts) as pipeline:
        eval_data = (
            pipeline
            | 'read_eval_data' >> beam.io.ReadFromTFRecord(input_tfrecords_pattern)
        )

        # Optional plot metrics, currently disabled:
        #     add_metrics_callbacks=[
        #         post_export_metrics.calibration_plot_and_prediction_histogram(),
        #         post_export_metrics.auc_plots()
        #     ],
        _ = (
            eval_data
            | 'evaluate_and_write_results' >> tfma.EvaluateAndWriteResults(
                eval_saved_model_path=eval_saved_model_path,
                slice_spec=slice_spec,
                output_path=output_dir
            )
        )

In [10]:
# Run the TFMA evaluation locally (DirectRunner); pass on_cloud=True for Dataflow.
process_tfma(on_cloud=False)

Launching local job ... hang on
INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables


INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables


INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables


INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables


INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables


INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables


INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables


INFO:tensorflow:Restoring parameters from model_trained/eval/tfma/1534067348/variables/variables
