In [3]:
from zenml.core.datasources.csv_datasource import CSVDatasource
from zenml.core.pipelines.training_pipeline import TrainingPipeline
from zenml.core.steps.evaluator.tfma_evaluator import TFMAEvaluator
from zenml.core.steps.split.random_split import RandomSplit
from zenml.core.steps.preprocesser.standard_preprocesser.standard_preprocesser import StandardPreprocesser
from zenml.core.steps.trainer.tensorflow_trainers.tf_ff_trainer import FeedForwardTrainer

In [4]:
# Create the pipeline object that the following cells configure and run.
PIPELINE_NAME = 'Quickstart'
training_pipeline = TrainingPipeline(name=PIPELINE_NAME)

2021-02-04 17:32:51,400 — zenml.core.pipelines.base_pipeline — INFO — Pipeline Quickstart created.


In [5]:
# Register a CSV datasource with the pipeline.
# Adding it this way will automatically track and version it.
DATASET_NAME = 'Pima Indians Diabetes Dataset'
DATASET_PATH = 'gs://zenml_quickstart/diabetes.csv'

ds = CSVDatasource(name=DATASET_NAME, path=DATASET_PATH)
training_pipeline.add_datasource(ds)

2021-02-04 17:32:56,135 — zenml.core.datasources.base_datasource — INFO — Datasource Pima Indians Diabetes Dataset created.


In [6]:
# Column names shared by the preprocessing and evaluation steps below.
FEATURE_COLUMNS = ['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
                   'pedigree', 'age']
LABEL_COLUMN = 'has_diabetes'

# Split step: random 70/30 partition into 'train' and 'eval'.
split_step = RandomSplit(split_map={'train': 0.7, 'eval': 0.3})
training_pipeline.add_split(split_step)

# Preprocessing step. StandardPreprocesser() has sane defaults for normal
# preprocessing methods; the overwrite entry replaces the label's transform
# list with a single 'no_transform' method.
label_overwrite = {'transform': [{'method': 'no_transform', 'parameters': {}}]}
preprocess_step = StandardPreprocesser(
    features=FEATURE_COLUMNS,
    labels=[LABEL_COLUMN],
    overwrite={LABEL_COLUMN: label_overwrite},
)
training_pipeline.add_preprocesser(preprocess_step)

# Trainer step: feed-forward network with a single sigmoid output unit,
# trained with binary cross-entropy for 20 epochs.
trainer_step = FeedForwardTrainer(
    loss='binary_crossentropy',
    last_activation='sigmoid',
    output_units=1,
    metrics=['accuracy'],
    epochs=20,
)
training_pipeline.add_trainer(trainer_step)

# Evaluator step: slice on the label column and request two metrics for it.
evaluator_step = TFMAEvaluator(
    slices=[[LABEL_COLUMN]],
    metrics={LABEL_COLUMN: ['binary_crossentropy', 'binary_accuracy']},
)
training_pipeline.add_evaluator(evaluator_step)


In [7]:
# Run the pipeline locally. The captured log below shows the local orchestrator
# reporting components as they start and finish (DataGen, DataStatistics,
# DataSchema, SplitGen, SplitStatistics, SplitSchema, Transform, Trainer,
# Evaluator).
training_pipeline.run()

2021-02-04 17:33:13,713 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component DataGen is running.




2021-02-04 17:33:14,003 — zenml.core.steps.data.csv_data_step — INFO — Matched 1: ['gs://zenml_quickstart/diabetes.csv']
2021-02-04 17:33:14,005 — zenml.core.steps.data.csv_data_step — INFO — Using header from file: gs://zenml_quickstart/diabetes.csv.
2021-02-04 17:33:14,251 — zenml.core.steps.data.csv_data_step — INFO — Header: ['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree', 'age', 'has_diabetes'].


Connecting anonymously.


2021-02-04 17:33:18,748 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component DataGen is finished.
2021-02-04 17:33:18,750 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component DataStatistics is running.
2021-02-04 17:33:19,815 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component DataStatistics is finished.
2021-02-04 17:33:19,815 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component DataSchema is running.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


2021-02-04 17:33:19,843 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component DataSchema is finished.
2021-02-04 17:33:19,844 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component SplitGen is running.
2021-02-04 17:33:20,344 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component SplitGen is finished.
2021-02-04 17:33:20,345 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component SplitStatistics is running.
2021-02-04 17:33:22,145 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component SplitStatistics is finished.
2021-02-04 17:33:22,146 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component SplitSchema is running.
2021-02-04 17:33:22,164 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component SplitSchema is finished.
2021-02-04 17:33:22,164 — zenml.core.backends.orchestrator.local

Instructions for updating:
Schema is a deprecated, use schema_utils.schema_from_feature_spec to create a `Schema`
Instructions for updating:
Use ref() instead.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'


2021-02-04 17:33:31,252 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component Transform is finished.
2021-02-04 17:33:31,253 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component Trainer is running.
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
age_xf (InputLayer)             [(None, 1)]          0                                            
__________________________________________________________________________________________________
bmi_xf (InputLayer)             [(None, 1)]          0                                            
__________________________________________________________________________________________________
dbp_xf (InputLayer)             [(None, 1)]          0                                            
________________________

Instructions for updating:
use `tf.profiler.experimental.stop` instead.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


2021-02-04 17:33:37,053 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component Trainer is finished.
2021-02-04 17:33:37,054 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component Evaluator is running.
2021-02-04 17:33:39,826 — zenml.core.backends.orchestrator.local.zenml_local_orchestrator — INFO — Component Evaluator is finished.


In [8]:
# Show the schema of the data. In the captured output this renders as a table
# with one row per feature and the columns Type, Presence, Valency and Domain.
training_pipeline.view_schema()


Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'age',FLOAT,required,single,-
'bmi',FLOAT,required,single,-
'dbp',FLOAT,required,single,-
'has_diabetes',FLOAT,required,single,-
'insulin',FLOAT,required,single,-
'pedigree',FLOAT,required,single,-
'pgc',FLOAT,required,single,-
'times_pregnant',FLOAT,required,single,-
'tst',FLOAT,required,single,-


In [9]:
# View statistics of the train and eval splits.
# NOTE(review): the log line emitted by this call mentions a `magic` option
# (magic=True attempts to append to the current notebook, magic=False opens a
# new window) -- confirm against the view_statistics signature before relying
# on it.
training_pipeline.view_statistics()

2021-02-04 17:34:03,734 — zenml.core.pipelines.training_pipeline — INFO — Viewing statistics. If magic=False then a new window will open up with a notebook for evaluation. If magic=True, then an attempt will be made to append to the current notebook.
Launching server at http://localhost:58522




In [10]:
# Creates a notebook for evaluation.
# NOTE(review): the log line emitted by this call mentions a `magic` option
# (magic=True attempts to append to the current notebook, magic=False opens a
# new window) -- confirm against the evaluate signature before relying on it.
training_pipeline.evaluate()

2021-02-04 17:34:28,663 — zenml.core.pipelines.training_pipeline — INFO — Evaluating pipeline. If magic=False then a new window will open up with a notebook for evaluation. If magic=True, then an attempt will be made to append to the current notebook.
