In [17]:
from zenml.core.datasources.csv_datasource import CSVDatasource
from zenml.core.pipelines.training_pipeline import TrainingPipeline
from zenml.core.steps.evaluator.tfma_evaluator import TFMAEvaluator
from zenml.core.steps.split.random_split import RandomSplit
from zenml.core.steps.preprocesser.standard_preprocesser.standard_preprocesser import StandardPreprocesser
from zenml.core.steps.trainer.tensorflow_trainers.tf_ff_trainer import FeedForwardTrainer
from zenml.core.repo.repo import Repository

In [18]:
# Create the pipeline object named 'Quickstart'; the cells below attach the
# datasource and the individual steps to this same instance.
training_pipeline = TrainingPipeline(name='Quickstart')

2021-02-04 22:41:09,944 — zenml.core.pipelines.base_pipeline — INFO — Pipeline Quickstart created.


In [21]:
# Describe the CSV file as a named datasource. Per the original note, creating
# it this way lets it be tracked and versioned automatically.
ds = CSVDatasource(
    name='Pima Indians Diabetes Dataset',
    path='gs://zenml_quickstart/diabetes.csv',
)

# Alternative kept for reference: look the datasource up by name instead of
# constructing it (presumably for re-runs where it already exists — confirm).
# ds = Repository.get_instance().get_datasource_by_name("Pima Indians Diabetes Dataset")

# Attach the datasource to the pipeline.
training_pipeline.add_datasource(ds)

2021-02-04 22:41:21,392 — zenml.core.datasources.base_datasource — INFO — Datasource Pima Indians Diabetes Dataset created.


In [23]:
# # Run the pipeline locally
# training_pipeline.run()

In [24]:
# training_pipeline.view_statistics()

In [25]:
from mymodule.processor import MyCustomPreprocesser

In [26]:
# Register the custom preprocesser imported from mymodule.processor.
# NOTE(review): the recorded output of this cell is
# "BadName: Ref 'HEAD' did not resolve to an object". That message looks like a
# git reference lookup failing (e.g. a repository with no commits yet) —
# TODO confirm, and make sure the project is committed before using custom steps.
# NOTE(review): a later cell calls add_preprocesser again with
# StandardPreprocesser; presumably only one of the two is meant to be active —
# confirm which one this notebook should keep.
training_pipeline.add_preprocesser(MyCustomPreprocesser())

BadName: Ref 'HEAD' did not resolve to an object

In [8]:
# --- Split -------------------------------------------------------------------
# Random split: 70% of the data is mapped to 'train', 30% to 'eval'.
split_step = RandomSplit(split_map={'train': 0.7, 'eval': 0.3})
training_pipeline.add_split(split_step)

# --- Preprocessing -----------------------------------------------------------
# StandardPreprocesser ships with sane defaults for the usual preprocessing
# methods; the overwrite entry swaps the label's transform for 'no_transform'.
feature_columns = ['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
                   'pedigree', 'age']
label_overwrite = {
    'has_diabetes': {
        'transform': [{'method': 'no_transform', 'parameters': {}}],
    },
}
preprocesser_step = StandardPreprocesser(
    features=feature_columns,
    labels=['has_diabetes'],
    overwrite=label_overwrite,
)
training_pipeline.add_preprocesser(preprocesser_step)

# --- Training ----------------------------------------------------------------
# Feed-forward trainer: one output unit with a sigmoid activation, binary
# cross-entropy loss, accuracy as the reported metric, 20 epochs.
trainer_step = FeedForwardTrainer(
    loss='binary_crossentropy',
    last_activation='sigmoid',
    output_units=1,
    metrics=['accuracy'],
    epochs=20,
)
training_pipeline.add_trainer(trainer_step)

# --- Evaluation --------------------------------------------------------------
# TFMA evaluator: slice on 'has_diabetes' and request the two metrics listed
# under the 'has_diabetes' key.
evaluation_metrics = {
    'has_diabetes': ['binary_crossentropy', 'binary_accuracy'],
}
evaluator_step = TFMAEvaluator(
    slices=[['has_diabetes']],
    metrics=evaluation_metrics,
)
training_pipeline.add_evaluator(evaluator_step)


In [None]:
# Run the pipeline locally.
# NOTE(review): this cell was saved without an execution count, while the cells
# after it still carry recorded output — re-run the notebook top to bottom
# before sharing so the shown results match the configuration above.
training_pipeline.run()

In [8]:
# See schema of data: the recorded output lists one row per column with its
# type, presence, valency and domain.
training_pipeline.view_schema()


Feature name,Type,Presence,Valency,Domain
'age',FLOAT,required,single,-
'bmi',FLOAT,required,single,-
'dbp',FLOAT,required,single,-
'has_diabetes',FLOAT,required,single,-
'insulin',FLOAT,required,single,-
'pedigree',FLOAT,required,single,-
'pgc',FLOAT,required,single,-
'times_pregnant',FLOAT,required,single,-
'tst',FLOAT,required,single,-


In [9]:
# See statistics of train and eval.
# Called with default arguments here. According to the log message this emits,
# a `magic` option decides whether a new window with a notebook is opened
# (magic=False) or the result is appended to the current notebook (magic=True).
training_pipeline.view_statistics()

2021-02-04 17:34:03,734 — zenml.core.pipelines.training_pipeline — INFO — Viewing statistics. If magic=False then a new window will open up with a notebook for evaluation. If magic=True, then an attempt will be made to append to the current notebook.
Launching server at http://localhost:58522




In [10]:
# Creates a notebook for evaluation.
# Called with default arguments here. According to the log message this emits,
# magic=False opens the evaluation notebook in a new window, while magic=True
# attempts to append it to the current notebook.
training_pipeline.evaluate()

2021-02-04 17:34:28,663 — zenml.core.pipelines.training_pipeline — INFO — Evaluating pipeline. If magic=False then a new window will open up with a notebook for evaluation. If magic=True, then an attempt will be made to append to the current notebook.
