# Example - simple notebook

In [None]:
import tensorflow as tf
import numpy as np

### Load data

In [None]:
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

### Preprocess data

In [None]:
train_images = train_images / 255.0
test_images = test_images / 255.0

### Create and compile DNN model

In [None]:
model = tf.keras.Sequential([
   tf.keras.layers.Flatten(input_shape=(28, 28)),
   tf.keras.layers.Dense(128, activation='relu'),
   tf.keras.layers.Dense(10),
   tf.keras.layers.Softmax()])

In [None]:
model.compile(optimizer='adam',
             loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
             metrics=['accuracy'])

### Train model

In [None]:
model.fit(train_images, train_labels, epochs=10)

### Evaluate model

In [None]:
test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)
print('\nTest accuracy:', test_acc)

### Save model

In [None]:
model.save('my_model')

### Run sample prediction

In [None]:
predictions = model.predict(test_images)
print('\nPrediction:',predictions[0])

### Training @ TFOperator

In [1]:
gcp_project='<PROJECT_ID>'

In [2]:
import tensorflow as tf
import numpy as np
import logging
class mySimpleTrainer(object):

    def __init__(self):
        self.model = None
        pass

    def load_data_train(self):
        logging.basicConfig(level=logging.INFO)
        logging.info('STEP 1 - Start loading data:')
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
        logging.info('Loading data finished')
        return (train_images, train_labels, test_images, test_labels)
        
    def prepare_data_train(self, train_images, test_images):   
        logging.basicConfig(level=logging.INFO)
        logging.info('STEP 2 - Start preparing data:')
        train_images = train_images / 255.0
        test_images = test_images / 255.0
        logging.info('Preparing data finished')
        return (train_images, test_images)

    def generate_keras_model(self):
        logging.basicConfig(level=logging.INFO)
        logging.info('STEP 3 - Start generating model:')
        model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10),
        tf.keras.layers.Softmax()])
        logging.info('Generating model finished')
        return model

    def train_model(self, model, train_images, train_labels, test_images, test_labels, epochs):
        logging.basicConfig(level=logging.INFO)
        logging.info('STEP 4 - Starting training model:')
        model.compile(optimizer='adam',
             loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
             metrics=['accuracy'])
        model.fit(train_images, train_labels, epochs=epochs)
        logging.info('Train model finished')
                   
    def launch_train_cycle(self,epochs):
        logging.basicConfig(level=logging.INFO)
        logging.info('Starting E2E cycle:')
        logging.info('Arguments:')
        logging.info('epochs:{}'.format(epochs))
        train_images, train_labels, test_images, test_labels = self.load_data_train()
        train_images, test_images = self.prepare_data_train(train_images, test_images)
        model = self.generate_keras_model()
        self.train_model(model, train_images, train_labels, test_images, test_labels, epochs)
        self.model = model
        logging.info('Train cycle finished')

In [3]:
#Parameters
epochs = 10
#Sequence
mySimpleTrainer_instance = mySimpleTrainer()
mySimpleTrainer_instance.launch_train_cycle(epochs)

INFO:root:Starting E2E cycle:
INFO:root:Arguments:
INFO:root:epochs:10
INFO:root:STEP 1 - Start loading data:
INFO:root:Loading data finished
INFO:root:STEP 2 - Start preparing data:
INFO:root:Preparing data finished
INFO:root:STEP 3 - Start generating model:
INFO:root:Generating model finished
INFO:root:STEP 4 - Starting training model:


Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:root:Train model finished
INFO:root:Train cycle finished


In [5]:
class mySimpleTrainerWrapper(object):
    
    def __init__(self):
        self.mySimpleTrainer_instance = mySimpleTrainer()

    def train(self):
        epochs = 10
        self.mySimpleTrainer_instance.launch_train_cycle(epochs)

    def predict(self, X, feature_names=None):
        """Predict using the model for given ndarray."""
        prediction = self.model.predict(data=X)
        # Do any postprocessing
        return prediction

In [None]:
##!pip install --user kubeflow-fairing

In [None]:
##!pip install --user msrestazure

In [25]:
!gcloud auth login --no-launch-browser


You are running on a Google Compute Engine virtual machine.
It is recommended that you use service accounts for authentication.

You can run:

  $ gcloud config set account `ACCOUNT`

to switch accounts if necessary.

Your credentials may be visible to others with access to this
virtual machine. Are you sure you want to authenticate with
your personal account?

Do you want to continue (Y/n)?  ^C


Command killed by keyboard interrupt



In [71]:
import os
from kubeflow import fairing
from kubeflow.fairing.builders import cluster
from kubeflow.fairing import cloud
os.environ["TF_JOB_VERSION"] = "v1"
BASE_IMAGE='tensorflow/tensorflow:2.1.0'
DOCKER_REGISTRY='gcr.io/{}/simple-notebook-example-tf'.format(gcp_project)
fairing.config.set_builder(name='cluster', registry=DOCKER_REGISTRY, 
                           base_image=BASE_IMAGE,
                           pod_spec_mutators=[cloud.gcp.add_gcp_credentials_if_exists],
                           push=True,
                           preprocessor=preprocessor,
                           context_source=cluster.gcs_context.GCSContextSource())
#fairing.config.set_deployer(name='tfjob', worker_count=1, ps_count=1)
#train_tfjob = fairing.config.fn(mySimpleTrainerWrapper)

NameError: name 'cluster_builder' is not defined

In [65]:
train_tfjob()

INFO:root:Using preprocessor: <kubeflow.fairing.preprocessors.function.FunctionPreProcessor object at 0x7f13860e7320>
INFO:root:Using builder: <kubeflow.fairing.builders.cluster.cluster.ClusterBuilder object at 0x7f1386190fd0>
INFO:root:Using deployer: <kubeflow.fairing.deployers.tfjob.tfjob.TfJob object at 0x7f13860e7be0>
INFO:root:Building image using cluster builder.
INFO:root:Creating docker context: /tmp/fairing_context_5d68_gwo
INFO:kubeflow.fairing.kubernetes.manager:Pod started running True


E1108 19:09:38.290112       1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.
	For verbose messaging see aws.Config.CredentialsChainVerboseErrors
[36mINFO[0m[0005] Retrieving image manifest tensorflow/tensorflow:2.1.0
E1108 19:09:38.439988       1 metadata.go:154] while reading 'google-dockercfg' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg
E1108 19:09:38.442201       1 metadata.go:166] while reading 'google-dockercfg-url' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg-url
[36mINFO[0m[0006] Retrieving image manifest tensorflow/tensorflow:2.1.0
[36mINFO[0m[0007] Built cross stage deps: map[]
[36mINFO[0m[0007] Retrieving image manifest tensorflow/tensorflow:2.1.0
[36mINFO[0m[0007] Retrieving image manifest tensorflow/tensorfl

ERROR:kubeflow.fairing.kubernetes.manager:Failed to launch fairing-tfjob-d4w9t-worker-0, reason: Error, message: None


Traceback (most recent call last):
  File "/app/function_shim.py", line 78, in <module>
    compare_version(args.python_version)
  File "/app/function_shim.py", line 50, in compare_version
    with Python ' + local_python_version + ' in the local environment.')
RuntimeError: The Python version 2.7 mismatches                            with Python 3.6 in the local environment.
