In [1]:
import tensorflow as tf
import os
import argparse
from tensorflow.python.keras.callbacks import Callback

class MyFashionMnist(object):
  """Fashion-MNIST dense classifier configured from command-line arguments."""

  def train(self):
    """Parse hyperparameters, then build, fit and evaluate the model.

    Command-line arguments (all optional):
      --learning_rate  float, Adam learning rate (default 0.001)
      --dropout_rate   float, rate of every Dropout layer (default 0.2)
      --epoch          int, number of training epochs (default 5)
      --act            str, activation of the hidden Dense layers
                       (default 'relu')
      --layer          int, number of hidden Dense+Dropout pairs (default 1)

    Per-batch and per-epoch metrics are reported through the
    ``KatibMetricLog`` callback. Nothing is returned.
    """
    # Accept the hyperparameters as command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', required=False, type=float, default=0.001)
    parser.add_argument('--dropout_rate', required=False, type=float, default=0.2)
    # Intended range noted by the author: 5 ~ 15 epochs.
    parser.add_argument('--epoch', required=False, type=int, default=5)
    # Values noted by the author: relu, sigmoid, softmax, tanh.
    parser.add_argument('--act', required=False, type=str, default='relu')
    # Intended range noted by the author: 1 ~ 5 hidden layers.
    parser.add_argument('--layer', required=False, type=int, default=1)

    args = parser.parse_args()

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
    # Scale pixel values by 1/255 before feeding them to the network.
    x_train, x_test = x_train / 255.0, x_test / 255.0

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

    # argparse already converts --layer to int, so no extra cast is needed.
    for _ in range(args.layer):
        model.add(tf.keras.layers.Dense(128, activation=args.act))
        model.add(tf.keras.layers.Dropout(args.dropout_rate))

    model.add(tf.keras.layers.Dense(10, activation='softmax'))
    model.summary()

    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias and is rejected by recent Keras releases.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=args.learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    # verbose=0 silences Keras' own progress output; the callback does the
    # metric printing instead. The test split doubles as validation data.
    model.fit(x_train, y_train,
              verbose=0,
              validation_data=(x_test, y_test),
              epochs=args.epoch,
              callbacks=[KatibMetricLog()])

    # NOTE(review): the evaluation result is discarded here, as in the
    # original code — confirm whether it should be reported.
    model.evaluate(x_test,  y_test, verbose=0)

class KatibMetricLog(Callback):
    """Keras callback that prints metrics to stdout as ``name=value`` tokens.

    Presumably the output is meant to be scraped by Katib's stdout metrics
    collector (going by the class name) — the printed text is therefore kept
    exactly as it was. Values are read from the Keras ``logs`` dict under the
    keys 'acc', 'loss', 'val_acc' and 'val_loss'; a missing key prints as
    ``None``.
    """

    def on_batch_end(self, batch, logs=None):
        """Print the batch index with the batch accuracy and loss."""
        # None default instead of a shared mutable {} default argument.
        logs = logs or {}
        print("batch=" + str(batch),
              "accuracy=" + str(logs.get('acc')),
              "loss=" + str(logs.get('loss')))

    def on_epoch_begin(self, epoch, logs=None):
        """Print a header line with the (zero-based) epoch index."""
        print("epoch " + str(epoch) + ":")

    def on_epoch_end(self, epoch, logs=None):
        """Print the validation accuracy and loss for the finished epoch."""
        logs = logs or {}
        print("Validation-accuracy=" + str(logs.get('val_acc')),
              "Validation-loss=" + str(logs.get('val_loss')))

if __name__ == '__main__':
    # FAIRING_RUNTIME set   -> run the training directly (presumably this is
    #                          the case inside the image Fairing builds).
    # FAIRING_RUNTIME unset -> build the image and submit it as a job.
    if os.getenv('FAIRING_RUNTIME') is not None:
        remote_train = MyFashionMnist()
        remote_train.train()
    else:
        from kubeflow import fairing
        from kubeflow.fairing.kubernetes import utils as k8s_utils

        DOCKER_REGISTRY = 'kubeflow-registry.default.svc.cluster.local:30000'

        # The image is named 'katib-job' (not a fairing job) so that it can
        # be referenced from a Katib experiment.
        fairing.config.set_builder(
            'append',
            image_name='katib-job',
            base_image='brightfly/kubeflow-jupyter-lab:tf2.0-cpu',
            registry=DOCKER_REGISTRY,
            push=True,
        )

        # Run as a job in the 'admin' namespace with cpu=1 and memory=5
        # (the original note reads "cpu 1, memory 5GiB").
        fairing.config.set_deployer(
            'job',
            namespace='admin',
            pod_spec_mutators=[k8s_utils.get_resource_mutator(cpu=1, memory=5)],
        )

        fairing.config.run()

[I 200720 08:44:56 config:123] Using preprocessor: <kubeflow.fairing.preprocessors.converted_notebook.ConvertNotebookPreprocessor object at 0x7f790bf0db38>
[I 200720 08:44:56 config:125] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f790980df28>
[I 200720 08:44:56 config:127] Using deployer: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f790980df28>
[W 200720 08:44:56 append:50] Building image using Append builder...
[I 200720 08:44:56 base:105] Creating docker context: /tmp/fairing_context_kr9pup5t
[I 200720 08:44:57 converted_notebook:127] Converting fashion-mnist-katib.ipynb to fashion-mnist-katib.py
[I 200720 08:44:57 docker_creds_:234] Loading Docker credentials for repository 'brightfly/kubeflow-jupyter-lab:tf2.0-cpu'
[W 200720 08:44:59 append:54] Image successfully built in 2.381829743999333s.
[W 200720 08:44:59 append:94] Pushing image kubeflow-registry.default.svc.cluster.local:30000/katib-job:2250A8E1...
[I 200720 08:

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
2020-07-20 08:45:05.080743: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-07-20 08:45:05.086650: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2020-07-20 08:45:05.087620: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x550dff0 executing computations on platform Host. Devices:
2020-07-20 08:45:05.087650: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Host, Default Version

[W 200720 08:45:40 job:162] Cleaning up job fairing-job-d758k...
