# <div class="alert alert-block alert-info" style="border-width:4px">SBrain Model Training Using Input Function Then Inference Tutorial </div>

### Let's try it out

Before we begin, it would be good to copy this notebook and rename it with your name at the end, since we don't want multiple people editing the same notebook at the same time, causing reloading issues.

### NOTE : Please try out the [Experiment Management Notebook](../experiment-management/1_SbrainExperimentManagement.ipynb) before this tutorial.



In [None]:
from sbrain.learning.experiment import *
from sbrain.dataset.dataset import *

In [None]:
user_name = "admin"


def uniquify(name):
    """Return ``name`` made unique by appending the user name and a timestamp.

    The suffix is ``user_name`` followed by the current ``time.time()`` value
    with its decimal point removed. Setting the local ``should_uniquify``
    switch to ``False`` makes the function return ``name`` unchanged.
    """
    import time

    should_uniquify = True
    if not should_uniquify:
        return name

    timestamp = str(time.time()).replace(".", "")
    return name + user_name + timestamp

### Training Model Using Input Function

We are going to use the CIFAR-10 dataset. It is already split into `train`, `eval` and `predict` subsets. Each of these folders contains images in .png format.

In [None]:
def input_function(mode, batch_size, params):
    """Build the CIFAR-10 ``tf.data.Dataset`` for the given estimator mode.

    The images are read from the ``train``, ``eval`` or ``predict`` sub-folder
    of the dataset root, chosen from ``mode``. The label of every image is
    taken from its file name, which must look like ``<prefix>_<class>.png``.

    Args:
      mode: a ``tf.estimator.ModeKeys`` value. TRAIN reads ``train``, EVAL
        reads ``eval`` and anything else reads ``predict``.
      batch_size: number of examples per batch.
      params: hyper-parameters; not used by this function.

    Returns:
      A batched dataset of ``({"data": float32 image}, int32 label)`` pairs.
      In TRAIN mode the dataset is shuffled and repeated indefinitely; in the
      other modes it is a single pass in file order, so predictions stay
      aligned with their input files.

    Raises:
      ValueError: if the subset folder contains no ``.png`` file, or a file
        name does not encode a known CIFAR-10 class.
    """
    # Imports are kept inside the function, like elsewhere in this notebook.
    # NOTE(review): presumably so the function is self-contained when SBrain
    # ships it to the workers - confirm.
    import glob
    import os
    import tensorflow as tf

    CIFAR_DATASET_PATH_IN_NFS="/workspace/shared-dir/sample-notebooks/demo-data/cifar10-input-function"

    # Class name (as embedded in the file names) -> integer label id.
    CLASS_IDS = {
        'airplane': 0,
        'automobile': 1,
        'bird': 2,
        'cat': 3,
        'deer': 4,
        'dog': 5,
        'frog': 6,
        'horse': 7,
        'ship': 8,
        'truck': 9,
    }

    class Cifar10DataSet(object):
        """Cifar10 data set.
        Described by http://www.cs.toronto.edu/~kriz/cifar.html.
        """

        def __init__(self, data_dir, subset='train'):
            self.data_dir = data_dir
            self.subset = subset

        def get_filenames(self):
            """Return the paths of all .png files of this subset."""
            if self.subset not in ('train', 'eval', 'predict'):
                raise ValueError('Invalid data subset "%s"' % self.subset)
            # Honour the directory given to the constructor instead of
            # silently falling back to the hard-coded dataset root.
            return glob.glob(os.path.join(self.data_dir, self.subset, "*.png"))

        @staticmethod
        def _label_id(filename):
            """Map a ``<prefix>_<class>.png`` file name to its label id."""
            img_name = os.path.basename(filename)
            try:
                lbl_str = img_name[img_name.index('_') + 1:img_name.index('.')]
                return CLASS_IDS[lbl_str]
            except (ValueError, KeyError):
                raise ValueError(
                    'Cannot derive a CIFAR-10 label from file name "{}"'.format(filename))

        def parser(self, filename, label):
            """Decode one PNG file into a float32 image and an int32 label."""
            image_string = tf.read_file(filename)
            image_decoded = tf.image.decode_png(image_string, channels=3)
            image = tf.cast(image_decoded, tf.float32)
            label = tf.cast(label, tf.int32)
            return ({"data": image}, label)

        def get_dataset(self):
            """Read the images and labels from 'filenames'."""
            filenames = self.get_filenames()
            if not filenames:
                # An empty list would otherwise surface later as an obscure
                # dtype error inside the parser.
                raise ValueError(
                    'No .png files found for subset "{}" under "{}"'.format(
                        self.subset, self.data_dir))
            labels = [self._label_id(f) for f in filenames]

            dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
            return dataset.map(self.parser)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if is_training:
        subset = 'train'
    elif mode == tf.estimator.ModeKeys.EVAL:
        subset = 'eval'
    else:
        subset = 'predict'

    dataset = Cifar10DataSet(CIFAR_DATASET_PATH_IN_NFS, subset).get_dataset()

    # Shuffle only while training; evaluation and prediction keep file order.
    if is_training:
        dataset = dataset.shuffle(1000)
    dataset = dataset.batch(batch_size)
    if is_training:
        dataset = dataset.repeat()
    return dataset

In [None]:
def cifar_model_function(features, labels, mode, params):
    """Model function for a ResNet-44 (v1, basic block) CIFAR-10 classifier.

    Builds the network on ``features["data"]`` and returns the
    ``tf.estimator.EstimatorSpec`` that matches ``mode``:

    * PREDICT: a predictions dict with ``class_ids``, ``probabilities`` and
      ``logits``.
    * EVAL: the loss plus an ``accuracy`` metric.
    * TRAIN: the loss plus a momentum-SGD training op driven by a
      piecewise-constant learning-rate schedule.

    Args:
      features: dict of tensors; the ``"data"`` entry must hold 32*32*3 values
        per example (it is reshaped to ``(-1, 32, 32, 3)``).
      labels: integer class ids; not used in PREDICT mode.
      mode: one of the ``tf.estimator.ModeKeys`` values.
      params: hyper-parameters, read by subscript. ``batch_norm_decay`` and
        ``weight_decay`` are required; ``batch_norm_epsilon`` (default 1e-5)
        and ``learning_rate`` (default 0.1) are optional.

    Returns:
      A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    ## Importing relevant packages
    # Imports are kept inside the function, like elsewhere in this notebook.
    # NOTE(review): presumably so the function is self-contained when SBrain
    # ships it to the workers - confirm.
    import tensorflow as tf
    import numpy as np

    def _hparam(name, default):
        """Return params[name], or ``default`` when it was not supplied."""
        try:
            return params[name]
        except (KeyError, AttributeError):
            return default

    ########## Defining ResNet structure as a class. #############

    class ResNet(object):
        """ResNet model."""

        def __init__(self, is_training, data_format, batch_norm_decay, batch_norm_epsilon):
            """ResNet constructor.

            Args:
              is_training: if build training or inference model.
              data_format: the data_format used during computation.
                           one of 'channels_first' or 'channels_last'.
              batch_norm_decay: decay passed to every batch-norm layer.
              batch_norm_epsilon: epsilon passed to every batch-norm layer.
            """
            self._batch_norm_decay = batch_norm_decay
            self._batch_norm_epsilon = batch_norm_epsilon
            self._is_training = is_training
            assert data_format in ('channels_first', 'channels_last')
            self._data_format = data_format

        def forward_pass(self, x, input_data_format='channels_last'):
            """Build the network on ``x``; implemented by sub classes."""
            raise NotImplementedError(
                'forward_pass() is implemented in ResNet sub classes')

        def _residual_v1(self,
                         x,
                         kernel_size,
                         in_filter,
                         out_filter,
                         stride,
                         activate_before_residual=False):
            """Residual unit with 2 sub layers, using Plan A for shortcut connection."""

            # Accepted for signature compatibility only; v1 units ignore it.
            del activate_before_residual
            with tf.name_scope('residual_v1') as name_scope:
                orig_x = x

                x = self._conv(x, kernel_size, out_filter, stride)
                x = self._batch_norm(x)
                x = self._relu(x)

                x = self._conv(x, kernel_size, out_filter, 1)
                x = self._batch_norm(x)

                if in_filter != out_filter:
                    # Shortcut: down-sample spatially, then zero-pad the
                    # channel axis so both branches have the same shape.
                    orig_x = self._avg_pool(orig_x, stride, stride)
                    pad = (out_filter - in_filter) // 2
                    if self._data_format == 'channels_first':
                        orig_x = tf.pad(orig_x, [[0, 0], [pad, pad], [0, 0], [0, 0]])
                    else:
                        orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], [pad, pad]])

                x = self._relu(tf.add(x, orig_x))

                tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
                return x

        def _conv(self, x, kernel_size, filters, strides, is_atrous=False):
            """Convolution."""

            padding = 'SAME'
            if not is_atrous and strides > 1:
                # Pad the spatial axes explicitly and convolve with 'VALID'
                # padding for strided convolutions.
                pad = kernel_size - 1
                pad_beg = pad // 2
                pad_end = pad - pad_beg
                if self._data_format == 'channels_first':
                    x = tf.pad(x, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
                else:
                    x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
                padding = 'VALID'
            return tf.layers.conv2d(
                inputs=x,
                kernel_size=kernel_size,
                filters=filters,
                strides=strides,
                padding=padding,
                use_bias=False,
                data_format=self._data_format)

        def _batch_norm(self, x):
            """Batch normalization using the decay/epsilon given at construction."""
            if self._data_format == 'channels_first':
                data_format = 'NCHW'
            else:
                data_format = 'NHWC'
            return tf.contrib.layers.batch_norm(
                x,
                decay=self._batch_norm_decay,
                center=True,
                scale=True,
                epsilon=self._batch_norm_epsilon,
                is_training=self._is_training,
                fused=True,
                data_format=data_format)

        def _relu(self, x):
            """ReLU activation."""
            return tf.nn.relu(x)

        def _fully_connected(self, x, out_dim):
            """Dense layer with ``out_dim`` outputs and no activation."""
            with tf.name_scope('fully_connected') as name_scope:
                x = tf.layers.dense(x, out_dim)

            tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
            return x

        def _avg_pool(self, x, pool_size, stride):
            """2-D average pooling with 'SAME' padding."""
            with tf.name_scope('avg_pool') as name_scope:
                x = tf.layers.average_pooling2d(
                    x, pool_size, stride, 'SAME', data_format=self._data_format)

            tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
            return x

        def _global_avg_pool(self, x):
            """Average a rank-4 tensor over its two spatial axes."""
            with tf.name_scope('global_avg_pool') as name_scope:
                assert x.get_shape().ndims == 4
                if self._data_format == 'channels_first':
                    x = tf.reduce_mean(x, [2, 3])
                else:
                    x = tf.reduce_mean(x, [1, 2])
            tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
            return x

    ########## End ResNet class #############

    ####### Subclassing ResNet specific to CIFAR-10 ###########

    class ResNetCifar10(ResNet):
        """Cifar10 model with ResNetV1 and basic residual block."""

        def __init__(self,
                     num_layers,
                     is_training,
                     batch_norm_decay,
                     batch_norm_epsilon,
                     data_format='channels_first'):
            super(ResNetCifar10, self).__init__(
                is_training,
                data_format,
                batch_norm_decay,
                batch_norm_epsilon
            )
            # Number of residual units in each of the three stages.
            self.n = (num_layers - 2) // 6
            # One output per class; label ids are 0..9.
            self.num_classes = 10
            self.filters = [16, 16, 32, 64]
            self.strides = [1, 2, 2]

        def forward_pass(self, x, input_data_format='channels_last'):
            """Build the core model within the graph."""
            if self._data_format != input_data_format:
                if input_data_format == 'channels_last':
                    # Computation requires channels_first.
                    x = tf.transpose(x, [0, 3, 1, 2])
                else:
                    # Computation requires channels_last.
                    x = tf.transpose(x, [0, 2, 3, 1])

            # Image standardization.
            x = x / 128 - 1

            x = self._conv(x, 3, 16, 1)
            x = self._batch_norm(x)
            x = self._relu(x)

            # Use basic (non-bottleneck) block and ResNet V1 (post-activation).
            res_func = self._residual_v1

            # 3 stages of block stacking.
            for i in range(3):
                with tf.name_scope('stage'):
                    for j in range(self.n):
                        if j == 0:
                            # First block in a stage, filters and strides may change.
                            x = res_func(x, 3, self.filters[i], self.filters[i + 1],
                                         self.strides[i])
                        else:
                            # Following blocks in a stage, constant filters and unit stride.
                            x = res_func(x, 3, self.filters[i + 1], self.filters[i + 1], 1)

            x = self._global_avg_pool(x)
            x = self._fully_connected(x, self.num_classes)

            return x
    ####### End ResNetCifar10 class ###########

    ######### Here we define all the hyperparameters, network, loss, optimizer and training operations ##################

    ## Hyperparams
    num_layers = 44

    batch_norm_decay = params["batch_norm_decay"]
    weight_decay = params["weight_decay"]
    # Honour the values supplied with the experiment; fall back to the
    # previous hard-coded values when they are not given.
    batch_norm_epsilon = _hparam("batch_norm_epsilon", 1e-5)
    base_learning_rate = _hparam("learning_rate", 0.1)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    ## Neural network architecture
    model = ResNetCifar10(
        num_layers,
        batch_norm_decay=batch_norm_decay,
        batch_norm_epsilon=batch_norm_epsilon,
        is_training=is_training,
        data_format="channels_last")

    # input_layer flattens the feature; reshape it back into image form.
    data = tf.feature_column.input_layer(features, [tf.feature_column.numeric_column("data", shape=(32,32,3))])
    data = tf.reshape(data, (-1,32,32,3))
    logits = model.forward_pass(data, input_data_format='channels_last')

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': tf.argmax(input=logits, axis=1),
            'probabilities': tf.nn.softmax(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    ## Defining Loss: cross entropy plus L2 weight decay on all trainable variables.
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
    model_params = tf.trainable_variables()
    loss += weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in model_params])
    predicted_classes = tf.argmax(logits, axis=1)

    ## Compute evaluation metrics.
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted_classes,
                                   name='acc_op')
    metrics = {'accuracy': accuracy}
    tf.summary.scalar('accuracy', accuracy[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=metrics)

    assert mode == tf.estimator.ModeKeys.TRAIN

    ## Create optimizer
    # NOTE(review): the schedule boundaries assume 45000 training images and a
    # batch size of 64, independent of the batch size actually used - confirm.
    num_batches_per_epoch = 45000 // 64
    # np.int64 keeps the boundaries in the same dtype as the global step.
    boundaries = [ num_batches_per_epoch * x for x in np.array([82, 123, 300], dtype=np.int64)]
    staged_lr = [base_learning_rate * x for x in [1, 0.1, 0.01, 0.002]]

    ## Create global step and training operation
    global_step = tf.train.get_global_step()
    learning_rate = tf.train.piecewise_constant(global_step, boundaries, staged_lr)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    minimize_op = optimizer.minimize(loss, global_step=global_step)

    ## Log the global step on every training step.
    # The returned op depends on the optimizer step and on the ops in the
    # UPDATE_OPS collection, so running it runs all of them.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops + [minimize_op]):
        train_op = tf.Print(global_step, [global_step])

    ## Return Estimator Spec with loss and training operation
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_chief_hooks=None)

### Training Model using SBrain Estimator

The model function we defined above captures the structure of the network, loss and training operation. The next step is to tie this up to other **SBrain** abstractions.

Here, we define a new **SBrain** classification estimator, passing in the same model_function that we defined earlier. This gives us an **SBrain** object which packages your model function.

In [None]:
# Package the model function defined above into an SBrain classification estimator.
classification_estimator = Estimator.NewClassificationEstimator(model_fn=cifar_model_function)
# Name with a user/timestamp suffix so repeated runs of the notebook do not clash.
name = uniquify("ResnetCifar10InputFunc")
# `saved_estimator` is the object handed to Experiment.run further down.
saved_estimator = Estimator.create(estimator_name=name,
                                   description="ResNet Cifar10 estimator trial",
                                   estimator_obj=classification_estimator)

# Search for the estimator by name; the result is not referenced again in this notebook.
all_resnet_estimators = Estimator.search(estimator_name=name)

In [None]:
# Run configuration: CPU only, evaluation disabled.
# NOTE(review): no_of_ps presumably means parameter servers - confirm against the RunConfig docs.
run_config = RunConfig(no_of_ps=1, no_of_workers=4, summary_save_frequency=10, run_eval=False, use_gpu=False)
# Hyper-parameters for the run; the model function reads its values from `params` by these names.
hyper_parameters = HParams(iterations=50,
                       batch_size=8,
                       batch_norm_decay=0.9,
                       batch_norm_epsilon=1e-5,
                       weight_decay=2e-4,
                       learning_rate=0.1)

experiment_name1 = uniquify("Resnet_CIFAR10_model")

# No dataset split is given (dataset_version_split=None); the data comes from input_function.
experiment1 = Experiment.run(experiment_name=experiment_name1,
                                description="ResNet Model trained on Cifar10 data",
                                estimator=saved_estimator,
                                hyper_parameters=hyper_parameters,
                                run_config=run_config,
                                dataset_version_split=None,
                                input_function=input_function)

# List the jobs, wait for the experiment to finish, then report its status.
experiment1.list_jobs()
experiment1.wait_until_finish()
experiment1.report_status()

In [None]:
# Look up the learning job by the experiment name and fetch the model it produced.
job = LearningJob.lookup(experiment_name1)
model1 = job.get_model()
print(model1.model_metrics)

### Inference using the built model

In [None]:
from sbrain.serving.model_service_endpoint import *

# Create an inference endpoint for model1: one replica, no GPU.
# The endpoint name is derived from the model name and doubles as the description.
end_point_name1="ep_{}".format(model1.model_name)
mep1 = ModelEndpoint.create(model=model1, 
                           endpoint_name=end_point_name1,
                           description=end_point_name1, 
                           number_of_service_replicas=1, 
                           gpu_required=False)

# Search for the endpoint by name; as the last expression of the cell, the notebook displays the result.
mep1.search(endpoint_name=end_point_name1)


In [None]:
import glob

import base64
def encode_img(img_path):
    """Return the contents of the file at ``img_path`` as base64 text.

    The file is read in binary mode, base64-encoded and the result decoded
    to a ``str`` so it can be placed in a request payload.
    """
    with open(img_path, "rb") as image_file:
        # Avoid naming the local `str`, which would shadow the builtin.
        encoded = base64.b64encode(image_file.read())
    return encoded.decode("utf-8")
    
# Read the CSV of prediction inputs; each line becomes a list of its comma-separated fields.
with open("prediction_input.csv", "r") as f:
    records = [line.strip().split(",") for line in f.readlines()]

# Only the first field of the first record (an image path) is sent for prediction.
encoded_imgs = [encode_img(records[0][0])]

features_dict = {'features': encoded_imgs}
result = mep1.predict(features_dict)
print(result)

## Retrieving Model Checkpoint and Transfer Learning With Changed Predict Outputs In Model Function

In [None]:
# Take the checkpoint of the first model and describe how the next run should load it:
# the ".*" pattern matches every variable name, restricted to trainable variables.
checkpoint = model1.get_model_checkpoint()
transfer_learning_config = TransferLearningConfig(
    model_checkpoint=checkpoint, 
    vars_to_load=".*", 
    load_only_trainable_vars=True)

In [None]:
def cifar_model_function_only_logits_output(features, labels, mode, params):
    """Model function for the ResNet-44 CIFAR-10 classifier, predicting logits only.

    Builds the network on ``features["data"]`` and returns the
    ``tf.estimator.EstimatorSpec`` that matches ``mode``:

    * PREDICT: a predictions dict whose only entry is ``logits``.
    * EVAL: the loss plus an ``accuracy`` metric.
    * TRAIN: the loss plus a momentum-SGD training op driven by a
      piecewise-constant learning-rate schedule.

    The learning rate is taken from ``params["learning_rate"]``, so a run with
    ``learning_rate=0.0`` applies zero-sized optimizer updates.
    NOTE(review): batch-norm layers are still built with ``is_training=True``
    in TRAIN mode, so their moving statistics may still change - confirm.

    Args:
      features: dict of tensors; the ``"data"`` entry must hold 32*32*3 values
        per example (it is reshaped to ``(-1, 32, 32, 3)``).
      labels: integer class ids; not used in PREDICT mode.
      mode: one of the ``tf.estimator.ModeKeys`` values.
      params: hyper-parameters, read by subscript. ``batch_norm_decay`` and
        ``weight_decay`` are required; ``batch_norm_epsilon`` (default 1e-5)
        and ``learning_rate`` (default 0.1) are optional.

    Returns:
      A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    ## Importing relevant packages
    # Imports are kept inside the function, like elsewhere in this notebook.
    # NOTE(review): presumably so the function is self-contained when SBrain
    # ships it to the workers - confirm.
    import tensorflow as tf
    import numpy as np

    def _hparam(name, default):
        """Return params[name], or ``default`` when it was not supplied."""
        try:
            return params[name]
        except (KeyError, AttributeError):
            return default

    ########## Defining ResNet structure as a class. #############

    class ResNet(object):
        """ResNet model."""

        def __init__(self, is_training, data_format, batch_norm_decay, batch_norm_epsilon):
            """ResNet constructor.

            Args:
              is_training: if build training or inference model.
              data_format: the data_format used during computation.
                           one of 'channels_first' or 'channels_last'.
              batch_norm_decay: decay passed to every batch-norm layer.
              batch_norm_epsilon: epsilon passed to every batch-norm layer.
            """
            self._batch_norm_decay = batch_norm_decay
            self._batch_norm_epsilon = batch_norm_epsilon
            self._is_training = is_training
            assert data_format in ('channels_first', 'channels_last')
            self._data_format = data_format

        def forward_pass(self, x, input_data_format='channels_last'):
            """Build the network on ``x``; implemented by sub classes."""
            raise NotImplementedError(
                'forward_pass() is implemented in ResNet sub classes')

        def _residual_v1(self,
                         x,
                         kernel_size,
                         in_filter,
                         out_filter,
                         stride,
                         activate_before_residual=False):
            """Residual unit with 2 sub layers, using Plan A for shortcut connection."""

            # Accepted for signature compatibility only; v1 units ignore it.
            del activate_before_residual
            with tf.name_scope('residual_v1') as name_scope:
                orig_x = x

                x = self._conv(x, kernel_size, out_filter, stride)
                x = self._batch_norm(x)
                x = self._relu(x)

                x = self._conv(x, kernel_size, out_filter, 1)
                x = self._batch_norm(x)

                if in_filter != out_filter:
                    # Shortcut: down-sample spatially, then zero-pad the
                    # channel axis so both branches have the same shape.
                    orig_x = self._avg_pool(orig_x, stride, stride)
                    pad = (out_filter - in_filter) // 2
                    if self._data_format == 'channels_first':
                        orig_x = tf.pad(orig_x, [[0, 0], [pad, pad], [0, 0], [0, 0]])
                    else:
                        orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], [pad, pad]])

                x = self._relu(tf.add(x, orig_x))

                tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
                return x

        def _conv(self, x, kernel_size, filters, strides, is_atrous=False):
            """Convolution."""

            padding = 'SAME'
            if not is_atrous and strides > 1:
                # Pad the spatial axes explicitly and convolve with 'VALID'
                # padding for strided convolutions.
                pad = kernel_size - 1
                pad_beg = pad // 2
                pad_end = pad - pad_beg
                if self._data_format == 'channels_first':
                    x = tf.pad(x, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
                else:
                    x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
                padding = 'VALID'
            return tf.layers.conv2d(
                inputs=x,
                kernel_size=kernel_size,
                filters=filters,
                strides=strides,
                padding=padding,
                use_bias=False,
                data_format=self._data_format)

        def _batch_norm(self, x):
            """Batch normalization using the decay/epsilon given at construction."""
            if self._data_format == 'channels_first':
                data_format = 'NCHW'
            else:
                data_format = 'NHWC'
            return tf.contrib.layers.batch_norm(
                x,
                decay=self._batch_norm_decay,
                center=True,
                scale=True,
                epsilon=self._batch_norm_epsilon,
                is_training=self._is_training,
                fused=True,
                data_format=data_format)

        def _relu(self, x):
            """ReLU activation."""
            return tf.nn.relu(x)

        def _fully_connected(self, x, out_dim):
            """Dense layer with ``out_dim`` outputs and no activation."""
            with tf.name_scope('fully_connected') as name_scope:
                x = tf.layers.dense(x, out_dim)

            tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
            return x

        def _avg_pool(self, x, pool_size, stride):
            """2-D average pooling with 'SAME' padding."""
            with tf.name_scope('avg_pool') as name_scope:
                x = tf.layers.average_pooling2d(
                    x, pool_size, stride, 'SAME', data_format=self._data_format)

            tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
            return x

        def _global_avg_pool(self, x):
            """Average a rank-4 tensor over its two spatial axes."""
            with tf.name_scope('global_avg_pool') as name_scope:
                assert x.get_shape().ndims == 4
                if self._data_format == 'channels_first':
                    x = tf.reduce_mean(x, [2, 3])
                else:
                    x = tf.reduce_mean(x, [1, 2])
            tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
            return x

    ########## End ResNet class #############

    ####### Subclassing ResNet specific to CIFAR-10 ###########

    class ResNetCifar10(ResNet):
        """Cifar10 model with ResNetV1 and basic residual block."""

        def __init__(self,
                     num_layers,
                     is_training,
                     batch_norm_decay,
                     batch_norm_epsilon,
                     data_format='channels_first'):
            super(ResNetCifar10, self).__init__(
                is_training,
                data_format,
                batch_norm_decay,
                batch_norm_epsilon
            )
            # Number of residual units in each of the three stages.
            self.n = (num_layers - 2) // 6
            # One output per class; label ids are 0..9.
            self.num_classes = 10
            self.filters = [16, 16, 32, 64]
            self.strides = [1, 2, 2]

        def forward_pass(self, x, input_data_format='channels_last'):
            """Build the core model within the graph."""
            if self._data_format != input_data_format:
                if input_data_format == 'channels_last':
                    # Computation requires channels_first.
                    x = tf.transpose(x, [0, 3, 1, 2])
                else:
                    # Computation requires channels_last.
                    x = tf.transpose(x, [0, 2, 3, 1])

            # Image standardization.
            x = x / 128 - 1

            x = self._conv(x, 3, 16, 1)
            x = self._batch_norm(x)
            x = self._relu(x)

            # Use basic (non-bottleneck) block and ResNet V1 (post-activation).
            res_func = self._residual_v1

            # 3 stages of block stacking.
            for i in range(3):
                with tf.name_scope('stage'):
                    for j in range(self.n):
                        if j == 0:
                            # First block in a stage, filters and strides may change.
                            x = res_func(x, 3, self.filters[i], self.filters[i + 1],
                                         self.strides[i])
                        else:
                            # Following blocks in a stage, constant filters and unit stride.
                            x = res_func(x, 3, self.filters[i + 1], self.filters[i + 1], 1)

            x = self._global_avg_pool(x)
            x = self._fully_connected(x, self.num_classes)

            return x
    ####### End ResNetCifar10 class ###########

    ######### Here we define all the hyperparameters, network, loss, optimizer and training operations ##################

    ## Hyperparams
    num_layers = 44

    batch_norm_decay = params["batch_norm_decay"]
    weight_decay = params["weight_decay"]
    # Honour the values supplied with the experiment; fall back to the
    # previous hard-coded values when they are not given.
    batch_norm_epsilon = _hparam("batch_norm_epsilon", 1e-5)
    base_learning_rate = _hparam("learning_rate", 0.1)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    ## Neural network architecture
    model = ResNetCifar10(
        num_layers,
        batch_norm_decay=batch_norm_decay,
        batch_norm_epsilon=batch_norm_epsilon,
        is_training=is_training,
        data_format="channels_last")

    # input_layer flattens the feature; reshape it back into image form.
    data = tf.feature_column.input_layer(features, [tf.feature_column.numeric_column("data", shape=(32,32,3))])
    data = tf.reshape(data, (-1,32,32,3))
    logits = model.forward_pass(data, input_data_format='channels_last')

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Only the raw logits are exposed by this variant of the model function.
        predictions = {
            'logits': logits
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    ## Defining Loss: cross entropy plus L2 weight decay on all trainable variables.
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
    model_params = tf.trainable_variables()
    loss += weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in model_params])
    predicted_classes = tf.argmax(logits, axis=1)

    ## Compute evaluation metrics.
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted_classes,
                                   name='acc_op')
    metrics = {'accuracy': accuracy}
    tf.summary.scalar('accuracy', accuracy[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=metrics)

    assert mode == tf.estimator.ModeKeys.TRAIN

    ## Create optimizer
    # NOTE(review): the schedule boundaries assume 45000 training images and a
    # batch size of 64, independent of the batch size actually used - confirm.
    num_batches_per_epoch = 45000 // 64
    # np.int64 keeps the boundaries in the same dtype as the global step.
    boundaries = [ num_batches_per_epoch * x for x in np.array([82, 123, 300], dtype=np.int64)]
    staged_lr = [base_learning_rate * x for x in [1, 0.1, 0.01, 0.002]]

    ## Create global step and training operation
    global_step = tf.train.get_global_step()
    learning_rate = tf.train.piecewise_constant(global_step, boundaries, staged_lr)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    minimize_op = optimizer.minimize(loss, global_step=global_step)

    ## Log the global step on every training step.
    # The returned op depends on the optimizer step and on the ops in the
    # UPDATE_OPS collection, so running it runs all of them.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops + [minimize_op]):
        train_op = tf.Print(global_step, [global_step])

    ## Return Estimator Spec with loss and training operation
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_chief_hooks=None)

## IMPORTANT :
When transfer learning is used only to change the predict output, the following are required:
1. The predict output has to be a dictionary.
2. When training the estimator from the previous checkpoint, keep the iterations low (e.g. 10) and set learning_rate=0.0 
so that the weights of the previous model are unchanged.

In [None]:
# Build an estimator from the logits-only model function, then rebind `estimator2`
# to the object returned by Estimator.create.
estimator2 = Estimator.NewClassificationEstimator(model_fn=cifar_model_function_only_logits_output)
name2 = uniquify("CIFAR10_Logits_Only")
estimator2 = Estimator.create(name2, "Resnet Cifar10 estimator", estimator2)

# Same run configuration as the first experiment, but only 10 iterations and learning_rate=0.0.
run_config2 = RunConfig(no_of_ps=1, no_of_workers=4, summary_save_frequency=10, run_eval=False, use_gpu=False)
hyper_parameters2 = HParams(iterations=10,
                       batch_size=8,
                       batch_norm_decay=0.9,
                       batch_norm_epsilon=1e-5,
                       weight_decay=2e-4,
                       learning_rate=0.0) 
experiment2_name = uniquify("CIFAR10_Logits_Only_Trial")
# transfer_learning_config carries the checkpoint of the first model.
experiment2 = Experiment.run(experiment_name=experiment2_name,
                     description="Cifar10 Model",
                     estimator=estimator2,
                     hyper_parameters=hyper_parameters2,
                     run_config=run_config2,
                     dataset_version_split=None,
                     input_function=input_function,
                     transfer_learning_config=transfer_learning_config)
# NOTE: job2 is reassigned via LearningJob.lookup in a later cell.
job2 = experiment2.get_single_job()

In [None]:
# List the jobs, wait for the second experiment to finish, then report its status.
experiment2.list_jobs()
experiment2.wait_until_finish()
experiment2.report_status()

In [None]:
# Look up the learning job by the second experiment's name and fetch the model it produced.
job2 = LearningJob.lookup(experiment2_name)
model2 = job2.get_model()
print(model2.model_metrics)

## Launching Model Inference Endpoint and making predictions

In [None]:
from sbrain.serving.model_service_endpoint import *

# Create an inference endpoint for model2: one replica, no GPU.
# The endpoint name is derived from the model name and doubles as the description.
end_point_name2="ep_{}".format(model2.model_name)
mep2 = ModelEndpoint.create(model=model2, 
                           endpoint_name=end_point_name2,
                           description=end_point_name2, 
                           number_of_service_replicas=1, 
                           gpu_required=False)

# Search for the endpoint by name; as the last expression of the cell, the notebook displays the result.
mep2.search(endpoint_name=end_point_name2)

In [None]:
import glob

import base64
def encode_img(img_path):
    """Return the contents of the file at ``img_path`` as base64 text.

    The file is read in binary mode, base64-encoded and the result decoded
    to a ``str`` so it can be placed in a request payload.
    """
    with open(img_path, "rb") as image_file:
        # Avoid naming the local `str`, which would shadow the builtin.
        encoded = base64.b64encode(image_file.read())
    return encoded.decode("utf-8")
    
# Read the CSV of prediction inputs; each line becomes a list of its comma-separated fields.
with open("prediction_input.csv", "r") as f:
    records = [line.strip().split(",") for line in f.readlines()]

# Only the first field of the first record (an image path) is sent for prediction.
encoded_imgs = [encode_img(records[0][0])]

features_dict = {'features': encoded_imgs}
result = mep2.predict(features_dict)
print(result)


We are done!!