# Use case 2 - IMSP 1st DATA Science school

We recall the use case here: 

You are a software developper and you want to build an application for botanists to help them recognize
different specices of medecinal flowers. Users on field take pictures of the flowers and the model return the
type of corresponding category with a certain confidence. The predicted category can then be used to know
the medecinal vertues of the plant. Predictions will be based on data collected from different practitioners.

Dataset is dowloaded from Kaggle. Find the dataset actual description here
https://www.kaggle.com/alxmamaev/flowers-recognition

In [1]:
!ls

Colab_Manipulations.pdf        OtherCommands
conda-cheatsheet.pdf	       serving
Delai_rdv_pediatres_2012.xlsx  TF_dev_initiation.ipynb
deseases_client.ipynb	       TF_dev_initiation.ipynb - Colaboratory.pdf
environment.yml		       Untitled.ipynb
flowers_client.ipynb	       Use_Case_1.ipynb
insurance.csv		       Use_case_2.ipynb
my_model


In [1]:
# Import the necessary libraries

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import matplotlib.pyplot as plt
import numpy as np
from os import listdir
from os.path import join
import pandas
import os
import random

In [3]:
# Set the path of the input folder 

data = "/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/"

# List out the directories inside the main input folder
folders = os.listdir(data)
LABELS = folders
print(LABELS)

['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']


## Looking at Friendly libraries not TF

In [0]:
import cv2

# Import the images and resize them to a 128*128 size
# Also generate the corresponding labels

image_names = []
train_labels = []
train_images = []

size = 64,64

for folder in folders:
    for file in os.listdir(os.path.join(data,folder)):
        if file.endswith("jpg"):
            image_names.append(os.path.join(data,folder,file))
            train_labels.append(folder)
            #cv2 openCV package
            img = cv2.imread(os.path.join(data,folder,file))
            im = cv2.resize(img,size)
            train_images.append(im)
        else:
            continue
            
# Extract the labels
label_dummies = pandas.get_dummies(train_labels)
labels =  label_dummies.values.argmax(1)

# Transform the image array to a numpy type
train = np.array(train_images)
train.shape

# Reduce the RGB values between 0 and 1
train = train.astype('float32') / 255.0

In [0]:
pandas.unique(labels)

In [0]:
pandas.unique(train_labels)

In [0]:
# Shuffle the labels and images randomly for better results

union_list = list(zip(train, labels))
random.shuffle(union_list)
train,labels = zip(*union_list)

# Convert the shuffled list to numpy array type

train = np.array(train)
labels = np.array(labels)

## Doing the same thing with TF

In [4]:
#Image size
image_dim_tensor = 64

#Reads an image from a file, decodes it into a dense tensor, and resizes it
# to a fixed shape.
def _parse_function(filename, label):
  size = image_dim_tensor,image_dim_tensor
  NUM_CLASSES = len(LABELS)
  
  image_string = tf.read_file(filename)
  image_decoded = tf.image.decode_jpeg(image_string)
  image_resized = tf.image.resize_images(image_decoded, size)
  image_resized = tf.cast(image_resized , tf.float32) * (1. / 255.0)
  #If a one_hot representation of the labels should be used
  one_hot_label = tf.one_hot(label, NUM_CLASSES)
  
  return {"new_image": image_resized}, label

def _parse_pred_function(filename):
  size = image_dim_tensor,image_dim_tensor
  
  image_string = tf.read_file(filename)
  image_decoded = tf.image.decode_jpeg(image_string)
  image_resized = tf.image.resize_images(image_decoded, size)
  
  image_resized = tf.cast(image_resized , tf.float32) * (1. / 255.0)
  
  return {"new_image": image_resized}


filenames = []
labels_names = []

#For running efficiency we will keep only 200 images at mostper label
max_num_imgs = 200

p = tf.constant(0.2)

for folder in folders:
  print("Fetching ",folder, " images")
  fecthed_imgs = 0
  for file in os.listdir(os.path.join(data,folder)):
    if file.endswith("jpg"):
      if fecthed_imgs > max_num_imgs:
        break
      else:
        filenames.append(os.path.join(data,folder,file))
        labels_names.append(folder)        
    else:
      continue

Fetching  daisy  images
Fetching  dandelion  images
Fetching  rose  images
Fetching  sunflower  images
Fetching  tulip  images


In [5]:
# Shuffle the input set because of ordered sequential loading.
order = np.argsort(np.random.random(len(labels_names)))
filenames = [filenames[i] for i in order]
labels_names = [labels_names[i] for i in order]

# `labels[i]` is the label for the image in `filenames[i].
# Extract the labels as integer
label_dummies = pandas.get_dummies(labels_names)
labels =  label_dummies.values.argmax(1)


In [6]:
DATASET_SIZE = len(filenames)
#int(DATASET_SIZE*0.8)

train_size = int(0.8 * DATASET_SIZE)
test_size = int(0.2 * DATASET_SIZE)

train_filenames = filenames[:train_size]
train_labels_names = labels[:train_size]

test_filenames = filenames[train_size:]
test_labels_names = labels[train_size:]

#Take 10 images from the test set for prediction
some_images = test_filenames[0:9]

## Deprecated way to input an Estimator

In [0]:
class FlowersData(object):    
     
    def __init__(self, tf_dataset, reshape):
        self.pos = 0
        self.images = None
        self.labels = None
        # load entire Dataset into memory by chunks of 10000
        tf_dataset = tf_dataset.batch(10000)
        tf_dataset = tf_dataset.repeat(1)
        features, labels = tf_dataset.make_one_shot_iterator().get_next()         
        with tf.Session() as sess:
            while True:
                try:
                    feats, labs = sess.run([features, labels])
                    #print(labs)
                    self.images = feats if self.images is None else np.concatenate([self.images, feats])
                    self.labels = labs if self.labels is None else np.concatenate([self.labels, labs])
                except tf.errors.OutOfRangeError:
                    break

dataset = tf.data.Dataset.from_tensor_slices(filenames, labels)
dataset = dataset.map(_parse_function)

dataset = dataset.shuffle(10000)

train_dataset = dataset.take(train_size)
test_dataset =  dataset.skip(train_size)

In [0]:
test = FlowersData(test_dataset, False)
train = FlowersData(train_dataset, False)
print('Done loading images')

In [0]:
train_input_fn = tf.estimator.inputs.numpy_input_fn(    
    x={"new_image": np.array(train.images["image_data"])},
    y=np.array(train.labels),
    num_epochs=None,
    shuffle=True)
	
test_input_fn = tf.estimator.inputs.numpy_input_fn(    
    x={"new_image": np.array(test.images["image_data"])},
    y=np.array(test.labels),
    num_epochs=1,
    shuffle=False)

	
#Take 10 images from the test set for prediction
some_images = np.array(test.images["image_data"])
some_images =some_images[0:9]
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"new_image": some_images},
    num_epochs=1,
    shuffle=False)

In [60]:
#Length of the training set
np.array(train.labels).shape

(3458,)

## Recommanded way to input an Estimator

In [5]:
def train_input_fn(features, labels, batch_size):
    """An input function for training"""
    # A vector of filenames.
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    dataset = dataset.map(_parse_function)

    # Shuffle, repeat, and batch the examples.
    dataset = dataset.shuffle(10000).repeat().batch(batch_size)

    # Return the dataset.
    return dataset
  
  
def test_input_fn(features, labels, batch_size):
    """An input function for training"""
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    dataset = dataset.map(_parse_function)
    
    #batch the examples
    dataset = dataset.batch(batch_size)
    
    # Return the dataset.
    return dataset  
  
def predict_input_fn(features):
  batch_size = 10
  dataset = tf.data.Dataset.from_tensor_slices(features)
  dataset = dataset.map(_parse_pred_function)
  
  dataset = dataset.batch(batch_size)
  return dataset.make_one_shot_iterator().get_next()


## Building the model

The model used is a CNN with 3 layers including 2 convolution layers and one Dense layer at the head.

In [7]:
def model_fn(features, labels, mode, params):
    x = features["new_image"]

    # The convolutional layers expect 4-rank tensors
    # but x is a 2-rank tensor, so reshape it.
    net = tf.reshape(x, [-1, 64, 64, 3])    
    # First convolutional layer.
    net = tf.layers.conv2d(inputs=net, name='layer_conv1',
                           filters=16, kernel_size=5,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)
    # Second convolutional layer.
    net = tf.layers.conv2d(inputs=net, name='layer_conv2',
                           filters=36, kernel_size=5,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)    
    # Flatten to a 2-rank tensor.
    net = tf.contrib.layers.flatten(net)
    # First fully-connected / dense layer.
    # This uses the ReLU activation function.
    net = tf.layers.dense(inputs=net, name='layer_fc1',
                          units=128, activation=tf.nn.relu)    
    # Second fully-connected / dense layer.
    # This is the last layer so it does not use an activation function.
    net = tf.layers.dense(inputs=net, name='layer_fc2',
                          units=len(LABELS))
    # Logits output of the neural network.
    logits = net
    
    label_values = tf.constant(LABELS)
    
    # Softmax output of the neural network.
    y_pred = tf.nn.softmax(logits=logits)
    
    # Classification output of the neural network.
    y_pred_cls = tf.argmax(y_pred, axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:      
      # Convert predicted_indices back into strings.
      predictions = {
          'classes': tf.gather(label_values, y_pred_cls),
          'scores': tf.reduce_max(y_pred, axis=1)
      }
      export_outputs = {
          'prediction': tf.estimator.export.PredictOutput(predictions)
      }
      
      # If the estimator is supposed to be in prediction-mode 
      spec = tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs)               
    else:
        # Otherwise the estimator is supposed to be in either
        # training or evaluation-mode. Note that the loss-function
        # is also required in Evaluation mode.
        
        # Define the loss-function to be optimized, by first
        # calculating the cross-entropy between the output of
        # the neural network and the true labels for the input data.
        # This gives the cross-entropy for each image in the batch.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                                       logits=logits)

        # Reduce the cross-entropy batch-tensor to a single number
        # which can be used in optimization of the neural network.
        loss = tf.reduce_mean(cross_entropy)

        # Define the optimizer for improving the neural network.
        optimizer = tf.train.AdamOptimizer(learning_rate=params["learning_rate"])

        # Get the TensorFlow op for doing a single optimization step.
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())

        # Define the evaluation metrics,
        # in this case the classification accuracy.
        metrics = \
        {
            "accuracy": tf.metrics.accuracy(labels, y_pred_cls)
        }

        # Wrap all of this in an EstimatorSpec.
        spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=metrics)
        
    return spec

In [9]:
#Specify hyper-parameters 
params = {"learning_rate": 1e-4}

model = tf.estimator.Estimator(model_fn=model_fn,
                               params=params,
                               model_dir="./my_model/")


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './my_model/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f165feb6f60>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## Training the model
This will take some time depending on the runing environment.

In [10]:
#Training
#model.train(input_fn=train_input_fn, steps=500)
model.train(input_fn= lambda : train_input_fn(train_filenames, train_labels_names, 1000), steps=200)

print("****************** FINISHED TRAINING **********************")

#result = model.evaluate(input_fn=test_input_fn)
result = model.evaluate(input_fn= lambda : test_input_fn(test_filenames, test_labels_names, 1000))

#Acuraccy
print("Classification accuracy: {0:.2%}".format(result["accuracy"]))


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./my_model/model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 200 into ./my_model/model.ckpt.
INFO:tensorflow:loss = 0.90758187, step = 201
INFO:tensorflow:Saving checkpoints for 211 into ./my_model/model.ckpt.
INFO:tensorflow:Saving checkpoints for 232 into ./my_model/model.ckpt.
INFO:tensorflow:Saving checkpoints for 252 into ./my_model/model.ckpt.
INFO:tensorflow:Saving checkpoints for 290 into ./my_model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.038319
INFO:tensorflow:loss = 0.7697071, step = 301 (2609.490 sec)
INFO:tensorflow:Saving checkpoints for 346 into ./my_model/model.ckpt.
INFO:tensorflow:Saving checkpoints for 387 into ./my_model/model.ckpt.
INFO:tensorflow:Saving checkpoints for 400 into ./

## Runing the predictions
We evaluate the model on a bunch of images. 10 images were selected from the test set.

In [11]:
#Predictions	
predictions = model.predict(input_fn=lambda : predict_input_fn(some_images))
for pred in predictions:
    print(pred)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./my_model/model.ckpt-400
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'classes': b'dandelion', 'scores': 0.86428434}
{'classes': b'daisy', 'scores': 0.51471925}
{'classes': b'daisy', 'scores': 0.62386924}
{'classes': b'tulip', 'scores': 0.79162294}
{'classes': b'tulip', 'scores': 0.6998274}
{'classes': b'rose', 'scores': 0.48443404}
{'classes': b'dandelion', 'scores': 0.8203398}
{'classes': b'rose', 'scores': 0.75211614}
{'classes': b'daisy', 'scores': 0.44793501}


In [24]:
some_images

['/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/daisy/14264136211_9531fbc144.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/daisy/5058708968_8bdcd29e63_n.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/tulip/19689681344_b05ac361f2_n.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/sunflower/4235259239_21f2eb4f2e.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/sunflower/2996573407_5e473b9359.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/sunflower/14858674096_ed0fc1a130.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/dandelion/2469856983_fe8e36ba57.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/daisy/2908212142_5437fa67ff_n.jpg',
 '/media/habib/Data/IMSP/ROetOptimisation/DATASETS/flowers/flowers/tulip/8619064872_dea79a9eb9.jpg']

In [12]:
some_labels = test_labels_names
with tf.Session() as sess:
  preds = sess.run(tf.gather(LABELS, some_labels[0:9]))
  print(preds)

[b'dandelion' b'tulip' b'tulip' b'tulip' b'tulip' b'tulip' b'dandelion'
 b'rose' b'sunflower']


In [13]:
LABELS

['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [17]:
!ls

Colab_Manipulations.pdf        my_model
conda-cheatsheet.pdf	       OtherCommands
Delai_rdv_pediatres_2012.xlsx  TF_dev_initiation.ipynb
deseases_client.ipynb	       TF_dev_initiation.ipynb - Colaboratory.pdf
environment.yml		       Untitled.ipynb
flowers_client.ipynb	       Use_Case_1.ipynb
insurance.csv		       Use_case_2.ipynb


## Export the model for Serving

In [13]:
#Description of input features to the model
feature_spec = {'new_image': tf.FixedLenFeature(shape=[], dtype=tf.float32), }

#Exporting the model
export_path_base = 'serving/versions'

model.export_savedmodel(export_path_base, tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['prediction', 'serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Restoring parameters from ./my_model/model.ckpt-400
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: serving/versions/temp-b'1544684366'/saved_model.pb


b'serving/versions/1544684366'

In [14]:
#Examine your saved model
!saved_model_cli show --dir "serving/versions/1544677631" --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['prediction']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['examples'] tensor_info:
        dtype: DT_STRING
        shape: (-1)
        name: input_example_tensor:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['classes'] tensor_info:
        dtype: DT_STRING
        shape: (-1)
        name: GatherV2:0
    outputs['scores'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1)
        name: Max:0
  Method name is: tensorflow/serving/predict

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['examples'] tensor_info:
        dtype: DT_STRING
        shape: (-1)
        name: input_example_tensor:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['classes'] tensor_info:
        dtype: DT_STRING
       

## Start serving

In [21]:
# Add TensorFlow Serving distribution URI as a package source
!echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \
curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
!apt update

tee: /etc/apt/sources.list.d/tensorflow-serving.list: Permission non accordée
deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal
Lecture des listes de paquets... Fait
W: chmod 0700 of directory /var/lib/apt/lists/partial failed - SetupAPTPartialDirectory (1: Opération non permise)
E: Impossible d'ouvrir le fichier verrou /var/lib/apt/lists/lock - open (13: Permission non accordée)
E: Impossible de verrouiller le répertoire /var/lib/apt/lists/
W: Problème de suppression du lien /var/cache/apt/pkgcache.bin - RemoveCaches (13: Permission non accordée)
W: Problème de suppression du lien /var/cache/apt/srcpkgcache.bin - RemoveCaches (13: Permission non accordée)


In [22]:
# Install TensorFlow Serving
!apt-get install tensorflow-model-server

E: Impossible d'ouvrir le fichier verrou /var/lib/dpkg/lock - open (13: Permission non accordée)
E: Impossible de verrouiller le répertoire d'administration (/var/lib/dpkg/). Avez-vous les privilèges du superutilisateur ?


In [None]:
!tensorflow_model_server --port=9000 --model_name=Flowers --model_base_path='/home/habib/Documents/serving/versions/' 

## References

https://www.tensorflow.org/serving/tutorials/Serving_REST_simple
