# Dotscience Roadsigns Demo

In [8]:
!pip install dotscience
import dotscience as ds
import numpy as np
import pandas as pd
import pickle
import os
import cv2
import random
import skimage.morphology as morp
from skimage.filters import rank
import matplotlib.pyplot as plt
import tensorflow as tf



In [0]:
# NOTE(review): presumably switches the dotscience client into its interactive
# (notebook) mode so the ds.* calls in later cells are recorded — confirm
# against the dotscience documentation.
ds.interactive()

In [0]:
# Declare the three pickled dataset splits as inputs of this run.
ds.input("train.p")
ds.input("valid.p")
ds.input("test.p")

# Load each split inside a `with` block so the file handle is closed as soon
# as unpickling finishes (the bare open() calls were never closed).
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# use these files if they come from a trusted source.
with open("train.p", "rb") as train_file:
    train = pickle.load(train_file)
with open("valid.p", "rb") as valid_file:
    valid = pickle.load(valid_file)
with open("test.p", "rb") as test_file:
    test = pickle.load(test_file)

In [23]:
import csv

# Mapping ClassID to traffic sign names.
# The first row of signnames.csv is treated as a header and skipped; the
# second column of every remaining row is collected as the sign name.
# NOTE(review): indexing `signs` by class ID assumes the rows are ordered by
# ClassID starting at 0 — confirm against the CSV.
# newline='' is what the csv module expects for files handed to csv.reader.
# The `with` block closes the file, so no explicit close() is needed.
with open('signnames.csv', 'r', newline='') as csvfile:
    signnames = csv.reader(csvfile, delimiter=',')
    next(signnames, None)
    signs = [row[1] for row in signnames]

# Declare the CSV as an input of this run; kept as the last expression so the
# cell still displays the value returned by ds.input.
ds.input("signnames.csv")

'signnames.csv'

In [22]:
!mv ../train.p .

mv: cannot stat '../train.p': No such file or directory


In [14]:
!pwd


/content


In [25]:
# Pull the image arrays and label vectors out of each loaded split.
X_train = train['features']
y_train = train['labels']
X_valid = valid['features']
y_valid = valid['labels']
X_test = test['features']
y_test = test['labels']

# Number of examples in the training, validation and testing splits.
n_train, n_validation, n_test = (
    split.shape[0] for split in (X_train, X_valid, X_test)
)

# Shape of a single traffic sign image (taken from the first training image).
image_shape = X_train[0].shape

# Number of distinct labels present in the training set.
n_classes = np.unique(y_train).size

# Report the split sizes in the order: training, testing, validation.
for caption, count in (
    ("Number of training examples: ", n_train),
    ("Number of testing examples: ", n_test),
    ("Number of validation examples: ", n_validation),
):
    print(caption, count)
print("Image data shape =", image_shape)
print("Number of classes =", n_classes)

Number of training examples:  34799
Number of testing examples:  12630
Number of validation examples:  4410
Image data shape = (32, 32, 3)
Number of classes = 43


In [26]:
X_train[0].shape

(32, 32, 3)

# Preprocess data

In [0]:
# define helper functions
def list_images(dataset, dataset_y, ylabel="", cmap=None):
    """
    Display six randomly chosen images from a dataset in a single matplotlib figure.
        Parameters:
            dataset: An indexable collection of images compatible with plt.imshow.
            dataset_y: Class IDs aligned with `dataset`; each ID indexes the
                module-level `signs` list to label the image.
            ylabel (Default = No label): A string to be used as the y-axis label for each image.
            cmap (Default = None): Colour map passed to plt.imshow; 'gray' is
                used instead for 2-D (single-channel) images.
    """
    plt.figure(figsize=(15, 16))
    for i in range(6):
        plt.subplot(1, 6, i+1)
        # randrange excludes the upper bound. random.randint(0, len(dataset))
        # is inclusive and could return len(dataset), indexing past the end.
        indx = random.randrange(len(dataset))
        image = dataset[indx]
        # Use gray scale color map if there is only one channel. A per-image
        # variable keeps the caller's `cmap` argument untouched between images.
        image_cmap = 'gray' if len(image.shape) == 2 else cmap
        plt.imshow(image, cmap=image_cmap)
        plt.xlabel(signs[dataset_y[indx]])
        plt.ylabel(ylabel)
        plt.xticks([])
        plt.yticks([])
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.show()
    
def histogram_plot(dataset, label):
    """
    Draw a bar chart of how the values in `dataset` are distributed.
        Parameters:
            dataset: Input data to be binned; the number of bins is the
                module-level `n_classes`.
            label: A string to be used as the x-axis label of the histogram.
    """
    counts, edges = np.histogram(dataset, bins=n_classes)
    # Bars are centred on each bin and drawn at 70% of the bin width.
    bar_width = (edges[1] - edges[0]) * 0.7
    midpoints = (edges[1:] + edges[:-1]) / 2
    plt.bar(midpoints, counts, width=bar_width, align='center')
    plt.ylabel("Image count")
    plt.xlabel(label)
    plt.show()
    
def gray_scale(image):
    """
    Collapse an RGB image to a single gray channel with OpenCV.
        Parameters:
            image: An np.array compatible with plt.imshow, in RGB channel order.
        Returns:
            The result of cv2.cvtColor using the COLOR_RGB2GRAY conversion.
    """
    conversion_code = cv2.COLOR_RGB2GRAY
    gray = cv2.cvtColor(image, conversion_code)
    return gray

def local_histo_equalize(image, radius=30):
    """
    Apply local histogram equalization to grayscale images.
        Parameters:
            image: A grayscale image.
            radius (Default = 30): Radius of the disk-shaped neighbourhood
                used for the local equalization.
        Returns:
            The locally equalized image returned by skimage's rank.equalize.
    """
    kernel = morp.disk(radius)
    # The neighbourhood is passed positionally on purpose: newer scikit-image
    # releases renamed the `selem` keyword to `footprint`, while the second
    # positional argument works with either name.
    img_local = rank.equalize(image, kernel)
    return img_local

def image_normalize(image):
    """
    Rescale pixel values by dividing them by 255.
        Parameters:
            image: An np.array compatible with plt.imshow.
        Returns:
            The element-wise quotient image / 255 (values in [0, 1] when the
            input lies in [0, 255]).
    """
    return np.divide(image, 255)

def preprocess(data):
    """
    Run the full preprocessing pipeline over a batch of images:
    gray scale -> local histogram equalization -> division by 255.
        Parameters:
            data: An np.array of images; its first three dimensions size the output.
        Returns:
            A float np.array with the same first three dimensions as `data`
            plus a trailing axis of length 1.
    """
    grays = [gray_scale(img) for img in data]
    equalized = [local_histo_equalize(gray) for gray in grays]
    # Pre-allocate the result and fill it one normalized image at a time.
    result = np.zeros(data.shape[:3])
    for position, picture in enumerate(equalized):
        result[position] = image_normalize(picture)
    # Append the channel axis.
    return result[..., None]

In [0]:
# Run the preprocessing pipeline (gray scale, local histogram equalization,
# division by 255) over each split. The results are kept under new names so
# the raw X_* arrays remain available.
X_valid_preprocessed = preprocess(X_valid)
X_test_preprocessed = preprocess(X_test)
X_train_preprocessed = preprocess(X_train)

# Train & test model

## Decoder model

Uses `tf.keras` to decode a base64-encoded JPEG and resize it to a single-channel (grayscale) tensor of shape (32, 32, 1).

Note that this model _must_ be supplied URL-safe base64 (the alphabet that uses `-` and `_` instead of `+` and `/`). You can produce it with Python's [`base64` module](https://docs.python.org/3.7/library/base64.html), for example `base64.urlsafe_b64encode(image_bytes)`.

In [0]:
def preprocess_and_decode(img_str):
    """
    Turn one URL-safe base64 string holding a JPEG into a model-ready tensor.
        Parameters:
            img_str: A scalar string tensor containing the base64-encoded JPEG.
        Returns:
            A float32 tensor of shape (32, 32, 1) with values scaled to [0, 1].
    """
    img = tf.io.decode_base64(img_str)
    # channels=1 decodes straight to a single gray channel.
    img = tf.image.decode_jpeg(img, channels=1)
    img = tf.image.resize_images(img, (32, 32))
    img = tf.cast(img, tf.float32)
    # The classifier is trained on images divided by 255 (see image_normalize),
    # while decoded JPEG pixels are in [0, 255]; apply the same scaling here so
    # served inputs match the training distribution.
    img = img / 255.0
    # NOTE(review): the training pipeline also applies local histogram
    # equalization, which is not reproduced in this graph — confirm whether
    # clients are expected to equalize images before sending them.
    return img
  
# Decoder model: takes a batch of shape (batch, 1) holding one base64 string
# per row and maps preprocess_and_decode over the rows.
InputLayer = tf.keras.Input(shape=(1,), dtype="string")
OutputLayer = tf.keras.layers.Lambda(
    lambda batch: tf.map_fn(
        # Each row holds a single string; index 0 extracts it as a scalar.
        lambda row: preprocess_and_decode(row[0]),
        batch,
        dtype="float32",
    )
)(InputLayer)
base64_model = tf.keras.Model(inputs=InputLayer, outputs=OutputLayer)


## Convolutional neural net


In [121]:
# Number of traffic-sign classes; same value the dataset summary reports.
num_classes = 43

# Classifier for 32x32 single-channel images: two convolution + max-pooling
# stages, then a 1000-unit dense layer and a softmax over the classes.
conv = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, kernel_size=(5, 5), strides=(1, 1), activation='relu', input_shape=(32, 32, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    tf.keras.layers.Conv2D(32, (5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1000, activation='relu'),
    tf.keras.layers.Dense(units=num_classes, activation='softmax'),
])

# Labels are integer class IDs, hence the sparse categorical loss.
# NOTE(review): ds.parameter presumably records the hyperparameter with
# Dotscience and returns its value — confirm against the dotscience docs.
conv.compile(optimizer=ds.parameter("optimizer", 'adam'),
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

# Stop early based on validation accuracy rather than training accuracy:
# validation data is supplied to fit(), and training accuracy alone cannot
# reveal overfitting.
# NOTE(review): 'val_acc' is the metric key under the TF 1.x Keras this
# notebook runs on; newer releases call it 'val_accuracy' — update on upgrade.
es = tf.keras.callbacks.EarlyStopping(monitor='val_acc')

conv.fit(X_train_preprocessed, y_train,
         epochs=ds.parameter("epochs", 1),
         verbose=1,
         validation_data=(X_valid_preprocessed, y_valid),
         callbacks=[es])

Train on 34799 samples, validate on 4410 samples


<tensorflow.python.keras.callbacks.History at 0x7fa50555e630>

In [0]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is the
# accuracy on the held-out test split, recorded as the run's summary statistic.
test_metrics = conv.evaluate(X_test_preprocessed, y_test)
ds.summary("accuracy", test_metrics[1])

## Combined model of models

We wire up the decoder and the convolutional neural net, so we can serve them both as a single model.


In [0]:
# Chain the two models: the decoder's output tensor is fed through the trained
# classifier, giving one model from base64 strings to class probabilities.
base64_input = base64_model.input
final_output = conv(base64_model.output)
model = tf.keras.Model(inputs=base64_input, outputs=final_output)

# Save the `SavedModel` model to Dotscience


In [146]:
# Fetch the Keras session and save the model
# The signature definition is defined by the input and output tensors,
# and stored with the default serving key
import tempfile

MODEL_DIR = "./model"
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))
if os.path.isdir(export_path):
  print('\nAlready saved a model, cleaning up\n')
  !rm -r {export_path}

tf.saved_model.simple_save(
    tf.keras.backend.get_session(),
    export_path,
    inputs={'input_image_bytes': model.input}, 
    outputs={t.name:t for t in model.outputs})

print('\nSaved model:')
!ls -l {export_path}

export_path = ./model/1


Already saved a model, cleaning up

INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: ./model/1/saved_model.pb

Saved model:
total 26108
-rw-r--r-- 1 root root 26728178 Apr 18 14:38 saved_model.pb
drwxr-xr-x 2 root root     4096 Apr 18 14:38 variables


In [128]:
!saved_model_cli show --dir {export_path} --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_image_bytes'] tensor_info:
        dtype: DT_STRING
        shape: (-1, 1)
        name: input_15:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['sequential_1_2/dense_3/Softmax:0'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 43)
        name: sequential_1_2/dense_3/Softmax:0
  Method name is: tensorflow/serving/predict


In [138]:
# Attach model metadata to the run as labels.
# The "model" directory is declared as a run output via ds.output, and the
# value that call returns is used as the "model.directory" label.
ds.label("model.directory", ds.output("model"))
ds.label("model.framework", "tensorflow")
# Last expression of the cell: records the TensorFlow version in use.
# NOTE(review): the displayed cell output suggests ds.label returns the label
# value — confirm against the dotscience docs.
ds.label("model.framework.version", tf.__version__)

'1.13.1'

In [139]:
# Echo every entry of the exported version directory and declare each one as
# an output of this run.
for entry in os.listdir("model/1"):
    print(entry)
    ds.output("model/1/{}".format(entry))

variables
saved_model.pb


In [0]:
# NOTE(review): presumably finalises the run and sends the recorded inputs,
# outputs, labels, parameters and summary statistics to Dotscience — confirm
# against the dotscience documentation.
ds.publish()