# Understanding
## TensorFlow tf.io API for Python

https://www.tensorflow.org/api_docs/python/tf/io

Be careful with version differences: this notebook targets TensorFlow v1.14 and v2.0, and parts of the API differ between the two.

Why: these standard routines help with file I/O and with creating data for TensorFlow. Use these utilities wherever possible!


In [None]:
import os, sys
import json
import numpy as np
import cv2

import tensorflow as tf
from matplotlib import pyplot as plt

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
# This is needed to display the images.
%matplotlib inline

In [None]:
# GLOBALs
# Every path is derived from the notebook's working directory so the notebook
# is not tied to one particular host.

PROJECT_DIR = os.getcwd()
IMAGE_DIR = os.path.join(PROJECT_DIR, "data/jpeg_images")

MODEL_PATH = os.path.join(PROJECT_DIR, "trained_model/export/Servo/1564865938")
LABEL_MAP = os.path.join(PROJECT_DIR, "code/cfa_prod_label_map.pbtxt")

# you can get data using the TrainModel_Step1_Local notebook
TEST_TFRECORDS_PATH = os.path.join(PROJECT_DIR, "code/tfrecords/test/")

# Full path of the sample image.  Previously this name was assigned twice
# (a bare file name, then a hard-coded absolute path on a specific machine);
# it is now defined once, relative to IMAGE_DIR.
SAMPLE_IMAGE = os.path.join(IMAGE_DIR, "20190710_variety_1562781002.jpg")

### tf.io.gfile

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/io/gfile

Note that common file I/O operations (listing, globbing, reading, writing) can be accomplished with this module.

In [None]:
# List the entries of IMAGE_DIR and report how many were found.
file_list = tf.io.gfile.listdir(IMAGE_DIR)
file_list_length = len(file_list)
print("directory list length:", file_list_length)

In [None]:
# directory list based on a glob
# The pattern matches every file in IMAGE_DIR whose name ends in "9.jpg".
glob = IMAGE_DIR + '/*9.jpg'
print(glob)
file_list = tf.io.gfile.glob(glob)
file_list_length = len(file_list)
print("directory list length:", file_list_length)

# Fail with a clear message instead of a bare IndexError when nothing matches
# (e.g. IMAGE_DIR is missing or holds no matching files).
if not file_list:
    raise FileNotFoundError("no files match glob pattern: " + glob)
sample_image = file_list[0]

# note - glob returns full paths, so sample_image can be opened directly
print(sample_image)

In [None]:
# Read the raw file contents in binary mode; the result is the still-encoded
# JPEG as a bytes object.
# Uses sample_image (picked by the glob cell), not the SAMPLE_IMAGE global.

with tf.io.gfile.GFile(sample_image, 'rb') as fid:
    encoded_jpg = fid.read()
print("encoded_jpg:", type(encoded_jpg))  # bytes

### Read Image Comparison
- tf.io.gfile.GFile => encoded bytes
- tf.io.read_file   => Tensor
- tf.keras...       => PIL.Image.Image

In [None]:
# tf.io.read_file: same file as above, but the contents come back as a Tensor
# rather than a Python bytes object.
image = tf.io.read_file(sample_image)
print("tf.io.read_file:", type(image))

### tf.keras
The `tf.keras` image utilities are better suited for manipulating the image data, its shape, and its type.

In [None]:
# tf.keras: load the image resized to 300x300, then show it without axes.
image = tf.keras.preprocessing.image.load_img(
    sample_image,
    target_size=[300, 300],
)
print(type(image))
plt.imshow(image)
plt.axis('off')

In [None]:
# Convert PIL.Image.Image => numpy array.
# NOTE: img_to_array only converts the container/dtype; pixel values are not
# rescaled here.  Normalization happens in the MobileNet preprocess step.
x = tf.keras.preprocessing.image.img_to_array(image)
print(type(x), x.shape)

In [None]:
# Preprocess as required by MobileNet and add the leading instance (batch)
# dimension.
# preprocess_input may overwrite a float input array in place, and
# x[tf.newaxis, ...] is only a view of x, so pass a copy: this keeps x intact
# and makes the cell safe to re-run.
x32 = tf.keras.applications.mobilenet.preprocess_input(x[tf.newaxis, ...].copy())
print("x32:", type(x32), x32.shape, x32.dtype)
print("one pixel RGB - normalized:", x32[0, 1, 1])

In [None]:
# unsigned 8 bit
# x32 holds MobileNet-normalized floats in [-1, 1].  Casting those directly to
# uint8 throws the image away (fractions truncate and negative values have no
# valid uint8 representation), so map back to the [0, 255] pixel range first.
x8 = np.clip(np.rint((x32 + 1.0) * 127.5), 0, 255).astype(np.uint8)
print("x8:", type(x8), x8.shape, x8.dtype)
print("one pixel RGB - uint8:", x8[0, 1, 1])

## jpeg utilities

In [None]:
# tf.io.is_jpeg expects the encoded file contents as a byte string, so read
# the sample image in binary mode and pass the bytes to it.
with tf.io.gfile.GFile(sample_image, 'rb') as fid:
    encoded_jpg = fid.read()
tf.io.is_jpeg(encoded_jpg)

In [None]:
# Counter-example: arbitrary bytes that are not JPEG data.
false_image = b'12345'
tf.io.is_jpeg(false_image)