# Understanding Images Using Tensorflow 2.0 (beta)

https://www.tensorflow.org/beta/tutorials/load_data/images  
https://www.tensorflow.org/tutorials/load_data/tf_records

You will need these skills: understanding what an image, a tf.Example, a serialized example, etc. are.  
You won't get far with served models without this understanding.

## TensorFlow 2.0 Beta

### Do this first, then UnderstandingExample.ipynb
This code requires TF 2.0, but you can run the Example code with 1.14 (with eager execution enabled).

In [None]:
import os, pathlib
import random
import IPython.display as display

import matplotlib.pyplot as plt

!pip install -q tensorflow==2.0.0-beta1
# you'll get a tensorflow-serving-api error

import tensorflow as tf

# cfa code utilities
from code.cfa_utils.tar_util import extract_tarball_directory

In [None]:
# You really need TensorFlow 2.0.x for this notebook (the install cell pins
# 2.0.0-beta1); display the imported version to confirm before continuing.
tf.__version__

In [None]:
# Passed as num_parallel_calls to Dataset.map() later in this notebook so that
# tf.data chooses the level of parallelism itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [None]:
# if you are using tf 1.14, you need to turn on eager execution - but you should be on 2.0
# adding it here mainly for reference, you can do most of the tf.Example tutorial in 1.14
# tf.enable_eager_execution()

## Globals

you can get some test images from S3 - look for cfa_products / test_images  
There is also a test.tfrecord (that was created as a slice of the train/val split)

In [None]:
# Everything local is resolved against the directory the notebook runs in.
PROJECT_DIR = os.getcwd()


def _in_project(relative_path):
    """Return relative_path joined onto PROJECT_DIR."""
    return os.path.join(PROJECT_DIR, relative_path)


# S3 locations of the source data
S3_ALL_IMAGES = "s3://cfaanalyticsresearch-sagemaker/datasets/cfa_products/all_images/"
S3_ALL_ANNOTATIONS = "s3://cfaanalyticsresearch-sagemaker/datasets/cfa_products/all_annotations"
S3_TEST_IMAGES = "s3://cfaanalyticsresearch-sagemaker/datasets/cfa_products/test_images"

# Local working directories for downloaded tarballs and their contents
TARBALL_DIR = _in_project("data/tarballs")
TARBALL_EXTRACT = _in_project("data/tarball_extract")

# Local directories holding the images and annotations used below
IMAGE_DIR = _in_project("data/jpeg_images")
ANNOTATION_DIR = _in_project("data/annotations")

# Exported model and its label map
MODEL_PATH = _in_project("trained_model/export/Servo/1564778509")
LABEL_MAP = _in_project("code/cfa_prod_label_map.pbtxt")

# you can get data using the TrainModel_Step1_Local notebook
TEST_TFRECORDS_PATH = _in_project("code/tfrecords/test/")

SAMPLE_IMAGE = _in_project("data/new_jpeg_images/20190710_variety_1562781002.jpg")


## Data - choice
### Large Set of Training Data
### Test Images

Don't do both - choose a set of data, then go to that block

In [None]:
# Execute THIS block for TRAINING DATA

# TRAINING DATA - ALL IMAGES
# - delete tarballs first
# - delete images first - you can comment this out if you don't like it
! rm {TARBALL_DIR}/*.tar.gz -rf
! rm {IMAGE_DIR}/*.jpg -rf
! rm {ANNOTATION_DIR}/*.xml -rf

# get from 
! aws s3 cp {S3_ALL_IMAGES} {TARBALL_DIR} --recursive --quiet

jpg_ext = '.jpg'
extract_tarball_directory(TARBALL_DIR, TARBALL_EXTRACT, jpg_ext, IMAGE_DIR)

# TRAINING DATA - ALL ANNOTATIONS
# - delete tarballs first
# - delete annotations first - you can comment this out if you don't like it
! rm {TARBALL_DIR}/*.tar.gz -rf
! rm {ANNOTATIONS_DIR}/*.xml -rf

# get from 
! aws s3 cp {S3_ALL_ANNOTATIONS} {TARBALL_DIR} --recursive --quiet

xml_ext = '.xml'
extract_tarball_directory(TARBALL_DIR, TARBALL_EXTRACT, xml_ext, ANNOTATION_DIR)

# clean up
! rm {TARBALL_DIR}/*.tar.gz -rf
! ls {IMAGE_DIR} | wc
! ls {ANNOTATION_DIR} | wc

In [None]:
# Excecute THIS block for Test
! rm {IMAGE_DIR}/*.jpg -rf
! rm {ANNOTATIONS_DIR}/*.xml -rf

# test images are not tarballed
# get from S3
! aws s3 cp {S3_TEST_IMAGES} {IMAGE_DIR} --recursive --quiet

In [None]:
# Collect every JPEG in IMAGE_DIR as a plain string path.
# Globbing on '*.jpg' also omits the .gitkeep placeholder.
data_root = pathlib.Path(IMAGE_DIR)
all_image_paths = [str(jpeg) for jpeg in data_root.glob('*.jpg')]

# randomize the order (in place)
random.shuffle(all_image_paths)

# number of images found; shown as the cell output
image_count = len(all_image_paths)
image_count

In [None]:
# Preview three images picked at random (the same image may come up twice)
for n in range(3):
    image_path = random.choice(all_image_paths)
    preview = display.Image(image_path)
    display.display(preview)


## TensorFlow Utilities
The tutorial says:  
##### TensorFlow includes all the tools you need to load and process images:

In [None]:
# Show the first image of the shuffled path list

img_path = all_image_paths[0]
print(f"img_path: {img_path}")
first_image = display.Image(img_path)
display.display(first_image)

In [None]:
# Read the image file into a tensor
# - note - the tensor holds the file contents as one string of raw, still-encoded bytes

img_raw = tf.io.read_file(img_path)
print(f"img_raw type: {type(img_raw)}")

# only the first 100 characters of the repr - the full byte string is long
raw_preview = repr(img_raw)[:100]
print(raw_preview + "...")

In [None]:
# Decode the raw bytes into an image tensor
# - the result is an EagerTensor, so its value can be printed directly
# - with eager execution there is no session to run to get the value
img_tensor = tf.image.decode_image(img_raw)
print(type(img_tensor), img_tensor)
print(img_tensor.shape, img_tensor.dtype, sep="\n")

In [None]:
# Operations on the tensor: resize to 300x300 ...
resized = tf.image.resize(img_tensor, [300, 300])

# ... then normalize by dividing the pixel values by 255
img_final = resized / 255.0

# pull the values out once to report their range
pixel_values = img_final.numpy()
print("tensor shape:", img_final.shape)
print("tensor range in values:", pixel_values.min(), pixel_values.max())
print("tensor data type:", img_final.dtype)

## Define Functions

In [None]:
def preprocess_image(image):
    """Decode JPEG bytes into a 192x192, 3-channel image divided by 255.

    Args:
        image: encoded JPEG contents, e.g. the result of tf.io.read_file.

    Returns:
        The decoded image resized to 192x192 and normalized to the [0,1] range.
    """
    decoded = tf.image.decode_jpeg(image, channels=3)
    resized = tf.image.resize(decoded, [192, 192])
    return resized / 255.0

In [None]:
def load_and_preprocess_image(path):
    """Read the file at `path` and return it passed through preprocess_image."""
    raw_bytes = tf.io.read_file(path)
    preprocessed = preprocess_image(raw_bytes)
    return preprocessed

In [None]:
# test our functions on the first image of the shuffled list

image_path = all_image_paths[0]

# pass the path chosen in this cell, not a variable left over from an earlier
# cell, so the cell works on its own
plt.imshow(load_and_preprocess_image(image_path))
plt.grid(False)
print()

## DataSets


In [None]:
# Build a dataset from the list of image path strings
image_path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
print(f"image path dataset type: {type(image_path_ds)}")

# note - the elements are strings, with no size
print(f"image path dataset: {image_path_ds}")

In [None]:
# Using dataset map()
# you can map the dataset - super cool!
# - use the functions we defined earlier
# - num_parallel_calls=AUTOTUNE lets tf.data decide how many elements to process in parallel

image_ds = image_path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

# report on the mapped dataset itself (image_ds), not the path dataset it was built from
print("image dataset type:", type(image_ds))
print("image dataset:", image_ds)  # elements are the preprocessed image tensors

In [None]:
# you can pull the value out by iterating on the Dataset

# Create the figure once so the four images share a single 2x2 grid; creating it
# inside the loop would put each image alone in its own figure.
plt.figure(figsize=(8, 8))
for n, image in enumerate(image_ds.take(4)):
    plt.subplot(2, 2, n + 1)
    plt.imshow(image)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

# show the completed grid after all four subplots have been drawn
plt.show()