# TensorFlow Data Generator

In [5]:
"""
This module is to create original image dataset with TensorFlow.

- Load image from directory.
- Converting image dataset into Tensorflow formatted data. 
"""

import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from tensorflow import keras

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from IPython.display import display, HTML
import requests
import csv

def get_files(path="../images/*"):
    list_ds = tf.data.Dataset.list_files(path)

    for f in list_ds.take(5):
        print(f.numpy())


def read_imagenet_csv_file(path="../images/imagenet/cat.csv"):
    """Load csv file and convert into list
    The original data is from iamgenet URLs of image file. 
    
    http://image-net.org/synset?wnid=n02123159
    
    Arguments
    ---------
    path : str
    
    Return
    ------
    image_list : list
    """
    
    with open(path, newline="") as f:
        reader = csv.reader(f)
        data = list(reader)
        
        image_list = []
        for l in data:
            image_list.append(l.pop())
        
    return image_list


def create_label(name="cat", num=10):
    l = []
    for i in range(num):
        l.append(name)
        
    return l


def request_image_url(image_urls):
    """Request image url and conver into raw data.
    
    Arguments
    ---------
    image_urls : list
    
    Return
    ------
    image_content_list : list of raw image data.
    """
    
    image_content_list = []
    for i in image_urls:
        try:
            image_data = requests.get(i).content
            image_content_list.append(image_data)
        except:
            print("Request Error : ", i)
            
    return image_content_list

def parser(image_content, label):
    """The parser() is used in dataset.map() to convert each data.
    
    Arguments
    ---------
    image_content : 
        Image content data.
    label         : str
        Label string.
        
    Return
    ------
    casted_image : preprocessed image data.
    label        : string data as it is.
    """
    # Decoding.
    decoded_image = tf.image.decode_jpeg(image_content)
    casted_image = tf.cast(decoded_image, tf.float32)
    return casted_image, label



In [6]:
if __name__ == "__main__":
    """Here is the whole process loading and preprocessing ImageNet dataset."""

    # Read image url from csv file.
    image_list = read_imagenet_csv_file()[:3]
    
    # Request image URL and convert into raw data.
    image_content_list = request_image_url(image_list)

    # Create label based on the above data.
    label_list = create_label(name="cat", num=len(image_content_list))

    # Preprocessing(Converting) to TensorFlow data.
    tf_image_list = tf.constant(image_content_list)
    tf_label_list = tf.constant(label_list)

    # Create dataset based on the above conversion.
    dataset = tf.data.Dataset.from_tensor_slices((tf_image_list, tf_label_list))

    # Decoding .jpg data into. 
    # map() processes each data step by step with given function.
    # Refer to the parser() function defined above. 
    parsed_dataset = dataset.map(parser)

    # Inside of parsed_dataset, each data is tupel (image, label).
    for i in parsed_dataset:
        print(type(i))
        print(i[1]) # Display label.

Request Error :  image_url
<class 'tuple'>
tf.Tensor(b'cat', shape=(), dtype=string)
<class 'tuple'>
tf.Tensor(b'cat', shape=(), dtype=string)
