# Preparation
### Transfer xml to csv file

In [1]:
# Tutorial:
# https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/training.html

'''
Tensorflow GPU version: 2.0.0
CUDA version: 10.0
OS system: Windows 10

Notice: the tutorial tensorflow version is 1.9
'''

# packages for xml to csv
import os
import glob
import pandas as pd
import argparse
import xml.etree.ElementTree as ET

In [2]:
def xml_to_csv(input_path=None, output_csv_path=None):
    # if input variables are not defined, then put them in current working directory and input_path separately
    if(input_path==None):
        input_path = os.getcwd()
    if(output_csv_path==None):
        output_csv_path = os.getcwd()
    
    xml_list = []
    for xml_file in os.listdir(input_path):
        xml_file_path = os.path.join(input_path, xml_file)
        
        # parse the xml file
        tree = ET.parse(xml_file_path) 
        root = tree.getroot()

        # root.findall('object'): find the <object> tag which can lead to the number of labeled items
        # better open the .xml file to check the tags and infos inside
        for member in root.findall('object'):
            value = (root.find('filename').text,      # info of image file name
                     int(root.find('size')[0].text),  # width of the image
                     int(root.find('size')[1].text),  # height of the image
                                                      # depth of the image can be found by -- int(root.find('size')[2].text)
                     member[0].text,                  # the first tag under object tag which is the label
                     int(member[1][0].text),          # the label box xmin
                     int(member[1][1].text),          # the label box ymin
                     int(member[1][2].text),          # the label box xmax
                     int(member[1][3].text),          # the label box ymax
                     )
            xml_list.append(value)

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    
    # write the data into csv
    csv_file = os.path.join(output_csv_path, "label.csv")
    xml_df.to_csv(csv_file, index=False)

    print('Successfully converted xml to csv: {}'.format(output_csv_path))

### Transfer csv to TFrecord file

In [3]:
# packages for csv to record
import os
import io

import random
import pandas as pd
import tensorflow as tf
import sys


# sys.path is a list with paths to modules
# sys.path.append can add a new path while using python, but the new path will disappear after python is closed

    # sys.path.append("../../models/research")

sys.path.append("C:/Users/user/Tensorflow/models/research")
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

In [4]:
def label_dict(dataframe):
    labels = dataframe['class']

    embedding_list = []
    for i, label in enumerate(set(labels)):
        embed = [i, label]
        embedding_list.append(embed)
    return embedding_list

def class_text_to_int(label, embedding):
    for INDEX, CLASS in embedding:
        if label == CLASS:
            return INDEX
        else:
            None
            
def split_train_valid(files, valid_ratio=0.2):
    random.seed("HoHoHo, Merry Christmas")
    valid_files = random.sample(files, int(len(files)*valid_ratio))
    train_files = set(files) - set(valid_files)
    return train_files, valid_files

In [5]:
def create_tf_example(file, path, group, embedding):
    # tf.io.gfile.GFile: like the open() which is built-in python but faster
    # 'rb': read data in bytes
    # 'r': read data in string
    with tf.io.gfile.GFile(os.path.join(path, file), 'rb') as file_id:
        encoded_jpg = file_id.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)   # encode jpg file by io.BytesIO
    img = Image.open(encoded_jpg_io)           # read the encoded jpg file
    width, height = img.size                   # save the size(width and height) of the jpg file
    filename = file.encode('utf8')              # encode file name by utf-8

    image_format = b'jpg'                      # check the format is matching the images
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    # index for data info stored in csv file
    # -----------
    # 0, filename
    # 1, width
    # 2, height
    # 3, class
    # 4, xmin
    # 5, ymin
    # 6, xmax
    # 7, ymax
    # -----------

    value_df = group.get_group(file)   # get the data under current group in the loop
    for object_info in value_df.values:
        classes_text.append(object_info[3].encode('utf8')) # for save label type b'label', where 'b' stands for binary data
        classes.append(class_text_to_int(object_info[3], embedding))  # class text to integer, namely, embedding or categorize

        xmins.append(object_info[4] / width)
        ymins.append(object_info[5] / width)
        xmaxs.append(object_info[6] / height)
        ymaxs.append(object_info[7] / height)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example

### Create label map
<pre>
<font size = 4>label map is like the format below</font>
<b>
item {
    id: 1
    name: 'cat'
}

item {
    id: 2
    name: 'dog'
}
</b>
<font size = 4>which will be used for training and object detection so we must create them</font>
</pre>

In [6]:
def create_label_map(embedding):
    with open("label.pbtext", "w") as label_map:    
        for ID, NAME in embedding:
            line0 = "item {\n"
            line1 = "    id: {}\n".format(ID)
            line2 = "    name: '{}'\n".format(NAME)
            line3 = "}\n"

            print(line0 + line1 + line2 + line3, file=label_map)

### Main function

In [7]:
def main():
    xml_to_csv(train_path_annotation, file_path)                 # transfer xml to csv file
    img_path = train_path_img                                    # path for images(training)
    output_path = os.path.join(os.getcwd(), "output_files")      # path for output as .record file
    csvfile = os.path.join(file_path, "label.csv")               # path for csv file
    examples = pd.read_csv(csvfile)                              # read csv file which contains the info for labels 
    embedding = label_dict(examples)                             # create a embedding list for create_tf_example()
    create_label_map(embedding)                                  # create label map for training as .pbtext file
    
    ### print out label number created for modifying .config
    print("\nTotal label number: {}".format(len(embedding)))       
    print("-----")
    for idx, label in embedding:
        print("{}: {}".format(idx, label))
    
    ### print out the frequence table for labels
    print("\nFrequence Table")
    print("-----")
    print(pd.value_counts(examples["class"]))
    
    ### use TFRecordWriter to write TFRecord file
    group = examples.groupby("filename")                         # create tensorflow example by each jpg file
    files = group.groups.keys()                                  # get file names from groupby data
    train_files, valid_files = split_train_valid(files)          # separate train and validation images
    
    # train.record
    writer = tf.io.TFRecordWriter(os.path.join(output_path, "train.record"))
    for jpg in train_files:
        tf_example = create_tf_example(jpg, img_path, group, embedding)
        writer.write(tf_example.SerializeToString())             # serialize the output by package protobuf
    writer.close()
    
    # valid.record
    writer = tf.io.TFRecordWriter(os.path.join(output_path, "valid.record"))
    for jpg in valid_files:
        tf_example = create_tf_example(jpg, img_path, group, embedding)
        writer.write(tf_example.SerializeToString())             # serialize the output by package protobuf
    writer.close()
    
    print()
    print('Successfully created the TFRecords: {}'.format(output_path))

# Process transformation

In [8]:
class color:
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'

# get image path
project04_root = os.path.dirname(os.getcwd())
img_path = os.path.join(project04_root, "images")
print(color.BOLD + "Data" + color.END)
print("------")
print("Images: {}\n".format(img_path))

# get image path of train and test separately
train_path = os.path.join(img_path, "train_cdc")
test_path = os.path.join(img_path, "test_cdc")
print(color.BOLD + "Train data and Test data" + color.END)
print("------")
print("Train: {}".format(train_path))
print("Test: {}\n".format(test_path))


# get path for annotation of train dataset
train_path_annotation = os.path.join(train_path, "train_annotations")
train_path_img = os.path.join(train_path, "train_images")
test_path_img  = os.path.join(test_path, "test_images")
print(color.BOLD + "Annotation and Images" + color.END)
print("------")
print("Train annotation: {}".format(train_path_annotation))
print("Train images: {}".format(train_path_img))
print("Test images: {}\n".format(test_path_img))


# output file path
file_path = os.path.join(os.getcwd(), "output_files")
print(color.BOLD + "Save output files" + color.END)
print("------")
print("Path for saving outputs: {}".format(file_path))

[1mData[0m
------
Images: D:\DeepLearning\Project04_ContainerDetection\images

[1mTrain data and Test data[0m
------
Train: D:\DeepLearning\Project04_ContainerDetection\images\train_cdc
Test: D:\DeepLearning\Project04_ContainerDetection\images\test_cdc

[1mAnnotation and Images[0m
------
Train annotation: D:\DeepLearning\Project04_ContainerDetection\images\train_cdc\train_annotations
Train images: D:\DeepLearning\Project04_ContainerDetection\images\train_cdc\train_images
Test images: D:\DeepLearning\Project04_ContainerDetection\images\test_cdc\test_images

[1mSave output files[0m
------
Path for saving outputs: D:\DeepLearning\Project04_ContainerDetection\workspace\output_files


# Run preprocessing

In [9]:
main()

Successfully converted xml to csv: D:\DeepLearning\Project04_ContainerDetection\workspace\output_files

Total label number: 13
-----
0: water_tower
1: toilet
2: tub
3: tire
4: bowl
5: bottle
6: bucket
7: plastic_bag
8: box
9: washing_machine
10: aquarium
11: styrofoam
12: plate

Frequence Table
-----
bucket             2200
tire               1357
bowl               1115
plastic_bag        1047
bottle              650
box                 558
styrofoam           410
plate               264
water_tower         174
tub                  55
toilet               46
aquarium             34
washing_machine      25
Name: class, dtype: int64

Successfully created the TFRecords: D:\DeepLearning\Project04_ContainerDetection\workspace\output_files
