<a href="https://colab.research.google.com/github/h-wi/2022-Winter-TIL/blob/main/TFLite_Model_Maker_Object_Detection_ipynb%EC%9D%98_%EC%82%AC%EB%B3%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Environment setup

In [None]:
# Install the system library and Python packages this notebook uses.
!sudo apt -y install libportaudio2
!pip install -q --use-deprecated=legacy-resolver tflite-model-maker
!pip install -q pycocotools
!pip install -q opencv-python-headless==4.1.2.30
# Replace whatever TensorFlow build is present with the pinned 2.8.0 release.
!pip uninstall -y tensorflow && pip install -q tensorflow==2.8.0

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  libportaudio2
0 upgraded, 1 newly installed, 0 to remove and 27 not upgraded.
Need to get 65.4 kB of archives.
After this operation, 223 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu focal/universe amd64 libportaudio2 amd64 19.6.0-1build1 [65.4 kB]
Fetched 65.4 kB in 0s (174 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package libportaudio2:amd64.
(Reading database ... 129499 

### Import libraries

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import os

from tflite_model_maker.config import QuantizationConfig
from tflite_model_maker.config import ExportFormat
from tflite_model_maker import model_spec
from tflite_model_maker import object_detector

import tensorflow as tf
# Stop early unless a TensorFlow 2.x build is the one that got imported.
assert tf.__version__.startswith('2')

# Keep only error-level messages from TensorFlow and absl.
tf.get_logger().setLevel('ERROR')
from absl import logging
logging.set_verbosity(logging.ERROR)

# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices is
# the supported replacement. bool(...) keeps the cell output a plain True/False.
bool(tf.config.list_physical_devices('GPU'))

 The versions of TensorFlow you are currently using is 2.8.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


True

In [None]:
# Extract the dataset archive stored on the mounted Drive
# (later cells read /content/Annotations and /content/Images).
!tar -xvf /content/drive/MyDrive/plastic_bottles.tar

In [None]:
!ls /content/Annotations | head -5

2001_jpg.rf.ba1857508348a8dae58dec77f018109d.xml
2002_jpg.rf.1ae8d5564ef53bfe1f56b5249db47cbf.xml
2003_jpg.rf.0490359686436e0dc62675ec8dd2f848.xml
2004_jpg.rf.992323f4a0714cada84705b35eb579b1.xml
2005_jpg.rf.cfaa3e66fd8168a7eabe6d9c80795850.xml


In [None]:
import xml.etree.ElementTree as ET
import cv2


def read_content(xml_file: str):
    """Read one XML annotation file and return its image path and normalised boxes.

    The image named in the ``<filename>`` element is loaded from
    ``/content/Images`` to obtain its height and width; every ``<object>``
    bounding box is then divided by those dimensions.

    Args:
        xml_file: Path to the annotation XML file.

    Returns:
        A ``(filename, labels)`` tuple. ``filename`` is the image path under
        ``/content/Images``. ``labels`` is a list with one single-key dict per
        ``<object>``: ``{class_name: [xmin, ymin, xmax, ymax]}``, where x values
        are divided by the image width and y values by the image height.

    Raises:
        FileNotFoundError: If the referenced image cannot be read.
    """
    root = ET.parse(xml_file).getroot()
    filename = os.path.join("/content/Images", root.find('filename').text)

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque AttributeError on `.shape`.
    image = cv2.imread(filename)
    if image is None:
        raise FileNotFoundError(
            f"Could not read image '{filename}' referenced by '{xml_file}'"
        )
    h, w = image.shape[:2]

    labels = []
    for obj in root.iter('object'):
        class_name = obj.find("name").text
        box = obj.find("bndbox")
        # float() accepts both integer and fractional pixel coordinates and
        # yields the same quotient as int() for integer text.
        xmin = float(box.find("xmin").text) / w
        ymin = float(box.find("ymin").text) / h
        xmax = float(box.find("xmax").text) / w
        ymax = float(box.find("ymax").text) / h
        labels.append({class_name: [xmin, ymin, xmax, ymax]})
    return filename, labels

filename, labels = read_content("/content/Annotations/2001_jpg.rf.ba1857508348a8dae58dec77f018109d.xml")

In [None]:
filename

'/content/Images/2001_jpg.rf.ba1857508348a8dae58dec77f018109d.jpg'

In [None]:
labels

[{'lid': [0.5048076923076923,
   0.20673076923076922,
   0.5480769230769231,
   0.24759615384615385]},
 {'label': [0.4951923076923077,
   0.2860576923076923,
   0.5865384615384616,
   0.4495192307692308]}]

In [None]:
import glob
import random

TRAIN_FRACTION = 0.7    # share of the annotation files assigned to TRAIN
MAX_TEST_IMAGES = 500   # at most this many files after the TRAIN range go to TEST


def dataset_fold_for(index, total, train_fraction=TRAIN_FRACTION, max_test=MAX_TEST_IMAGES):
    """Return the split name for the annotation file at position ``index``.

    Args:
        index: Zero-based position of the file in the annotation list.
        total: Number of files in the annotation list.
        train_fraction: Fraction of ``total`` that forms the TRAIN range.
        max_test: Number of positions after the TRAIN range assigned to TEST.

    Returns:
        ``"TRAIN"`` for the first ``int(total * train_fraction)`` positions,
        ``"TEST"`` for the next ``max_test`` positions, ``"VALIDATE"`` for the rest.
    """
    train_end = int(total * train_fraction)
    if index < train_end:
        return "TRAIN"
    # The original condition `i > t < t + 500` was a chained comparison whose
    # second half is always true, so VALIDATE received exactly one image.
    if index < train_end + max_test:
        return "TEST"
    return "VALIDATE"


# glob order depends on the file system; sort, then shuffle with a fixed seed
# so the split is reproducible and not tied to file-name order.
annotations = sorted(glob.glob("/content/Annotations/*"))
random.Random(42).shuffle(annotations)

# NOTE: if fewer than MAX_TEST_IMAGES files remain after the TRAIN range,
# no row is tagged VALIDATE.
with open("voc.csv", "w") as f:
    for i, path in enumerate(annotations):
        image_path, labels = read_content(path)
        dataset_fold = dataset_fold_for(i, len(annotations))

        for obj in labels:
            # Each label is a single-key dict: {class_name: [x1, y1, x2, y2]}.
            class_name, (x1, y1, x2, y2) = list(obj.items())[0]
            # Row layout: split, image path, class, x1, y1, two empty fields,
            # x2, y2, two empty fields.
            label = f"{dataset_fold},{image_path},{class_name},{x1},{y1},,,{x2},{y2},,"
            f.write(label + "\n")

### Model preparation

In [None]:
# Select the 'efficientdet_lite3' model spec, then load the three dataset
# splits from the voc.csv file written earlier in this notebook.
spec = model_spec.get('efficientdet_lite3')
train_data, validation_data, test_data = object_detector.DataLoader.from_csv('voc.csv')

### Model training

In [None]:
# Train on the training split for 20 epochs with batch size 16, with
# train_whole_model enabled and the validation split passed in.
model = object_detector.create(train_data, model_spec=spec, batch_size=16, epochs=20, train_whole_model=True, validation_data=validation_data)

In [None]:
# Export the trained model into the current directory.
# NOTE(review): presumably this produces the 'model.tflite' file the inference section loads — confirm.
model.export(export_dir='./')

### Inference via TensorFlow Lite

In [None]:
import cv2
from PIL import Image
import time
import glob

model_path = 'model.tflite'

# Build the class-name list: start with placeholders, then fill each slot
# from the model's label map (label ids are shifted down by one to index the list).
label_map = model.model_spec.config.label_map
num_classes = model.model_spec.config.num_classes
classes = ['???' for _ in range(num_classes)]

for label_id, label_name in label_map.as_dict().items():
    classes[label_id - 1] = label_name

# One random colour per class, used when drawing detections
COLORS = np.random.randint(0, 255, size=(len(classes), 3), dtype=np.uint8)

def preprocess_image(image_path, input_size):
    """Load an image file and prepare a batched uint8 tensor for the model.

    Args:
        image_path: Path of the image file to read.
        input_size: Target size passed to ``tf.image.resize``.

    Returns:
        ``(resized_img, original_image)``: the resized image with a leading
        axis added and cast to uint8, and the decoded 3-channel uint8 image
        before resizing.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.io.decode_image(raw_bytes, channels=3)
    original_image = tf.image.convert_image_dtype(decoded, tf.uint8)

    # Resize, add the leading axis, then cast the result back to uint8.
    batched = tf.image.resize(original_image, input_size)[tf.newaxis, :]
    return tf.cast(batched, dtype=tf.uint8), original_image


def detect_objects(interpreter, image, threshold):
    """Run the model on ``image`` and collect detections scoring at least ``threshold``.

    Args:
        interpreter: Object whose ``get_signature_runner()`` returns a callable
            accepting ``images=...`` and returning a dict of outputs.
        image: Input passed to the signature runner as ``images``.
        threshold: Minimum score a detection needs to be kept.

    Returns:
        A list of dicts with keys ``'bounding_box'``, ``'class_id'`` and
        ``'score'``, in the order the model reported them.
    """
    run_model = interpreter.get_signature_runner()

    # Time the inference call and report it in milliseconds
    started = time.time()
    output = run_model(images=image)
    print(f"Elapsed: {(time.time() - started)*1000:.3f} ms")

    # Unpack the outputs: detection count, scores, class ids, boxes
    num_detections = int(np.squeeze(output['output_0']))
    det_scores = np.squeeze(output['output_1'])
    det_classes = np.squeeze(output['output_2'])
    det_boxes = np.squeeze(output['output_3'])

    # Only the first `num_detections` entries are considered.
    return [
        {
            'bounding_box': det_boxes[idx],
            'class_id': det_classes[idx],
            'score': det_scores[idx],
        }
        for idx in range(num_detections)
        if det_scores[idx] >= threshold
    ]


def run_odt_and_draw_results(image_path, interpreter, threshold=0.5):
    """Detect objects in the image at ``image_path`` and draw them onto a copy of it.

    Args:
        image_path: Path of the image file to analyse.
        interpreter: Interpreter handed to ``detect_objects``; its first input's
            shape supplies the height and width the image is resized to.
        threshold: Minimum score for a detection to be drawn.

    Returns:
        A uint8 array of the original image with a rectangle and a
        "<class>: <score>%" caption drawn for every kept detection.
    """
    # The model's first input has shape (_, height, width, _)
    input_shape = interpreter.get_input_details()[0]['shape']
    _, input_height, input_width, _ = input_shape

    model_input, original_image = preprocess_image(
        image_path, (input_height, input_width)
    )
    detections = detect_objects(interpreter, model_input, threshold=threshold)

    # Draw on a NumPy copy of the un-resized image
    canvas = original_image.numpy().astype(np.uint8)
    img_h, img_w = canvas.shape[0], canvas.shape[1]

    for det in detections:
        # Boxes come as (ymin, xmin, ymax, xmax) relative to the image size;
        # scale them to pixel coordinates of the original image.
        rel_ymin, rel_xmin, rel_ymax, rel_xmax = det['bounding_box']
        left = int(rel_xmin * img_w)
        right = int(rel_xmax * img_w)
        top = int(rel_ymin * img_h)
        bottom = int(rel_ymax * img_h)

        class_id = int(det['class_id'])
        color = [int(c) for c in COLORS[class_id]]
        cv2.rectangle(canvas, (left, top), (right, bottom), color, 2)

        # Put the caption above the box unless that would be too close to the top edge
        text_y = top - 15 if top - 15 > 15 else top + 15
        caption = "{}: {:.0f}%".format(classes[class_id], det['score'] * 100)
        cv2.putText(canvas, caption, (left, text_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return canvas.astype(np.uint8)

In [None]:
from IPython.display import display
import random
# Collect the path of every entry under /content/Images; the inference cell samples from this list.
images = glob.glob("/content/Images/*")

In [None]:
# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Run object detection and show the detection results
DETECTION_THRESHOLD = 0.5

for _ in range(5):
    # random.randint(1, len(images)) is inclusive on both ends: it could index
    # one past the end of the list and could never select the first image.
    image = random.choice(images)

    # Write a downscaled PNG copy (at most 320x320) for inference.
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
    with Image.open(image) as im:
        im.thumbnail((320, 320), Image.LANCZOS)
        im.save("/tmp/image.png", 'PNG')

    # Run inference and draw detection result on the local copy of the original file
    detection_result_image = run_odt_and_draw_results(
        "/tmp/image.png",
        interpreter,
        threshold=DETECTION_THRESHOLD
    )

    # Show the detection result
    display(Image.fromarray(detection_result_image))