# 第10章 了解深度學習的圖像處理與語言處理（1～4節）
在此要學習以深度學習偵測物體的演算法。

若是在Google Colaboratory的環境下執行程式，請確定已將「硬體加速器」設定為「GPU」。

In [None]:
#Colaboratory environment setup: mount Google Drive and move into the chapter folder
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/MathProgramming/Chapter10

In [None]:
#Library setup: install the packages listed in requirements1.txt
!pip install -q -r ./requirements1.txt

## 10-3 試著利用YOLO偵測物體

In [None]:
#Download and extract the dataset.
#If the download does not work, run the code below.
!wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
!tar -xvf ./VOCtrainval_06-Nov-2007.tar

In [None]:
#Download yolov3-tf2 into ./yolov3_tf2 and check out a fixed commit
!git clone https://github.com/zzh8829/yolov3-tf2.git ./yolov3_tf2
%cd ./yolov3_tf2
!git checkout c43df87d8582699aea8e9768b4ebe8d7fe1c6b4c
%cd ../

In [None]:
#Download the pretrained YOLO model
!wget https://pjreddie.com/media/files/yolov3-tiny.weights 

In [None]:
#Convert the downloaded YOLO model into a format usable from Keras
!python ./yolov3_tf2/convert.py --weights ./yolov3-tiny.weights --output  ./yolov3_tf2/checkpoints/yolov3-tiny.tf --tiny

In [None]:
from PIL import Image

#Pick one image from the downloaded dataset and display it
#(being the last expression of the cell, the opened image is rendered by the notebook)
Image.open("./VOCdevkit/VOC2007/JPEGImages/006626.jpg")

In [None]:
#Show the annotation that belongs to the image
#Read through a context manager so the file handle is closed right away
with open("./VOCdevkit/VOC2007/Annotations/006626.xml") as annotation_file:
    annotation = annotation_file.read()
print(annotation)

In [None]:
import xmltodict
import numpy as np
from tensorflow.keras.utils import Sequence
import math
import yolov3_tf2.yolov3_tf2.dataset as dataset

#Number of label rows produced for every image (real boxes first, zero rows after)
yolo_max_boxes = 100


#Convert the format of the annotation data
def parse_annotation(annotation, class_map):
    """Turn one parsed annotation into a fixed-length list of label rows.

    Args:
        annotation: Mapping with a ``size`` entry holding ``width`` and
            ``height``, and an optional ``object`` entry. ``object`` may be a
            single mapping or a list of mappings; each one provides ``name``
            and a ``bndbox`` with ``xmin``/``ymin``/``xmax``/``ymax``.
        class_map: Mapping from object name to its numeric class id.

    Returns:
        A list of exactly ``yolo_max_boxes`` rows of the form
        ``[xmin, ymin, xmax, ymax, class_id]``. Coordinates are divided by the
        image width/height. Rows after the real objects are
        ``[0, 0, 0, 0, 0]``. Objects beyond ``yolo_max_boxes`` are dropped so
        that every image yields the same number of rows.

    Raises:
        KeyError: If a required key is missing or an object name is not in
            ``class_map``.
    """
    width = int(annotation['size']['width'])
    height = int(annotation['size']['height'])

    #A lone object arrives as a single mapping rather than a list
    objects = annotation['object'] if 'object' in annotation else []
    if not isinstance(objects, list):
        objects = [objects]

    label = []
    #Cap the number of boxes so the result never exceeds yolo_max_boxes rows
    for obj in objects[:yolo_max_boxes]:
        box = obj['bndbox']
        label.append([
            float(box['xmin']) / width,
            float(box['ymin']) / height,
            float(box['xmax']) / width,
            float(box['ymax']) / height,
            class_map[obj['name']],
        ])

    #Pad with independent zero rows up to the fixed length
    label.extend([0, 0, 0, 0, 0] for _ in range(yolo_max_boxes - len(label)))
    return label

In [None]:
from yolov3_tf2.yolov3_tf2.dataset import transform_images

#Sequence class that loads only the images needed for each training batch
class ImageDataSequence(Sequence):
    """Batch provider that reads VOC2007 images and annotations on demand.

    Image and annotation paths are built from the given file-name stems under
    ``./VOCdevkit/VOC2007``. Each call to ``__getitem__`` loads the images of
    one batch. The transformed labels of a batch are computed once and then
    reused from ``labels_cache``.

    Note: ``tf`` is looked up in the module globals when a batch is requested,
    so ``tensorflow`` must have been imported as ``tf`` before the first batch
    is fetched.
    """

    def __init__(self, file_name_list, batch_size,  anchors, anchor_masks, class_names, data_shape=(256,256,3)):
        """Store the configuration and prepare the path lists and label cache.

        Args:
            file_name_list: File-name stems (no directory, no extension).
            batch_size: Number of samples per batch.
            anchors: Anchor array forwarded to ``dataset.transform_targets``.
            anchor_masks: Anchor masks forwarded to ``dataset.transform_targets``.
            class_names: Class names; the position in this list is the class id.
            data_shape: Input shape; only ``data_shape[0]`` is used as the
                image size.
        """
        #Let the Keras base class initialise itself (newer Keras releases expect this)
        super().__init__()

        #Dictionary that maps each class name to its numeric id
        self.class_map = {name: idx for idx, name in enumerate(class_names)}
        self.file_name_list = file_name_list

        self.image_file_name_list = ["./VOCdevkit/VOC2007/JPEGImages/"+image_path + ".jpg" for image_path in self.file_name_list]
        self.annotation_file_name_list = ['./VOCdevkit/VOC2007/Annotations/' + image_path+ ".xml" for image_path in self.file_name_list]

        self.length = len(self.file_name_list)
        self.data_shape = data_shape
        self.batch_size = batch_size
        self.anchors = anchors
        self.anchor_masks = anchor_masks

        #One cache slot per batch, filled lazily by __getitem__
        self.labels_cache = [None] * len(self)

    #Called automatically for every batch. Loads only the required image files and their labels.
    def __getitem__(self, idx):
        """Return ``(images, labels)`` for batch number ``idx``.

        Args:
            idx: Zero-based batch index.

        Returns:
            A tuple of the stacked image array and the transformed labels as
            returned by ``dataset.transform_targets``.
        """
        #idx says which batch is requested, so it determines the sample range to load
        batch_range = range(idx * self.batch_size, (idx + 1) * self.batch_size)

        #Load and preprocess the images
        images = []
        for index in batch_range:
            with open(self.image_file_name_list[index], 'rb') as image_file:
                img_raw = tf.image.decode_jpeg(image_file.read(), channels=3)
            images.append(transform_images(img_raw, self.data_shape[0]))

        #Label preprocessing is slow, so the result is cached per batch.
        #On a cache hit the annotation files are not read or parsed again.
        labels = self.labels_cache[idx]
        if labels is None:
            raw_labels = []
            for index in batch_range:
                with open(self.annotation_file_name_list[index]) as annotation_file:
                    annotation = xmltodict.parse(annotation_file.read())
                #Convert the annotation into usable label rows
                raw_labels.append(parse_annotation(annotation["annotation"], self.class_map))

            labels = tf.convert_to_tensor(raw_labels, tf.float32)
            labels = dataset.transform_targets(labels, self.anchors, self.anchor_masks, self.data_shape[0])
            self.labels_cache[idx] = labels

        images = np.array(images)
        return images, labels

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.file_name_list) // self.batch_size


In [None]:
from  yolov3_tf2.yolov3_tf2.models import  YoloV3Tiny, YoloLoss
from yolov3_tf2.yolov3_tf2.utils import freeze_all
import tensorflow as tf

#Training configuration shared by the following cells
batch_size=16
data_shape=(416,416,3)
#The position of a name in this list is used as its class id
class_names =  ["person", "bird", "cat","cow","dog", "horse","sheep", "aeroplane", "bicycle", "boat", "bus", "car", "motorbike", "train", "bottle", "chair", "diningtable", "pottedplant", "sofa", "tvmonitor"]

#Anchor sizes, divided by the input size data_shape[0]
anchors = np.array([(10, 14), (23, 27), (37, 58),
                              (81, 82), (135, 169),  (344, 319)],
                             np.float32) / data_shape[0]
#Each row lists the indices into `anchors` used by one YoloLoss (see the compile cell)
anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])

# Load the tiny YOLO model defined in yolov3_tf2 (80 classes, with the converted pretrained weights)
model_pretrained = YoloV3Tiny(data_shape[0], training=True, classes=80)
model_pretrained.load_weights("./yolov3_tf2/checkpoints/yolov3-tiny.tf").expect_partial()

model = YoloV3Tiny(data_shape[0], training=True, classes=len(class_names))
#Take only the weights of the non-output layers ('yolo_darknet') from the pretrained model
model.get_layer('yolo_darknet').set_weights(model_pretrained.get_layer('yolo_darknet').get_weights())
#Do not train the layers other than the output layers: freeze 'yolo_darknet'
freeze_all(model.get_layer('yolo_darknet'))

In [None]:
#One YoloLoss per row of anchor_masks, each built from the anchors that row selects
loss = [YoloLoss(anchors[mask], classes=len(class_names)) for mask in anchor_masks]
#Use the `learning_rate` keyword; `lr` is a deprecated alias that newer Keras releases no longer accept
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=loss, run_eagerly=False)

#Print the structure of the model
model.summary()

In [None]:
#Read the file-name stems of the training and validation splits (one per line).
#Context managers make sure both list files are closed after reading.
with open("./VOCdevkit/VOC2007/ImageSets/Main/train.txt") as split_file:
    train_file_name_list = split_file.read().splitlines()
with open("./VOCdevkit/VOC2007/ImageSets/Main/val.txt") as split_file:
    validation_file_name_list = split_file.read().splitlines()

#Build one batch provider per split with the shared configuration
train_dataset = ImageDataSequence(train_file_name_list, batch_size, anchors, anchor_masks, class_names, data_shape=data_shape)
validation_dataset = ImageDataSequence(validation_file_name_list, batch_size, anchors, anchor_masks, class_names, data_shape=data_shape)

In [None]:
#Train for 30 epochs on train_dataset, using validation_dataset as validation data
history = model.fit(train_dataset, validation_data=validation_dataset, epochs=30)

In [None]:
#Save the weights obtained by training
model.save_weights('./saved_models/model_yolo_weights')

## 10-4 評估物體偵測處理的結果

In [None]:
from absl import app, logging, flags
from absl.flags import FLAGS
#NOTE(review): calls absl's private app._run_init with a placeholder argv, presumably so that
#FLAGS can be read and assigned in the notebook without going through app.run(); confirm
#against the absl version in use
app._run_init(['yolov3'], app.parse_flags_with_usage)

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from yolov3_tf2.yolov3_tf2.utils import draw_outputs

#Build a tiny YOLO model with one class per entry of class_names
yolo_trained = YoloV3Tiny(classes=len(class_names))
#Load the saved weights
yolo_trained.load_weights('./saved_models/model_yolo_weights').expect_partial()

In [None]:
img_file_name = "./VOCdevkit/VOC2007/JPEGImages/"+"006626" + ".jpg"

#Load the image; the context manager closes the file once its bytes are read
with open(img_file_name, 'rb') as image_file:
    img_raw = tf.image.decode_jpeg(image_file.read(), channels=3)
img = transform_images(img_raw, data_shape[0])
#Add a leading batch axis so a single image can be passed to predict
img = np.expand_dims(img, 0)

#Run the prediction
boxes, scores, classes, nums = yolo_trained.predict(img)

In [None]:
#Start from the decoded (untransformed) image as a NumPy array
img = img_raw.numpy()

#Write the prediction results onto the image
img = draw_outputs(img, (boxes, scores, classes, nums), class_names)

#Display the image with the prediction results
plt.figure(figsize=(10,10))
plt.imshow(img)
plt.show()

In [None]:
#Use the pretrained weights directly

FLAGS.yolo_iou_threshold = 0.5
FLAGS.yolo_score_threshold = 0.5

#Read the class names, one per line, and close the file afterwards
with open("./yolov3_tf2/data/coco.names") as names_file:
    yolo_class_names = [line.strip() for line in names_file]

yolo = YoloV3Tiny(classes=80)
#Load the weights
yolo.load_weights("./yolov3_tf2/checkpoints/yolov3-tiny.tf").expect_partial()

In [None]:
img_file_name = "./VOCdevkit/VOC2007/JPEGImages/"+"006626" + ".jpg"

#Load the image; the context manager closes the file once its bytes are read
with open(img_file_name, 'rb') as image_file:
    img_raw = tf.image.decode_jpeg(image_file.read(), channels=3)
img = transform_images(img_raw, data_shape[0])
#Add a leading batch axis so a single image can be passed to predict
img = np.expand_dims(img, 0)
#Run the prediction
boxes, scores, classes, nums = yolo.predict(img)

In [None]:
#Start from the decoded (untransformed) image as a NumPy array
img = img_raw.numpy()
#Write the prediction results onto the image, labelled with the names read from coco.names
img = draw_outputs(img, (boxes, scores, classes, nums), yolo_class_names)

#Display the image with the prediction results
plt.figure(figsize=(10,10))
plt.imshow(img)
plt.show()