In [1]:
import os
from random import shuffle
import tensorflow as tf

from tqdm import tqdm
import pandas as pd
import shutil

from tensorflow.keras.layers import (
    MaxPooling2D,
    Dropout,
    Flatten,
    Dense,
    Input,
    Lambda,
)
from tensorflow.keras.preprocessing.image import (
    load_img,
    img_to_array,
    array_to_img,
)
from sklearn.model_selection import train_test_split
from keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
import numpy as np



In [2]:
import warnings

warnings.filterwarnings("ignore")

In [3]:
from ultralytics import YOLO

<h1> Pre-processing data


Chosen dataset: https://www.kaggle.com/datasets/biancaferreira/african-wildlife <br>
With this dataset I have 4 classes: buffalo, elephant, rhino, zebra. <br>
Each class have image + annotation. First step I need to divide the dataset into processed_data folder


In [4]:
data_path = "./data/"
processed_data_path = "./processed_data"
os.makedirs(processed_data_path, exist_ok=True)

In [5]:
labels_folder = os.listdir(data_path)

In [6]:
df = pd.DataFrame(
    columns=["class", "img_path", "x", "y", "w", "h"]
)

In [7]:
images_destination_folder = f"{processed_data_path}/images/"
labels_destination_folder = f"{processed_data_path}/labels/"
os.makedirs(images_destination_folder, exist_ok=True)
os.makedirs(labels_destination_folder, exist_ok=True)

In [8]:
for label in tqdm(labels_folder):
    org_path = f"{data_path}/{label}"
    files = zip(os.listdir(org_path)[::2], os.listdir(org_path)[1::2])
    for img, file in tqdm(files):
        lb_file = f"{org_path}/{file}"
        img_path = f"{org_path}/{img}"
        numOfImage = len(os.listdir(images_destination_folder))
        img_new_path = os.path.join(
            images_destination_folder, f"africa{numOfImage}.jpg"
        )
        label_new_path = os.path.join(
            labels_destination_folder, f"africa{numOfImage}.txt"
        )
        shutil.copy(
            img_path,
            img_new_path,
        )
        shutil.copy(
            lb_file,
            label_new_path,
        )
        lb_info = open(lb_file, "r").read().split()
        objects = [[] for i in range(0, len(lb_info), 5)]
        for idx, obj_info in enumerate(lb_info):
            if idx % 5 == 0:
                continue
            else:
                objects[idx // 5].append(obj_info)
        for obj_list in objects:
            if obj_list:
                obj_details = obj_list

                lb = label
                x = float(obj_details[0])
                y = float(obj_details[1])
                w = float(obj_details[2])
                h = float(obj_details[3])

                row = {
                    "class": lb,
                    "img_path": img_new_path,
                    "x": x,
                    "y": y,
                    "w": w,
                    "h": h,
                }

                df = df.append(row, ignore_index=True)

376it [00:03, 105.38it/s]00<?, ?it/s]
376it [00:03, 98.93it/s] 03<00:10,  3.58s/it]
376it [00:02, 176.55it/s]07<00:07,  3.71s/it]
376it [00:02, 133.01it/s]09<00:02,  2.99s/it]
100%|██████████| 4/4 [00:12<00:00,  3.09s/it]


In [9]:
df

Unnamed: 0,class,img_path,x,y,w,h
0,buffalo,./processed_data/images/africa0.jpg,0.560000,0.663017,0.617500,0.644769
1,buffalo,./processed_data/images/africa1.jpg,0.473515,0.508434,0.497592,0.838554
2,buffalo,./processed_data/images/africa2.jpg,0.819167,0.593750,0.148333,0.242500
3,buffalo,./processed_data/images/africa2.jpg,0.747500,0.472500,0.221667,0.190000
4,buffalo,./processed_data/images/africa2.jpg,0.524167,0.543750,0.165000,0.232500
...,...,...,...,...,...,...
2680,zebra,./processed_data/images/africa1501.jpg,0.433594,0.518919,0.387500,0.656757
2681,zebra,./processed_data/images/africa1502.jpg,0.548828,0.492568,0.330469,0.974324
2682,zebra,./processed_data/images/africa1503.jpg,0.205859,0.592568,0.221094,0.520270
2683,zebra,./processed_data/images/africa1503.jpg,0.431641,0.597297,0.242969,0.562162


In [10]:
train_dir = "./processed_data/train/"
val_dir = "./processed_data/val"
labels_path = "./processed_data/labels/"

In [11]:
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(train_dir + "/images", exist_ok=True)
os.makedirs(train_dir + "/labels", exist_ok=True)
os.makedirs(val_dir + "/images", exist_ok=True)
os.makedirs(val_dir + "/labels", exist_ok=True)

In [12]:
files = os.listdir(images_destination_folder)
shuffle(files)

In [13]:
def split(files, ratio):
    elements = len(files)
    middle = int(elements * ratio)
    return [files[:middle], files[middle:]]


def copy_files(images_path, labels_path, destination_path, files):
    for file_name in files:
        file_name = file_name.split(".")[0]

        src = images_path + f"{file_name}.jpg"
        dst = destination_path + "/images"
        shutil.copy(src, dst)

        src = labels_path + f"{file_name}.txt"
        dst = destination_path + "/labels"
        shutil.copy(src, dst)

In [14]:
train_ratio = 0.75
train_files, val_files = split(files, train_ratio)

copy_files(images_destination_folder, labels_path, train_dir, train_files)
copy_files(images_destination_folder, labels_path, val_dir, val_files)

In [15]:
class_training = {idx: label for idx, label in enumerate(labels_folder)}

In [16]:
class_training

{0: 'buffalo', 1: 'elephant', 2: 'rhino', 3: 'zebra'}

In [17]:
with open(f"./processed_data/africa_data.yaml", "w") as f:
    f.write("train: ./train/images\n")
    f.write("val: ./val/images\n")
    f.write("nc: 4\n")
    f.write(f"names: {class_training}")

YOLOv8 model


In [18]:
model = YOLO("yolov8n.pt")

In [19]:
results = model.train(
    data="./processed_data/africa_data.yaml", epochs=10, imgsz=320
)  # train the model
results = model.val()  # evaluate model performance on the validation set

New https://pypi.org/project/ultralytics/8.1.1 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.233 🚀 Python-3.9.13 torch-2.1.0+cpu CPU (AMD Ryzen 7 6800H with Radeon Graphics)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=./processed_data/africa_data.yaml, epochs=10, time=None, patience=50, batch=16, imgsz=320, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show

[34m[1mtrain: [0mScanning C:\Users\ADMIN\OneDrive - EPITA\Computer-Vision-DSA-23\Lab02\processed_data\train\labels... 1128 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1128/1128 [00:01<00:00, 908.76it/s]






[34m[1mtrain: [0mNew cache created: C:\Users\ADMIN\OneDrive - EPITA\Computer-Vision-DSA-23\Lab02\processed_data\train\labels.cache


[34m[1mval: [0mScanning C:\Users\ADMIN\OneDrive - EPITA\Computer-Vision-DSA-23\Lab02\processed_data\val\labels... 376 images, 0 backgrounds, 0 corrupt: 100%|██████████| 376/376 [00:00<00:00, 884.43it/s]

[34m[1mval: [0mNew cache created: C:\Users\ADMIN\OneDrive - EPITA\Computer-Vision-DSA-23\Lab02\processed_data\val\labels.cache





Plotting labels to runs\detect\train4\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G     0.8289      2.178      1.029         11        320: 100%|██████████| 71/71 [01:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:09<00:00,  1.29it/s]

                   all        376        681      0.625      0.582      0.634       0.46






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G     0.9204      1.435      1.083         15        320: 100%|██████████| 71/71 [00:59<00:00,  1.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:09<00:00,  1.24it/s]

                   all        376        681      0.761      0.692      0.782      0.557






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G     0.9371      1.333      1.086         18        320: 100%|██████████| 71/71 [01:04<00:00,  1.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:10<00:00,  1.13it/s]

                   all        376        681      0.854      0.743      0.838      0.579






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G     0.9374      1.247      1.093         21        320: 100%|██████████| 71/71 [01:02<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:09<00:00,  1.23it/s]

                   all        376        681      0.831      0.756      0.842      0.585






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G     0.9179      1.169      1.069          9        320: 100%|██████████| 71/71 [01:04<00:00,  1.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:10<00:00,  1.16it/s]

                   all        376        681      0.838      0.729      0.828      0.599






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G     0.8761      1.067       1.06         20        320: 100%|██████████| 71/71 [01:04<00:00,  1.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:10<00:00,  1.19it/s]

                   all        376        681      0.854      0.793       0.88      0.649






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G      0.838     0.9878      1.033         14        320: 100%|██████████| 71/71 [01:05<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:10<00:00,  1.19it/s]

                   all        376        681      0.867      0.857      0.918      0.699






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G     0.7669     0.8753      1.003         11        320: 100%|██████████| 71/71 [01:04<00:00,  1.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:09<00:00,  1.20it/s]

                   all        376        681      0.906      0.856       0.93      0.696






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G     0.7358     0.8006     0.9853         19        320: 100%|██████████| 71/71 [01:04<00:00,  1.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:10<00:00,  1.19it/s]

                   all        376        681      0.906      0.862      0.928      0.701






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G     0.7013     0.7383     0.9711         23        320: 100%|██████████| 71/71 [01:07<00:00,  1.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:10<00:00,  1.19it/s]

                   all        376        681      0.922      0.869      0.928      0.735






10 epochs completed in 0.207 hours.
Optimizer stripped from runs\detect\train4\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train4\weights\best.pt, 6.2MB

Validating runs\detect\train4\weights\best.pt...
Ultralytics YOLOv8.0.233 🚀 Python-3.9.13 torch-2.1.0+cpu CPU (AMD Ryzen 7 6800H with Radeon Graphics)
Model summary (fused): 168 layers, 3006428 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:09<00:00,  1.24it/s]


                   all        376        681      0.923      0.869      0.928      0.735
               buffalo        376        121      0.982       0.95      0.987      0.828
              elephant        376        211      0.863      0.763      0.865      0.605
                 rhino        376        132      0.976       0.92      0.972      0.828
                 zebra        376        217      0.872      0.843      0.889       0.68
Speed: 0.2ms preprocess, 12.9ms inference, 0.0ms loss, 0.5ms postprocess per image
Results saved to [1mruns\detect\train4[0m
Ultralytics YOLOv8.0.233 🚀 Python-3.9.13 torch-2.1.0+cpu CPU (AMD Ryzen 7 6800H with Radeon Graphics)
Model summary (fused): 168 layers, 3006428 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning C:\Users\ADMIN\OneDrive - EPITA\Computer-Vision-DSA-23\Lab02\processed_data\val\labels.cache... 376 images, 0 backgrounds, 0 corrupt: 100%|██████████| 376/376 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.64it/s]


                   all        376        681      0.922      0.871      0.929      0.735
               buffalo        376        121      0.982       0.95      0.987      0.829
              elephant        376        211      0.862      0.763      0.866      0.606
                 rhino        376        132      0.968      0.927      0.972      0.824
                 zebra        376        217      0.874      0.843      0.893      0.682
Speed: 0.2ms preprocess, 11.5ms inference, 0.0ms loss, 0.5ms postprocess per image
Results saved to [1mruns\detect\train42[0m


SSD Transfer Learning model


In [20]:
classes = df["class"].unique().tolist()

In [21]:
label_encoder = LabelBinarizer()

In [22]:
images = []
labels = []
bboxes = []
for index, row in tqdm(df.iterrows()):
    img_path = row["img_path"]
    original_img = load_img(img_path)
    original_array = img_to_array(original_img)
    resized_array = np.array(array_to_img(original_array).resize((320, 320)))
    original_size = original_img.size
    resized_size = (320, 320)
    x_scale = resized_size[0] / original_size[0]
    y_scale = resized_size[1] / original_size[1]
    images.append(resized_array)

    x = row["x"]
    y = row["y"]
    w = row["w"]
    h = row["h"]
    x_min = (x - w / 2) * x_scale
    y_min = (y - h / 2) * y_scale
    x_max = (x + w / 2) * x_scale
    y_max = (y + h / 2) * y_scale
    bboxes.append((x_min, y_min, x_max, y_max))
    labels.append(row["class"])

2685it [00:44, 60.35it/s] 


In [23]:
images = np.array(images, dtype="float32") / 255.0
labels = np.array(labels)
bboxes = np.array(bboxes, dtype="float32")
labels = label_encoder.fit_transform(labels)

In [24]:
print(bboxes)

[[     0.1005     0.26521      0.3475     0.76722]
 [    0.11543    0.068748     0.37101     0.71534]
 [    0.39733       0.378     0.47644       0.572]
 ...
 [     0.0305     0.18216     0.10125     0.46723]
 [    0.09925     0.17327       0.177      0.4813]
 [    0.16575     0.19548     0.20525     0.46723]]


In [25]:
(
    train_images,
    val_images,
    train_bboxes,
    val_bboxes,
    train_labels,
    val_labels,
) = train_test_split(images, bboxes, labels, test_size=0.20, random_state=12)

In [26]:
losses = {
    "class_label": "categorical_crossentropy",
    "bounding_box": "mean_squared_error",
}

train_targets = {
    "class_label": train_labels,
    "bounding_box": train_bboxes,
}

val_targets = {
    "class_label": val_labels,
    "bounding_box": val_bboxes,
}

In [27]:
base_learning_rate = 0.0015

opt = Adam(base_learning_rate)

In [28]:
input_images = Input(shape=(320, 320, 3), name="input_images")

base_model = MobileNetV2(
    weights="imagenet", include_top=False, input_tensor=input_images
)
base_model.trainable = False
base_out = base_model.output
flatten_output = Flatten()(base_out)  # transform matrix into array 1D
bbox_layers = Dense(128, activation="relu")(flatten_output)
bbox_layers = Dense(64, activation="relu")(bbox_layers)
bbox_layers = Dense(32, activation="relu")(bbox_layers)
bbox_layers = Dropout(0.3)(bbox_layers)

label_layers = Dense(256, activation="relu")(flatten_output)
label_layers = Dense(128, activation="relu")(label_layers)

bounding_box = Dense(4, activation="sigmoid", name="bounding_box")(bbox_layers)
predictions_class = Dense(
    len(classes), activation="softmax", name="class_label"
)(label_layers)

model_ssd = Model(
    inputs=base_model.input,
    outputs=(bounding_box, predictions_class),
)
model_ssd.compile(
    optimizer=opt,
    loss=losses,
    metrics=["accuracy"],
)
model_ssd.summary()



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_images (InputLayer)      [(None, 320, 320, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 160, 160, 32  864         ['input_images[0][0]']           
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 160, 160, 32  128         ['Conv1[0][0]']                  
                                )                                                             

In [29]:
with tf.device("/CPU:0"):
    model_ssd.fit(
        train_images,
        train_targets,
        validation_data=(
            val_images,
            val_targets,
        ),
        epochs=15,
        batch_size=64,
        verbose=1,
    )

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [30]:
model_ssd.save("./ssd_model/model.h5")