# Fine-Tuning YOLOv8 on External Datasets and Exporting to TFLite
## Import required packages

In [None]:
from ultralytics import YOLO
import os 
import matplotlib.pyplot as plt
import zipfile
import shutil
import yaml

## Load the Pre-Trained YOLOv8 Model

In [None]:
# Build the YOLO object from the 'yolov8n.pt' weights file; this is the
# instance that the fine-tuning cell further down passes to fine_tune().
model = YOLO('yolov8n.pt')

## Fine-Tune the Model 

In [None]:
# Define the folder paths
# FOLDER_PATH is the parent of the current working directory
# (cwd joined with os.pardir, then made absolute).
FOLDER_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# DATAFOLDER is <FOLDER_PATH>/sentinel_model_gen/data-images; the dataset zip
# files are read from here and extracted into sub-folders of it.
DATAFOLDER = os.path.join(FOLDER_PATH, 'sentinel_model_gen', 'data-images')

# unzip data files into the specified folder
def unzip_data(zip_file, folder_path):
    """Extract every member of a zip archive into a destination folder.

    Args:
        zip_file: Path to the ``.zip`` archive to read.
        folder_path: Directory to extract into; created (including parents)
            when it does not exist yet.

    Raises:
        FileNotFoundError: If ``zip_file`` does not exist.
        zipfile.BadZipFile: If ``zip_file`` is not a valid zip archive.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the folder in between.
    os.makedirs(folder_path, exist_ok=True)

    # unzip the contents of the zip file to the destination folder
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(folder_path)

    print(f"{zip_file} unzip to {folder_path}")

In [None]:
# Combined folder for datasets
# Every source dataset is copied under this one directory, split by split.
COMBINED_FOLDER = os.path.join(DATAFOLDER, 'combined-images')

# exist_ok=True makes re-running this cell safe without a separate
# os.path.exists() check.
os.makedirs(COMBINED_FOLDER, exist_ok=True)

def combine_and_rename(src_folder, dataset_name, dest_folder=None):
    """Copy one dataset's images and labels into the combined dataset folder.

    For each of the ``train``, ``valid`` and ``test`` splits, every file in
    ``<src_folder>/<split>/images`` is copied to ``<dest>/<split>/images`` as
    ``<dataset_name>-img-<n><original extension>`` (``n`` counts from 1 in
    sorted filename order). The label file sharing the image's stem is copied
    to ``<dest>/<split>/labels`` as ``<dataset_name>-img-<n>.txt``.

    Args:
        src_folder: Root of the source dataset (holds the split folders).
        dataset_name: Prefix for the new filenames, so that files copied from
            datasets with different names do not overwrite each other.
        dest_folder: Root of the combined dataset. When ``None`` (default)
            the module-level ``COMBINED_FOLDER`` is used.

    Notes:
        Source files are copied, not moved. A split whose ``images`` folder is
        missing in the source is skipped. An image without a matching label
        file is copied without a label.
    """
    if dest_folder is None:
        dest_folder = COMBINED_FOLDER

    for split in ('train', 'valid', 'test'):
        # path to images and labels in the source folder
        img_src_folder = os.path.join(src_folder, split, 'images')
        lbl_src_folder = os.path.join(src_folder, split, 'labels')

        # destination folders in combined folder
        img_dest_folder = os.path.join(dest_folder, split, 'images')
        lbl_dest_folder = os.path.join(dest_folder, split, 'labels')

        # create destination folders if they don't exist
        os.makedirs(img_dest_folder, exist_ok=True)
        os.makedirs(lbl_dest_folder, exist_ok=True)

        # not every dataset ships all three splits; skip the absent ones
        # instead of failing in os.listdir()
        if not os.path.isdir(img_src_folder):
            continue

        # Index label files by stem so each image is paired with its own
        # label. Pairing by list position breaks as soon as one label is
        # missing or the two folders sort differently.
        labels_by_stem = {}
        if os.path.isdir(lbl_src_folder):
            for lbl_file in sorted(os.listdir(lbl_src_folder)):
                labels_by_stem[os.path.splitext(lbl_file)[0]] = lbl_file

        img_files = sorted(os.listdir(img_src_folder))
        for i, img_file in enumerate(img_files, start=1):
            stem, ext = os.path.splitext(img_file)

            # new filename with dataset name as prefix; keep the image's real
            # extension rather than forcing '.jpg'
            new_stem = f"{dataset_name}-img-{i}"

            shutil.copy(
                os.path.join(img_src_folder, img_file),
                os.path.join(img_dest_folder, new_stem + ext),
            )

            lbl_file = labels_by_stem.get(stem)
            if lbl_file is None:
                # no label file for this image: copy the image only
                continue

            shutil.copy(
                os.path.join(lbl_src_folder, lbl_file),
                os.path.join(lbl_dest_folder, new_stem + ".txt"),
            )

In [None]:
# create yaml file for combined dataset
# Image directories of each split inside the combined dataset; combine_yaml()
# writes them into the generated YAML as its 'train' / 'val' / 'test' entries.
TRAIN_PATH = os.path.join(COMBINED_FOLDER, 'train', 'images')
VAL_PATH = os.path.join(COMBINED_FOLDER, 'valid', 'images')
TEST_PATH = os.path.join(COMBINED_FOLDER, 'test', 'images')
# Location of the merged dataset description written by combine_yaml().
OUTPUT_PATH = os.path.join(COMBINED_FOLDER, 'data.yaml')


def load_yaml(yaml_path):
    """Parse the YAML file at ``yaml_path`` with ``yaml.safe_load``.

    Returns the parsed document.
    """
    with open(yaml_path, 'r') as stream:
        parsed = yaml.safe_load(stream)
    return parsed

def combine_yaml(yaml_files):
    """Merge the class names of several dataset YAML files into one YAML file.

    The output is written to ``OUTPUT_PATH`` and contains the combined
    dataset's ``train`` / ``val`` / ``test`` image paths, the de-duplicated
    ``names`` list and ``nc`` (the number of unique names).

    Args:
        yaml_files: Iterable of paths to dataset YAML files; each must have a
            ``names`` entry.

    NOTE(review): only the names are merged here. Class indices inside the
    label files are not rewritten by this function -- confirm that they match
    the order of the merged ``names`` list.
    """
    merged_names = []

    # loop through each yaml file
    for yaml_file in yaml_files:
        names = load_yaml(yaml_file)['names']
        # 'names' may also be an {index: name} mapping; take the names in
        # index order instead of extending with the integer keys
        if isinstance(names, dict):
            names = [names[key] for key in sorted(names)]
        merged_names.extend(names)  # merge class names

    # Remove duplicates while keeping first-seen order. set() would reorder
    # the names differently from run to run, changing which index maps to
    # which class.
    unique_names = list(dict.fromkeys(merged_names))

    combined_data = {
        'train': TRAIN_PATH,
        'val': VAL_PATH,
        'test': TEST_PATH,
        'names': unique_names,
        'nc': len(unique_names),  # number of unique classes
    }

    # write combined data to a new yaml file ('w': the file is only written)
    with open(OUTPUT_PATH, 'w') as yaml_f:
        yaml.dump(combined_data, yaml_f)

    print(f"Combined YAML file create at {OUTPUT_PATH}")

In [None]:
# fine tune the YOLO model with new dataset
# epochs=5 for testing purposes
def fine_tune(model, yaml_path, epochs=5, imgsz=640, batch=16, device=None):
    """Train ``model`` on the dataset described by ``yaml_path``.

    Args:
        model: Object exposing ``train(**kwargs)``.
        yaml_path: Passed to ``train`` as ``data``.
        epochs: Passed to ``train`` as ``epochs``.
        imgsz: Passed to ``train`` as ``imgsz``.
        batch: Passed to ``train`` as ``batch``.
        device: Forwarded as ``device`` only when not ``None``; otherwise the
            keyword is left out of the ``train`` call entirely.

    Returns:
        The same ``model`` object that was passed in.
    """
    options = dict(data=yaml_path, epochs=epochs, imgsz=imgsz, batch=batch)

    # leave 'device' out unless the caller chose one
    if device is not None:
        options['device'] = device

    model.train(**options)
    return model

In [None]:
# save the model 
def save_model(model, path='yolo_fine_tuned.pt'):
    """Save ``model`` by calling ``model.save(path)``.

    Args:
        model: Object exposing ``save(path)``.
        path: Destination file. Defaults to ``'yolo_fine_tuned.pt'``, the
            filename the notebook later passes to ``load_model``.
    """
    model.save(path)

In [None]:
# dataset source folders (each zip is extracted into its own folder)
parcel_folder = os.path.join(DATAFOLDER, "parcel-images")
gun_folder = os.path.join(DATAFOLDER, "gun-images")

# unzip parcel images dataset into the 'parcel-images' folder
unzip_data(os.path.join(DATAFOLDER, "parcel.v1i.yolov8.zip"), parcel_folder)

# unzip the gun images dataset into the 'gun-images' folder
unzip_data(os.path.join(DATAFOLDER, "Weapon classification.v2i.yolov8.zip"), gun_folder)

# copy both datasets into the combined folder, prefixing files with the dataset name
# NOTE(review): label files are copied unchanged, so class ids keep each
# dataset's own numbering -- confirm they line up with the 'names' order of
# the combined data.yaml before trusting training results.
combine_and_rename(parcel_folder, 'parcel')
combine_and_rename(gun_folder, 'gun')

# path to yaml configuration for parcel images and gun images
parcel_yaml = os.path.join(parcel_folder, "data.yaml")
gun_yaml = os.path.join(gun_folder, "data.yaml")

yaml_list = [parcel_yaml, gun_yaml]

combine_yaml(yaml_list)

# alternative (disabled): fine tune on each dataset in sequence
# fine tune the YOLO model on parcel images
#parcel_ft_model = fine_tune(model, parcel_yaml)
# further fine tune the previously fine-tuned model
#gun_ft_model = fine_tune(parcel_ft_model, gun_yaml)

# fine tune the YOLO model with combined dataset images
fined_tuned_model = fine_tune(model, os.path.join(COMBINED_FOLDER, "data.yaml"))

# save the model trained just above; 'gun_ft_model' is only assigned in the
# disabled alternative, so referencing it here raised NameError
save_model(fined_tuned_model)

## Load the Fine-Tuned Model

In [None]:
def load_model(model_name):
    """Return a ``YOLO`` object constructed from ``model_name``."""
    return YOLO(model_name)


# read back the 'yolo_fine_tuned.pt' file produced by save_model()
fined_tuned_model = load_model('yolo_fine_tuned.pt')

## Test Model 

In [None]:
%matplotlib inline
def test_model(model, img_path, conf=0.25):
    """Run detection on one image and display the annotated result.

    Args:
        model: Callable detector, invoked as ``model(img_path, conf=conf)``.
            The first element of its result must provide ``plot()``.
        img_path: Path of the image to run detection on.
        conf: Forwarded to the model call as its ``conf`` keyword.
    """
    # Perform object detection
    results = model(img_path, conf=conf)

    # retrieve the annotated image (with bounding boxes and labels).
    # Ultralytics documents plot() as returning a BGR-ordered array, while
    # plt.imshow() interprets the last axis as RGB, so reverse the channel
    # axis to avoid swapped red/blue colours.
    annotated_img = results[0].plot()[..., ::-1]

    # display the image
    plt.figure(figsize=(10, 10))
    plt.imshow(annotated_img)
    plt.axis('off')
    plt.show()


# run the fine-tuned model on each sample image stored in sentinel_model_gen
for sample_name in ('guy_w_box.png', 'guy.png', 'guy_w_gun.png'):
    sample_path = os.path.join(FOLDER_PATH, 'sentinel_model_gen', sample_name)
    test_model(fined_tuned_model, sample_path)

## Export the Model to TFLite Format

In [None]:
# export the model to TFLite for use in the detection system 
def export_model(model):
    """Export ``model`` to TFLite by calling ``model.export(format='tflite')``.

    Args:
        model: Object exposing ``export(format=...)``.

    Returns:
        Whatever ``model.export`` returns, so the caller can locate the
        artifact (presumably the exported file's path for Ultralytics models
        -- confirm against the installed version).
    """
    return model.export(format='tflite')