In [None]:
# Mount Google Drive at /content/drive. Later cells copy the dataset archive
# from the Drive and copy the trained model and run logs back to it.

from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Start from a clean workspace so the notebook can be re-run from the top.
# `-f` keeps rm silent on a fresh runtime where these paths do not exist yet.
!rm -rf data/
!rm -rf __MACOSX/
!rm -rf test/
!rm -rf train/
!rm -rf valid/
!rm -rf classification_raw_data/
!rm -f data.*
!rm -f README.*

In [None]:
# Copy the dataset archive from the mounted Drive into a local data/ folder.
# `-p` lets the cell be re-run when data/ already exists; the absolute path
# matches the /content/drive mount point and does not depend on the current
# working directory.
!mkdir -p data
!cp /content/drive/MyDrive/FYP/10000_dataset_binary/data.zip ./data/data.zip

In [None]:
# Extract the dataset archive into data/. The cells below read
# data/train/, data/valid/, data/test/ and data/data.yaml from it.
!unzip ./data/data.zip -d data/

In [None]:
# Map the labels

def map_labels():
    """Return the mapping from descriptive tile names to short tile codes.

    Suited tiles map to ``<rank><suit letter>``: ``s`` for bamboo, ``m`` for
    character and ``p`` for circle. The seven remaining tiles (winds, then
    white/green/red) map to ``1z`` .. ``7z``.

    Returns:
        dict[str, str]: 34 entries, e.g. ``'bamboo_1' -> '1s'`` and
        ``'red' -> '7z'``. A new dict is built on every call, so callers may
        mutate the result freely.
    """
    mapping = {
        'bamboo_1': '1s',
        'bamboo_2': '2s',
        'bamboo_3': '3s',
        'bamboo_4': '4s',
        'bamboo_5': '5s',
        'bamboo_6': '6s',
        'bamboo_7': '7s',
        'bamboo_8': '8s',
        'bamboo_9': '9s',
        'character_1': '1m',
        'character_2': '2m',
        'character_3': '3m',
        'character_4': '4m',
        'character_5': '5m',
        'character_6': '6m',
        'character_7': '7m',
        'character_8': '8m',
        'character_9': '9m',
        'circle_1': '1p',
        'circle_2': '2p',
        'circle_3': '3p',
        'circle_4': '4p',
        'circle_5': '5p',
        'circle_6': '6p',
        'circle_7': '7p',
        'circle_8': '8p',
        'circle_9': '9p',
        'east': '1z',
        'south': '2z',
        'west': '3z',
        'north': '4z',
        'white': '5z',
        'green': '6z',
        'red': '7z',
    }

    # Hand the table back to the caller; without a return statement it is
    # built and immediately thrown away.
    return mapping

In [None]:
from os.path import split
import os

# Filter out malformed files
# For YOLO without OBB, each line in a label file contains only 5 values
def malformat_filter(paths):
    """Delete label files with more than 5 values per row, plus their images.

    For every split directory in ``paths`` the ``labels`` sub-folder is
    scanned. A ``.txt`` file is considered malformed as soon as one of its
    rows has more than five whitespace-separated values. The label file and
    the same-named ``.jpg`` in the sibling ``images`` folder are then removed
    from disk.

    Args:
        paths: iterable of split directories (e.g. ``'data/train/'``), each
            containing ``labels`` and ``images`` sub-folders. A trailing
            slash is accepted but not required.

    Returns:
        list[str]: paths of the label files that were removed.
    """
    removed = []
    for path in paths:
        label_dir = os.path.join(path, 'labels')
        image_dir = os.path.join(path, 'images')

        for file in os.listdir(label_dir):
            stem, ext = os.path.splitext(file)
            # Only YOLO label files are candidates; ignore stray files.
            if ext != '.txt':
                continue

            label_path = os.path.join(label_dir, file)
            with open(label_path, 'r') as f:
                # split() without an argument ignores the trailing newline and
                # repeated/trailing blanks, so a valid 5-value row followed by
                # a space is not miscounted as 6 values.
                malformed = any(len(line.split()) > 5 for line in f)

            if not malformed:
                continue

            os.remove(label_path)
            removed.append(label_path)

            # Build the image path from the images folder directly rather
            # than by text substitution on the label path.
            image_path = os.path.join(image_dir, stem + '.jpg')
            if os.path.exists(image_path):
                os.remove(image_path)
            else:
                print(f"No matching image to remove for {label_path}")

    return removed

# Clean all three dataset splits; this deletes files on disk.
malformat_filter(['data/train/', 'data/valid/', 'data/test/'])

In [None]:
# Obtain all classes and create related directories by original yaml file
import yaml
# Class names kept after filtering; filled in by create_class_dir() and read
# by later cells to translate class ids into folder names.
names = []
def create_class_dir(yaml_file, target_root):
    """Load class names from a dataset YAML and create one folder per class.

    Names starting with ``'0'`` are dropped. The remaining names are stored in
    the module-level ``names`` list (in YAML order) and a directory
    ``<target_root>/train/<name>`` is created for each of them.

    Args:
        yaml_file: path to the dataset YAML; must contain a ``names`` entry
            that is either a list of names or an ``{id: name}`` mapping.
        target_root: root directory for the classification dataset; a
            trailing slash is accepted but not required.

    Returns:
        list[str]: the filtered class names (the same object as ``names``).
    """
    global names
    # Use target_root as given: prefixing './' would silently turn an absolute
    # root into a relative one.
    os.makedirs(target_root, exist_ok=True)

    with open(yaml_file, 'r') as f:
        yaml_content = yaml.safe_load(f)

    raw_names = yaml_content['names']
    if isinstance(raw_names, dict):
        # {id: name} form: order the names by class id.
        raw_names = [raw_names[key] for key in sorted(raw_names)]

    names = [str(n) for n in raw_names if not str(n).startswith('0')]
    print(names)

    for name in names:
        os.makedirs(os.path.join(target_root, 'train', name), exist_ok=True)
        # os.makedirs(os.path.join(target_root, 'val', name), exist_ok=True)

    return names

# Read the class names from the dataset YAML and create the per-class
# folders under ./classification_raw_data/train/.
create_class_dir('./data/data.yaml', './classification_raw_data/')

In [None]:
# Remove all tiles with label starting with '0'
import yaml

def remove_tile_startswith_0(paths, offset=4):
    """Shift class ids down by ``offset`` and drop rows that become negative.

    Every file in ``<path>/labels`` is rewritten in place: the first value of
    each row (the class id) is reduced by ``offset``; rows whose shifted id is
    below zero are removed; blank rows are removed. All other values are kept
    as they are, re-joined with single spaces.

    The rewrite is NOT idempotent: calling this twice on the same files
    shifts the ids twice.

    NOTE(review): the default of 4 presumably equals the number of classes
    whose name starts with '0' and assumes they occupy ids 0-3. Verify
    against data.yaml.

    Args:
        paths: iterable of split directories, each with a ``labels`` folder.
        offset: amount subtracted from every class id (default 4).
    """
    for path in paths:
        label_dir = os.path.join(path, 'labels')
        for file in os.listdir(label_dir):
            label_path = os.path.join(label_dir, file)
            with open(label_path, 'r') as f:
                lines = f.readlines()

            kept_rows = []
            for line in lines:
                words = line.split()
                # A blank row has no class id to shift; skip it instead of
                # failing on words[0].
                if not words:
                    continue

                cls_id = int(words[0]) - offset
                if cls_id < 0:
                    continue

                words[0] = str(cls_id)
                kept_rows.append(' '.join(words) + '\n')

            with open(label_path, 'w') as f:
                f.writelines(kept_rows)

# Rewrites every label file of the three splits in place. Run this once per
# fresh extraction of the dataset: each run subtracts 4 from the class ids.
remove_tile_startswith_0(['data/train/', 'data/valid/', 'data/test/'])

In [None]:
# Remove all tiles with label starting with '0'
import yaml

def remove_tile_startswith_0(paths, offset=4):
    """Shift class ids down by ``offset`` and drop rows that become negative.

    Every file in ``<path>/labels`` is rewritten in place: the first value of
    each row (the class id) is reduced by ``offset``; rows whose shifted id is
    below zero are removed; blank rows are removed. All other values are kept
    as they are, re-joined with single spaces.

    The rewrite is NOT idempotent: calling this twice on the same files
    shifts the ids twice.

    NOTE(review): the default of 4 presumably equals the number of classes
    whose name starts with '0' and assumes they occupy ids 0-3. Verify
    against data.yaml.

    Args:
        paths: iterable of split directories, each with a ``labels`` folder.
        offset: amount subtracted from every class id (default 4).
    """
    for path in paths:
        label_dir = os.path.join(path, 'labels')
        for file in os.listdir(label_dir):
            label_path = os.path.join(label_dir, file)
            with open(label_path, 'r') as f:
                lines = f.readlines()

            kept_rows = []
            for line in lines:
                words = line.split()
                # A blank row has no class id to shift; skip it instead of
                # failing on words[0].
                if not words:
                    continue

                cls_id = int(words[0]) - offset
                if cls_id < 0:
                    continue

                words[0] = str(cls_id)
                kept_rows.append(' '.join(words) + '\n')

            with open(label_path, 'w') as f:
                f.writelines(kept_rows)

# NOTE: the class-id shift is already applied by the identical cell above.
# The rewrite happens in place and is not idempotent, so invoking it a second
# time here would subtract 4 from every class id again and drop four more
# classes. It is intentionally not called a second time.

In [None]:
# %pip installs into the environment of the running kernel.
%pip install opencv-python

In [None]:
# Reorganize the folder structure to accommodate the classification training task
import os
import shutil
import cv2
from PIL import Image
import numpy as np
from datetime import datetime

# The original dataset folder is split into "train/", "valid/" and "test/"
def reoganize_folders(paths):
    """Crop every labelled tile out of its image into per-class folders.

    For each label file in ``<path>labels/`` the image
    ``<path>images/<stem>.jpg`` is read. Every row of the label file is parsed
    as ``class_id x_center y_center width height`` (normalised to the image
    size), converted to a pixel box, clamped to the image, and the crop is
    written to
    ``./classification_raw_data/<split>/<class name>/<stem>_<row index>.jpg``.
    ``<split>`` is the second ``/``-separated component of ``path`` (for
    example ``'train'`` for ``'data/train/'``).

    Output folders are created on demand, unreadable images, blank rows and
    empty boxes are skipped, and a failed write is reported rather than
    logged as saved. File names are derived from the label file and row
    index, so re-running overwrites crops instead of adding duplicates.

    Relies on the module-level ``names`` list to translate class ids into
    class names.

    Args:
        paths: iterable of split directories ending in ``/``, each with
            ``images/`` and ``labels/`` sub-folders.
    """
    print(names)
    for path in paths:
        imgDir = path + 'images/'
        labelDir = path + 'labels/'
        split_name = path.split('/')[1]
        saved = 0
        skipped = 0

        for labelFile in os.listdir(labelDir):
            stem = os.path.splitext(labelFile)[0]
            img_path = imgDir + stem + '.jpg'
            img = cv2.imread(img_path)
            # cv2.imread signals a missing/unreadable file by returning None.
            if img is None:
                print(f"Skipped (image unreadable): {img_path}")
                skipped += 1
                continue
            h, w = img.shape[:2]

            with open(labelDir + labelFile, 'r') as f:
                tiles = f.readlines()

            for idx, tile in enumerate(tiles):
                fields = tile.split()
                # Skip blank rows (readlines keeps the newline, so test the
                # parsed fields, not the raw string).
                if not fields:
                    continue

                cls_id, x_c, y_c, bw, bh = map(float, fields)
                cls_id = int(cls_id)

                # Denormalize
                x1 = int((x_c - bw/2) * w)
                y1 = int((y_c - bh/2) * h)
                x2 = int((x_c + bw/2) * w)
                y2 = int((y_c + bh/2) * h)

                # Clamp
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(w, x2), min(h, y2)

                # A degenerate box gives an empty crop, which cannot be
                # encoded as an image.
                if x2 <= x1 or y2 <= y1:
                    skipped += 1
                    continue

                crop = img[y1:y2, x1:x2]
                cls_name = names[cls_id]
                out_dir = f"./classification_raw_data/{split_name}/{cls_name}"
                # Only the train/<class> folders are created up front; make
                # sure the folder for this split and class exists.
                os.makedirs(out_dir, exist_ok=True)
                crop_path = f"{out_dir}/{stem}_{idx}.jpg"

                if cv2.imwrite(crop_path, crop):
                    saved += 1
                else:
                    print(f"Failed to save: {crop_path}")
                    skipped += 1

        print(f"{path}: saved {saved} crops, skipped {skipped}")


# Crop the tiles of all three splits into ./classification_raw_data/.
reoganize_folders(['data/train/', 'data/valid/', 'data/test/'])

In [None]:
# %pip installs into the environment of the running kernel.
%pip install ultralytics

In [None]:
# Show the GPU(s) visible to this runtime.
!nvidia-smi

In [None]:
# @title Train the classification model
from ultralytics import YOLO

# Build the model from the architecture YAML (not from a .pt checkpoint) and
# reset its weights before training.
model = YOLO('yolo11s-cls.yaml')
model.reset_weights()


# Train for 100 epochs on the folder that holds one sub-folder per class.
# NOTE(review): `data` points at the class folders directly rather than at a
# root containing train/ and val/ splits — confirm the installed Ultralytics
# version accepts this layout.
# NOTE(review): device=-1 presumably asks Ultralytics to pick a GPU
# automatically — confirm against the installed version.
result = model.train(
    data='classification_raw_data/train',
    epochs=100,
    device=-1
)

In [None]:
# Save the trained model to the working directory (plain string: the file
# name has no placeholders, so no f-string is needed).
model.save('MJ_tile_detection_classification_yolo11.pt')

In [None]:
# Archive the runs/ folder and copy both the archive and the saved model file
# to the project folder on the mounted Drive.
# NOTE(review): runs/ is presumably where Ultralytics wrote the training
# output — confirm.
!zip -r classification_runs.zip runs/
!cp classification_runs.zip /content/drive/MyDrive/FYP/10000_dataset_binary/
!cp MJ_tile_detection_classification_yolo11.pt /content/drive/MyDrive/FYP/10000_dataset_binary/