# Introduction
This notebook showcases the implementation and some results regarding Task 3. This task was split into two subtasks: 3.1 for object detection and 3.2 for object segmentation.

The implementation and the results of each training run for each subtask are explained in detail in the report.

# Task 3.1 - Object Detection

## Setup

Installation of the necessary libraries and mounting of Google Drive.

In [None]:
!pip install ultralytics

In [None]:
from ultralytics import YOLO
from google.colab import drive
import cv2
from google.colab.patches import cv2_imshow
from google.colab import drive
import os

# Mount Google Drive at /content/gdrive; the dataset, config and weight paths
# used in the cells below all live under /content/gdrive/MyDrive.
drive.mount('/content/gdrive')

## Training

### Initial training

* 20 epochs

* images containing just 1 lego piece from the provided dataset

In [None]:
# Start from the pre-trained yolov8s.pt checkpoint.
model = YOLO("yolov8s.pt")

# Train for 20 epochs on the single-piece dataset config (image size 640).
results = model.train(
    data="/content/gdrive/MyDrive/Colab_Notebooks/VCOM/1_piece/data.yaml",
    epochs=20,
    imgsz=640,
)

### Results
![title](training1/confusion_matrix.png)
![title](training1/F1_curve.png)
![title](training1/PR_curve.png)

### Second Training
* 100 epochs

* images containing between 1 and 15 LEGO pieces from the provided dataset

In [None]:
# Start again from the pre-trained yolov8s.pt checkpoint (not from the previous run).
model = YOLO("yolov8s.pt")

# Train for 100 epochs on the full dataset config (image size 640).
results = model.train(
    data="/content/gdrive/MyDrive/Colab_Notebooks/VCOM/data.yaml",
    epochs=100,
    imgsz=640,
)

### Results
![title](training2/confusion_matrix2.png)
![title](training2/F1_curve2.png)
![title](training2/loss.png)


### Third Training

* 200 epochs

* images containing between 1 and 15 LEGO pieces from the provided dataset

* hyperparameter "patience" set to 10

In [None]:
# Start again from the pre-trained yolov8s.pt checkpoint (not from the previous run).
model = YOLO("yolov8s.pt")

# Train for up to 200 epochs on the full dataset config (image size 640),
# passing patience=10 to the trainer.
results = model.train(
    data="/content/gdrive/MyDrive/Colab_Notebooks/VCOM/data.yaml",
    epochs=200,
    patience=10,
    imgsz=640,
)

### Results
![title](training3/confusion_matrix.png)
![title](training3/F1_curve.png)
![title](training3/PR_curve.png)
![title](training3/results.png)

## Test model with test dataset

In [None]:
model = YOLO("/content/gdrive/MyDrive/Colab_Notebooks/VCOM/runs/detect/train/train2/weights/best.pt")
results = model.val(data="/content/gdrive/MyDrive/Colab_Notebooks/VCOM/data2.yaml")
# copy results to drive
!cp -r /content/runs/detect/val /content/gdrive/MyDrive/Colab_Notebooks/VCOM/runs/detect/test

## Simple Detection

In [None]:
# Load the fine-tuned detector from Drive
import cv2
model = YOLO("/content/gdrive/MyDrive/Colab_Notebooks/VCOM/runs/detect/train/train2/weights/best.pt")

# Predict with the model on a single image
results = model(source="/content/gdrive/MyDrive/Colab_Notebooks/test3pieces.jpg")

# Render the detections of the first (only) result onto the image
annotated_image = results[0].plot()

# Save the annotated image; cv2.imwrite returns False instead of raising when
# the file cannot be written, so check it explicitly.
output_path = "test3pieces_annotated.jpg"
if not cv2.imwrite(output_path, annotated_image):
    raise IOError(f"Could not write annotated image to {output_path}")

# Display the result inline (cv2_imshow is imported in the setup cell;
# plain cv2.imshow is not usable inside Colab).
cv2_imshow(annotated_image)

# Task 3.2 - Object Segmentation

Just like Task 3.1, the implementation and thought process behind this task are explained in detail in the report.

In [None]:
import os
import cv2
from ultralytics import YOLO
import numpy as np
import matplotlib.pyplot as plt


model = YOLO("/content/gdrive/MyDrive/Colab_Notebooks/VCOM/runs/detect/train/train2/weights/best.pt")

base_dir = "/content/gdrive/MyDrive/Colab_Notebooks/VCOM/test/images"

# Boxes with a side shorter than this are skipped: the GrabCut rectangle below
# is inset by one pixel on every side, so very small crops leave it (almost)
# empty. NOTE(review): 8 px is a conservative choice, tune if needed.
MIN_CROP_SIDE = 8


def equalize_luminance(image_rgb):
    """Return a contrast-enhanced copy of an RGB image.

    The image is converted to LAB, the L channel is histogram-equalized and
    the result is converted back to RGB.

    Alternatives tried during development and not used: equalizing the V
    channel in HSV, and CLAHE (clipLimit=2.0, tileGridSize=(8, 8)) on L.
    """
    l_channel, a_channel, b_channel = cv2.split(cv2.cvtColor(image_rgb, cv2.COLOR_RGB2LAB))
    l_channel = cv2.equalizeHist(l_channel)
    return cv2.cvtColor(cv2.merge((l_channel, a_channel, b_channel)), cv2.COLOR_LAB2RGB)


def segment_crop(crop_rgb, n_colors=5):
    """Segment the object inside one detection crop.

    Steps: Gaussian blur -> GrabCut initialised with a rectangle inset by one
    pixel -> k-means colour quantisation of the masked crop -> morphological
    opening and closing with a 3x3 kernel.

    Alternatives tried during development and not used: global, Otsu and
    adaptive thresholding of a grayscale version of the crop.

    Args:
        crop_rgb: RGB crop (H x W x 3, uint8) taken from a detection box.
        n_colors: number of k-means clusters.

    Returns:
        An array with the same shape as ``crop_rgb`` in which pixels GrabCut
        labelled as background were zeroed before the colours were quantised.
    """
    blurred = cv2.GaussianBlur(crop_rgb, (5, 5), 0)

    # GrabCut: everything outside `rect` (a 1-pixel frame) seeds the background.
    mask = np.zeros(blurred.shape[:2], np.uint8)
    bgd_model = np.zeros((1, 65), np.float64)
    fgd_model = np.zeros((1, 65), np.float64)
    rect = (1, 1, blurred.shape[1] - 2, blurred.shape[0] - 2)
    cv2.grabCut(blurred, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
    is_background = (mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD)
    foreground = np.where(is_background, 0, 1).astype('uint8')
    foreground_rgb = blurred * foreground[:, :, np.newaxis]

    # k-means colour quantisation of the masked crop.
    pixel_values = np.float32(foreground_rgb.reshape((-1, 3)))
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.2)
    _, labels, centers = cv2.kmeans(pixel_values, n_colors, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    centers = np.uint8(centers)
    quantized = centers[labels.flatten()].reshape(foreground_rgb.shape)

    # Opening then closing with a 3x3 kernel.
    kernel = np.ones((3, 3), np.uint8)
    quantized = cv2.morphologyEx(quantized, cv2.MORPH_OPEN, kernel)
    quantized = cv2.morphologyEx(quantized, cv2.MORPH_CLOSE, kernel)
    return quantized


# Sorted so the images are always processed and shown in the same order.
for subdir in sorted(os.listdir(base_dir)):
    subdir_path = os.path.join(base_dir, subdir)
    if not os.path.isdir(subdir_path):
        continue

    for file in sorted(os.listdir(subdir_path)):
        if not file.endswith(".jpg"):
            continue
        file_path = os.path.join(subdir_path, file)

        # Check the image is readable before spending time on inference.
        image = cv2.imread(file_path)
        if image is None:
            print(f"Could not open or find the image: {file_path}")
            continue

        results = model(file_path, show=False)

        original_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        enhanced_rgb = equalize_luminance(original_rgb)

        # Crops are read from `enhanced_rgb` and written into a separate copy,
        # so overlapping boxes never see pixels another box already replaced.
        segmented_rgb = enhanced_rgb.copy()
        height, width = enhanced_rgb.shape[:2]

        for box in results[0].boxes.xyxy:
            x1, y1, x2, y2 = map(int, box.tolist())

            # Clamp to the image: a negative index would wrap around when slicing.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, width), min(y2, height)
            if x2 - x1 < MIN_CROP_SIDE or y2 - y1 < MIN_CROP_SIDE:
                continue

            segmented_rgb[y1:y2, x1:x2] = segment_crop(enhanced_rgb[y1:y2, x1:x2])

        # Show with matplotlib: cv2.imshow / cv2.waitKey need a GUI window and
        # are not usable from a Colab notebook.
        fig, (ax_original, ax_segmented) = plt.subplots(1, 2, figsize=(12, 6))
        ax_original.imshow(original_rgb)
        ax_original.set_title("Original Image")
        ax_segmented.imshow(segmented_rgb)
        ax_segmented.set_title("Segmented Image")
        for ax in (ax_original, ax_segmented):
            ax.axis("off")
        fig.suptitle(file)
        plt.show()
        plt.close(fig)  # free the figure; many images are processed in this loop