In [1]:
import custom_utils as cu
from ultralytics import YOLO
import numpy as np
import matplotlib.pyplot as plt

import cv2

# Class labels; each one is also used as a sub-folder name inside the data
# directories and as a dictionary key for the per-label image collections.
labels = ["green", "red", "yellow"]

In [2]:
# prepare environment
import os
from dotenv import load_dotenv
# Load .env files
load_dotenv()

# Get environment variables
dataset_dir = os.getenv('DATASET_DIR')
ori_data_dir = os.getenv('TRAFFIC_LIGHT_ORIGINAL_DATA')
custom_data_dir = os.getenv('TRAFFIC_LIGHT_CUSTOM_DATA')

# os.getenv returns None for an unset variable and os.path.exists(None)
# raises a TypeError, so test for "unset" before touching the file system.
if not ori_data_dir or not os.path.exists(ori_data_dir):
    print("Error: No original data set")

# Without a target directory nothing below can work, so fail with a clear message.
if not custom_data_dir:
    raise EnvironmentError("TRAFFIC_LIGHT_CUSTOM_DATA is not set")

# One output folder per label. makedirs also creates missing parent folders,
# and exist_ok=True keeps the cell safe to re-run.
for l in labels:
    label_dir = os.path.join(custom_data_dir, l)
    os.makedirs(label_dir, exist_ok=True)

In [None]:
# step 0: get images
# Collect the original images of every label, keyed by the label name.
img_dic = {}
for l in labels:
    label_src_dir = os.path.join(ori_data_dir, l)
    # The meaning of the second argument is defined by cu.get_images
    # (presumably an upper limit on the number of images — TODO confirm).
    paths, images = cu.get_images(label_src_dir, 100)
    print(paths)
    print(label_src_dir)
    img_dic[l] = images

In [None]:
def extract_boxes(yolo_res, cls):
    """Crop every detection of class ``cls`` out of its source image.

    Args:
        yolo_res: iterable of prediction results. Each result must expose
            ``boxes.cls``, ``boxes.xyxyn`` and ``orig_img``.
        cls: class id to keep; compared with ``==`` against ``boxes.cls``.

    Returns:
        A list with one entry per matching box, in detection order. Each entry
        is whatever ``cu.extract_rectangle_from_image`` returns for the
        RGB-converted source image and the box's ``xyxyn`` row.
    """
    ex_boxes = []
    for res in yolo_res:
        # .cpu() is a no-op for CPU tensors but is required before .numpy()
        # when the prediction ran on a GPU.
        class_ids = res.boxes.cls.cpu().numpy()
        coords = res.boxes.xyxyn.cpu().numpy()
        # Converted lazily and only once per result instead of once per box.
        # NOTE(review): assumes cu.extract_rectangle_from_image does not modify
        # the image it is given — confirm in custom_utils.
        rgb_img = None
        for i, c in enumerate(class_ids):
            if c != cls:
                continue
            if rgb_img is None:
                rgb_img = cv2.cvtColor(res.orig_img, cv2.COLOR_BGR2RGB)
            ex_boxes.append(cu.extract_rectangle_from_image(rgb_img, coords[i]))
    return ex_boxes

In [None]:
# Tests with sample images showed that the "m" weights are a good middle
# ground between accuracy and run time.
model = YOLO("yolov8m.pt")
# label -> list of extracted boxes, filled by the detection step
res_dict = dict()

In [None]:
# step 1: analyze images and extract boxes with yolo
# Class id 9 is "traffic light" in the COCO label set of the pretrained weights.
TRAFFIC_LIGHT_CLS = 9
for l in labels:
    res = model.predict(img_dic[l], conf=0.3)
    boxes = extract_boxes(res, TRAFFIC_LIGHT_CLS)
    res_dict[l] = boxes

In [None]:
def average_resize(res_dict):
    """Resize every image in ``res_dict`` to the mean height and width of all images.

    Args:
        res_dict: dict mapping a label to a list of image arrays. ``shape[0]``
            is taken as the height and ``shape[1]`` as the width of an image.

    Returns:
        The same dict object. Its lists are updated in place, so every image
        ends up with the shared (truncated) average size. When the dict holds
        no images at all it is returned unchanged.
    """
    heights = [img.shape[0] for imgs in res_dict.values() for img in imgs]
    widths = [img.shape[1] for imgs in res_dict.values() for img in imgs]
    if not heights:
        # np.average([]) is nan and int(nan) raises, so there is nothing to do.
        return res_dict

    # cv2.resize rejects a zero-sized target, so clamp to at least one pixel.
    avr_x = max(1, int(np.average(widths)))
    avr_y = max(1, int(np.average(heights)))

    for imgs in res_dict.values():
        for idx, img in enumerate(imgs):
            # cv2.resize takes the target size as (width, height)
            imgs[idx] = cv2.resize(img, (avr_x, avr_y))
    return res_dict

In [None]:
# resize all images to the same average height and width
res_dict = average_resize(res_dict) 

In [None]:
# save resized images in custom_data
# Each label's images go into that label's own folder below custom_data_dir.
for l in labels:
    cu.write_images(res_dict[l], os.path.join(custom_data_dir, l), l)

In [3]:
from skimage.metrics import structural_similarity as ssim

In [4]:
def find_similar(threshold = 0.6, directory = None, images = None):
    """Find pairs of images whose structural similarity exceeds ``threshold``.

    Args:
        threshold: a pair is reported when its SSIM score is strictly greater
            than this value.
        directory: if given, the images are loaded with
            ``cu.get_images(directory)`` and ``images`` is ignored.
        images: sequence of colour images; each one is converted with
            ``cv2.COLOR_BGR2GRAY`` before it is compared.

    Returns:
        Integer array of shape ``(k, 2)`` holding the index pairs ``(i, j)``
        with ``i < j`` of all similar images (``k`` is 0 when none are found).

    Raises:
        ValueError: if neither ``directory`` nor ``images`` is given.
    """
    if directory is not None:
        # load pictures
        _ , images = cu.get_images(directory)
    if images is None:
        raise ValueError("find_similar needs either 'directory' or 'images'")

    # Convert every image to grayscale once up front (the score is computed on
    # the grayscale versions) instead of twice per compared pair.
    gray_images = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in images]
    num_images = len(gray_images)

    # list collecting the index tuples of similar images
    similar_images = []

    # compare each picture with every later one
    for i in range(num_images):
        for j in range(i + 1, num_images):
            # Only the mean score is needed, so the full SSIM map is not requested.
            similarity_score = ssim(gray_images[i], gray_images[j])

            # keep the pair if its score is above the threshold
            if similarity_score > threshold:
                similar_images.append((i, j))

    # reshape keeps a consistent (k, 2) layout even when no pair was found
    return np.array(similar_images, dtype=int).reshape(-1, 2)


In [5]:
# Reload the saved images of every label and search each set for similar pairs.
cus_dic = {}
sim_dic = {}
for l in labels:
    custom_cls_path = os.path.join(custom_data_dir, l)
    _ , custom_images = cu.get_images(custom_cls_path)
    cus_dic[l] = custom_images
    s = find_similar(threshold=0.8, images=custom_images)
    sim_dic[l] = s

In [6]:
def show_image_pairs(image_list, index_pairs):
    """Show image pairs side by side, one pair per key press.

    Args:
        image_list: indexable collection of images.
        index_pairs: sequence of index pairs into ``image_list``; the first two
            entries of each pair select the images to display.

    Each pair is concatenated horizontally and shown in an OpenCV window until
    a key is pressed. Pressing ``q`` or ``Esc`` stops the review early. All
    windows are closed when the function returns or raises.
    """
    try:
        for pair in index_pairs:
            image1 = image_list[pair[0]]
            image2 = image_list[pair[1]]

            combined_image = cv2.hconcat([image1, image2])

            cv2.imshow("Image Pair" + str(pair), combined_image)
            # mask to the low byte so the key code compares cleanly with ord()
            key = cv2.waitKey(0) & 0xFF
            # close the window of this pair so windows do not pile up
            cv2.destroyAllWindows()

            if key in (27, ord("q")):  # 27 == Esc
                break
    finally:
        # Always release the GUI windows, even after an error; the short
        # waitKey lets the GUI event loop process the close request.
        cv2.destroyAllWindows()
        cv2.waitKey(1)

In [7]:
# Review the similar pairs that were found for a single label.
cls = "green"
show_image_pairs(cus_dic[cls], sim_dic[cls]) # TODO: OpenCV crashes here

: 

In [None]:
# Print how many similar pairs were found for each label.
for l in labels:
    print(len(sim_dic[l]))

# step 3: check images manually and label them

In [None]:
# Debugging aid: show the type of the second extracted "green" image.
print(type(res_dict["green"][1]))