In [27]:
import matplotlib.pyplot as plt
from collections import defaultdict
import os
import cv2 as cv
import numpy as np
from tqdm import tqdm

In [28]:
# Read ear haarcascades
left_cascade = cv.CascadeClassifier("./data/haarcascade_mcs_leftear.xml")
right_cascade = cv.CascadeClassifier("./data/haarcascade_mcs_rightear.xml")

In [29]:
def visualize(image, bbox, color=(0, 255, 0)):
    # Draw the ground truth, color is green by default
    for x, y, w, h in bbox:

        cv.rectangle(image, (x, y), (x+w, y+h), color, 2)
        
        """
        # Anotate every corner of the rectangle with the coordinates, add some offset to coordinates
        offset = -10 if color == (0, 255, 0) else 10
        cv.putText(image, f"{x}, {y}", (x, y+offset), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.putText(image, f"{x+w}, {y}", (x+w, y+offset), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.putText(image, f"{x}, {y+h}", (x, y+h+offset), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.putText(image, f"{x+w}, {y+h}", (x+w, y+h+offset), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        """

    return image

def store_image(image, rectangle, path):
    # Crop image to the rectangle
    x, y, w, h = rectangle
    image = image[y:y+h, x:x+w]
    
    # Save the image
    cv.imwrite(path, image)

def calculate_iou(bb1, bb2):
    """Calculate IOU between two bounding boxes.
    Format of bounding boxes: [x, y, w, h]
    """
    # Get the coordinates of the bounding boxes
    x1, y1, w1, h1 = bb1[0]
    x2, y2, w2, h2 = 0, 0, 0, 0
    for ear in bb2:
        for x, y, w, h in ear:
            x2, y2, w2, h2 = x, y, w, h

    # Calculate the intersection area, x1 and y1 are the top left coordinates
    x_left = max(x1, x2)
    y_bottom = max(y1, y2)
    x_right = min(x1+w1, x2+w2)
    y_top = min(y1+h1, y2+h2)
    
    # Calculate the intersection area
    intersection_area = abs(max((x_right - x_left, 0)) * max((y_top - y_bottom), 0))

    # Calculate the union area
    boxAArea = abs(w1 * h1)
    boxBArea = abs(w2 * h2)

    union_area = float(boxAArea + boxBArea - intersection_area)

    # Calculate the IOU
    iou = intersection_area / union_area

    return round(iou, 4)


def get_gt(image, name, data_path):
    # Read the parameters from the txt file
    # id, ear_center_x/image_width, ear_center_y/image_height, ear_width/image_width, ear_height/image_height
        
    # Get the connected image parameters
    txt_file = [file for file in os.listdir(
        data_path) if file.startswith(name) and file.endswith(".txt")][0]

    # Get image width and height
    image_width = image.shape[1]
    image_height = image.shape[0]

    with open(os.path.join(data_path, txt_file), "r") as f:
        _, center_x, center_y, width, height = f.readlines()[0].split(" ")
        ear_width = int(float(width) * image_width)
        ear_height = int(float(height) * image_height)
        start_x = int(float(center_x) * image_width - ear_width/2)
        start_y = int(float(center_y) * image_height - ear_height/2)
        return [[start_x, start_y, ear_width, ear_height]]

In [30]:
def viola_jones(gt, curr_image, scaleFact=1.05, minNeigh=5, save_image=False, name=""):
    """
    Function for detecting ears using Viola-Jones algorithm. Returns the IOU for the detected ears.
    @param gt: Ground truth bounding box coordinates
    @param curr_image: Current image
    @param scaleFact: Scale factor for the algorithm
    @param minNeigh: Minimum neighbors for the algorithm
    @param save_image: Boolean for saving the image, if true, save cropped ear for detected ears and gt
    @param name: Name of the image
    """

    if save_image:
        # Save the ground truth
        store_image(curr_image, gt[0], f'./data/work/VJ/gt/gt_{name}')

    # Detect ears, returns x, y, w, h
    ears = [left_cascade.detectMultiScale(curr_image, scaleFactor=scaleFact, minNeighbors=minNeigh),
            right_cascade.detectMultiScale(curr_image, scaleFactor=scaleFact, minNeighbors=minNeigh)]
    
    # Continue if none of the ears are detected
    if len(ears[0]) == 0 and len(ears[1]) == 0:
        #print(f"No ears detected for image {name}")
        return 0, "Not detected"
    
    # Iterate ears array which contains subarrays with the coordinates of the ears
    for ear in ears:
        # Iterate the coordinates of the ears
        for x, y, w, h in ear:
            # Draw the bounding box
            if save_image:
                # Save the detected ear
                store_image(curr_image, (x, y, w, h), f'./data/work/VJ/detected/{name}')
            pass

    return calculate_iou(gt, ears), "Detected"

In [31]:
def calculate_vj_iou(image_objects, scaleFact=1.05, minNeigh=5):
    """
    Runner function for calculating the IOU for Viola-Jones algorithm for all images in the dataset.
    """
    # Create a dictionary for storing the results
    image_objects_iou = {}

    for name, (curr_image, gt) in tqdm(image_objects.items()):
        # Run a default Viola-Jones algorithm
        image_iou, status = viola_jones(gt, curr_image, scaleFact, minNeigh, save_image=True, name=name)

        # Store the IOU
        image_objects_iou[name] = (image_iou, status)
    
    return image_objects_iou
        

In [32]:
def test_vj_parameters(gts, images):
    vj_iou_scaleFactor =defaultdict(list)
    vj_iou_minNeigh =defaultdict(list)

    vj_iou_together = defaultdict(list)
    for gt, curr_image in tqdm(zip(gts, images)):
        # Get the Viola-Jones result for multiple parameters
        for scaleFact in np.arange(1.05, 1.4, 0.05):
            scaleFact = round(scaleFact, 2)
            for minNeigh in range(4, 9, 1):
                iou = viola_jones(gt, curr_image, scaleFact, minNeigh)
                # Skip if no ears are detected
                vj_iou_scaleFactor[scaleFact].append(iou)
                vj_iou_minNeigh[int(minNeigh)].append(iou)
                vj_iou_together[f"SF{scaleFact}_MN{minNeigh}"].append(iou)
                # Plot the IOU for each scale factor, x axis is the scale factor key and y axis is the IOU averaged for each scale factor
    

    # Create a single plot with xlabels being vj_iou_together keys and y axis being the mean IOU for each key, taking first value
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Plot the IOU for each scale factor, x axis is the scale factor key and y axis is the IOU averaged for each scale factor
    ax.plot(vj_iou_together.keys(), [np.mean([iou for iou, _ in value]) for value in vj_iou_together.values()])

    # Rotate the xticks
    plt.xticks(rotation=75)

    # Decrease font size
    plt.xticks(fontsize=8)

    # Add a grid
    plt.grid(True, alpha=0.4)

    # Add labels and title
    plt.xlabel("Scale factor and min neighbors")
    plt.ylabel("IOU")
    plt.title("Average IOU for each scale factor and min neighbors")

    # Save the plot
    plt.savefig("./data/plots/IOU_scaleFactor_minNeigh.png")

## Loading images and ground truths

In [33]:
# Load the images for running VJ
data_path = "./data/ears"

# Get images
image_names = [file for file in os.listdir(data_path) if file.endswith(".png")]

# Convert the images to cv objects
image_objects = [cv.imread(os.path.join(data_path, image)) for image in image_names]

# Get the ground truths
gts = [get_gt(image, name.split(".")[0], data_path) for name, image in zip(image_names, image_objects)]

# Create a dict for images, key is image name, value is a list of it's cv object and gt
image_object_dict = {name: [image, gt] for name, image, gt in zip(image_names, image_objects, gts)}

# Testing various VJ parameters for different results

In [34]:
np.random.seed(42)

# Randomly sample n images for testing
random_images = np.random.choice(list(image_object_dict.keys()), 500, replace=False)


# Get the images and gts for the random images
random_image_objects = [image_object_dict[image][0] for image in random_images]
random_gts = [image_object_dict[image][1] for image in random_images]

# Run the testing for the random images
#test_vj_parameters(random_gts, random_image_objects)

In [35]:
n = 500
image_objects_test = {key: value for key, value in list(image_object_dict.items())[:n]} 
image_objects_iou = calculate_vj_iou(image_objects_test, scaleFact=1.05, minNeigh=4)

100%|██████████| 500/500 [02:23<00:00,  3.48it/s]


In [36]:
# Number of images 
print(f"Number of images: {len(image_objects_iou)}")

# Count how many were not detected
nd = [iou for iou in image_objects_iou.values() if iou[1] == "Detected"]
print(f"Number of images detected: {len(nd)}")

# Print average IOU if detected
iou = [iou for iou in image_objects_iou.values() if iou[1] == "Detected"]
print(f"Average IOU for detected images: {round(np.mean([iou[0] for iou in iou]), 4)}")

# Print average IOU for all images
iou = [iou[0] for iou in image_objects_iou.values()]
print(f"Average IOU for all images: {round(np.mean(iou), 4)}")

# Print number of false positives
fp = [iou for iou in image_objects_iou.values() if iou[0] == 0 and iou[1] == "Detected"]
print(f"Number of false positives: {len(fp)}")


# Results for VJ
"""
# Scale factor 1.05, min neighbors 5
Number of images: 500
Number of images detected: 221
Average IOU for detected images: 0.5492
Average IOU for all images: 0.2427
Number of false positives: 32

# Scale factor 1.05, min neighbors 4
Number of images: 500
Number of images detected: 241
Average IOU for detected images: 0.515
Average IOU for all images: 0.2482
Number of false positives: 47
"""

Number of images: 500
Number of images detected: 241
Average IOU for detected images: 0.515
Average IOU for all images: 0.2482
Number of false positives: 47


'\nNumber of images: 500\nNumber of images detected: 221\nAverage IOU for detected images: 0.5492\nAverage IOU for all images: 0.2427\nNumber of false positives: 32\n'

## LBP
Use LBP to calculate distance between detected cropped regions and GT cropped regions of ears.

In [51]:
def calculate_distances(sim_scores, name=""):
    # Print average cosine similarity
    cos_sim = [sim for sim, _ in sim_scores.values()]

    # Print average euclidean distance
    euclidean_dist = [dist for _, dist in sim_scores.values()]

    print(f"Average cosine similarity for {name}: {round(np.mean(cos_sim), 4)}, euclidean distance: {round(np.mean(euclidean_dist), 4)}")
    #print(f"Average euclidean distance for {name}: {round(np.mean(euclidean_dist), 4)}")

In [53]:
def calculate_lbp_custom(names, images):
    # Convert image to grayscale -> iterate every pixel -> get the 8 neighbors -> compare the neighbors with the center pixel

    lbps = {}
    for name, image in tqdm(zip(names,images)):
        # Convert the image to grayscale
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

        # Initialize the LBP result
        lbp_result = np.zeros_like(gray, dtype=np.uint8)

        # Define the 8 neighbors for each pixel
        neighbors = [(0, 1), (1, 1), (1, 0), (1, -1), (0, -1), (-1, -1), (-1, 0), (-1, 1)]

        # Calculate LBP for each pixel
        rows, cols = gray.shape
        for i in range(1, rows - 1):
            for j in range(1, cols - 1):
                binary_pattern = ''
                center_value = gray[i, j]

                for neighbor in neighbors:
                    ni, nj = i + neighbor[0], j + neighbor[1]
                    binary_pattern += '1' if gray[ni, nj] >= center_value else '0'

                # Convert binary pattern to decimal and assign it to the result
                lbp_result[i, j] = int(binary_pattern, 2)
        lbps[name] = lbp_result

    return lbps

In [47]:
def calculate_lbp_custom_improve(names, images,radius=1, num_neighbors=8):
    lbps = {}
    for name, image in tqdm(zip(names, images)):
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        lbp_result = np.zeros_like(gray, dtype=np.uint8)
        neighbors = []  # Calculate circular neighbors for a given radius and number of neighbors
        for k in range(num_neighbors):
            x = int(-radius * np.sin(2.0 * np.pi * k / num_neighbors))
            y = int(radius * np.cos(2.0 * np.pi * k / num_neighbors))
            neighbors.append((x, y))

        rows, cols = gray.shape
        for i in range(radius, rows - radius):
            for j in range(radius, cols - radius):
                binary_pattern = ''
                center_value = gray[i, j]
                for neighbor in neighbors:
                    ni, nj = i + neighbor[0], j + neighbor[1]
                    binary_pattern += '1' if gray[ni, nj] >= center_value else '0'

                decimal_value = int(binary_pattern, 2)
                # Implement Uniform LBP
                num_transitions = sum(1 for x, y in zip(binary_pattern, binary_pattern[1:] + binary_pattern[0]) if x != y)
                if num_transitions <= 2:
                    lbp_result[i, j] = decimal_value | np.left_shift(1, num_neighbors)  # Mark uniform patterns
                lbp_result[i, j] = decimal_value

        # Create histogram of LBP
        hist, _ = np.histogram(lbp_result, bins=np.arange(0, 2**(num_neighbors+1), 1))
        lbps[name] = hist / np.sum(hist)  # Normalize histogram

    return lbps

In [22]:
from scipy.spatial.distance import euclidean
from sklearn.metrics.pairwise import cosine_similarity

def compare_cropped_lbps(detected, gt):
    """
    Compare LBPs of two images
    @param detected: Detected image dict {name:lbp}
    @param gt: Ground truth image dict {name:lbp}
    """ 

    similarity_scores = {}

    for (name, det), gt in tqdm(zip(detected.items(), gt.values())):
        # Transform the images to a one-dimensional vector
        det = det.flatten()
        gt = gt.flatten()

        # Calculate the cosine similarity
        cos_similarity = cosine_similarity(det.reshape(1, -1), gt.reshape(1, -1))

        # Calculate euclidean distance
        euclidean_dist = euclidean(det, gt)

        #print(f"Image {name} cosine similarity: {cos_similarity}, euclidean distance: {euclidean_dist}")

        similarity_scores[name] = (cos_similarity, euclidean_dist)
    
    return similarity_scores

In [23]:
data_path = "./data/work/VJ/"

# Get detected images
detected_images = [file for file in os.listdir(data_path+"detected")]
image_names = [image.split(".")[0]+"" for image in detected_images]
# Get their GT images
gt_images = [file for file in os.listdir(data_path+"gt") if file.split("_")[-1] in detected_images]

# Load the images, scale them to 128x128
detected_image_objects = [cv.resize(cv.imread(os.path.join(data_path+"detected", image)), (128, 128)) for image in detected_images]
gt_image_objects = [cv.resize(cv.imread(os.path.join(data_path+"gt", image)), (128, 128)) for image in gt_images]


Custom implementation

In [54]:
# Calculate LBP for both lists
detected_lbps = calculate_lbp_custom(image_names, detected_image_objects)
gt_lbps = calculate_lbp_custom(image_names, gt_image_objects)

sim_scores = compare_cropped_lbps(detected_lbps, gt_lbps)

calculate_distances(sim_scores, "custom LBP")

221it [00:08, 26.83it/s]
221it [00:08, 26.74it/s]
221it [00:00, 1163.15it/s]

Average cosine similarity for custom LBP: 0.7692, euclidean distance: 18273.3273





Custom implementation with additional improvement

In [46]:
len(detected_image_objects)

221

In [55]:

for radius in [1, 2, 3]:
    detected_lbps = calculate_lbp_custom_improve(image_names, detected_image_objects, radius=radius)
    gt_lbps = calculate_lbp_custom_improve(image_names, gt_image_objects, radius=radius)
    sim_scores = compare_cropped_lbps(detected_lbps, gt_lbps)
    calculate_distances(sim_scores, "custom LBP with uniform patterns, histogram for radius = "+str(radius))


221it [00:15, 14.49it/s]
221it [00:15, 14.25it/s]
221it [00:00, 2540.44it/s]


Average cosine similarity for custom LBP with uniform patterns, histogram for radius = 1: 0.961, euclidean distance: 0.0702


221it [00:18, 12.17it/s]
221it [00:18, 12.07it/s]
221it [00:00, 2662.74it/s]


Average cosine similarity for custom LBP with uniform patterns, histogram for radius = 2: 0.9351, euclidean distance: 0.0603


221it [00:17, 12.58it/s]
221it [00:17, 12.77it/s]
221it [00:00, 2630.99it/s]

Average cosine similarity for custom LBP with uniform patterns, histogram for radius = 3: 0.9407, euclidean distance: 0.058





Scikit implementation. 

In [56]:
from skimage import feature

# Use scikit LBP

for radius in [1, 2, 3]:
    # Calculate LBP for both lists, convert to gray first, create dict
    detected_lbps = {name: feature.local_binary_pattern(cv.cvtColor(image, cv.COLOR_BGR2GRAY), radius*8, radius, method='uniform') for name,image in zip(image_names, detected_image_objects)}
    gt_lbps = {name: feature.local_binary_pattern(cv.cvtColor(image, cv.COLOR_BGR2GRAY), radius*8, radius, method='uniform') for name,image in zip(image_names, gt_image_objects)}

    sim_scores = compare_cropped_lbps(detected_lbps, gt_lbps)
    calculate_distances(sim_scores, name="scikit LBP with radius " + str(radius))



221it [00:00, 1257.09it/s]


Average cosine similarity for scikit LBP with radius 1: 0.8718, euclidean distance: 324.9986


221it [00:00, 1389.94it/s]


Average cosine similarity for scikit LBP with radius 2: 0.8372, euclidean distance: 756.4613


221it [00:00, 1398.75it/s]

Average cosine similarity for scikit LBP with radius 3: 0.8235, euclidean distance: 1244.5595





Pixel-to-pixel comparison of base images only

In [59]:
# Check only pixels

detected = {name: image for name, image in zip(image_names, detected_image_objects)}
gt = {name: image for name, image in zip(image_names, gt_image_objects)}

sim_scores = compare_cropped_lbps(detected, gt)
calculate_distances(sim_scores, name="Pixel to pixel comparison")


221it [00:00, 818.52it/s]

Average cosine similarity for Pixel to pixel comparison: 0.9131, euclidean distance: 33577.2453





Mahotas implementation

In [60]:
import mahotas

numPoints = 24
radius = 3

detected_lbps = {name: mahotas.features.lbp(cv.cvtColor(image, cv.COLOR_BGR2GRAY), radius, numPoints) for name,image in zip(image_names, detected_image_objects)}
gt_lbps = {name: mahotas.features.lbp(cv.cvtColor(image, cv.COLOR_BGR2GRAY), radius, numPoints) for name,image in zip(image_names, gt_image_objects)}

sim_scores = compare_cropped_lbps(detected_lbps, gt_lbps)
calculate_distances(sim_scores, name="mahotas implementation " + str(radius))


221it [00:02, 91.63it/s]

Average cosine similarity for mahotas implementation 3: 0.9715, euclidean distance: 815.7983





# Questionnaire
Scale factor 1.05, min neighbors 5 Number of images: 500 Number of images detected: 221 Average IOU for detected images: 0.5492 Average IOU for all images: 0.2427 Number of false positives: 32
____

Average cosine similarity for custom LBP: 0.7692, euclidean distance: 18273.3273; Average cosine similarity for custom LBP with uniform patterns, histogram for radius = 1: 0.961, euclidean distance: 0.0702; Average cosine similarity for scikit LBP with radius 1: 0.8718, euclidean distance: 324.9986; Average cosine similarity for Pixel to pixel comparison: 0.9131, euclidean distance: 33577.2453; Average cosine similarity for mahotas implementation 3: 0.9715, euclidean distance: 815.7983

___


___
I did not split the dataset. 

___

For VJ i optimized two parameters, scaleFactor and minNeighbors. I ran the VJ detection program with a range of values for both parameters and plotted the graph, scoring average IOU and considering false positives. I found out that for VJ, best parameters are scaleFactor=1.05 and minNeighbors=4 or 5 (4 has better IOU, 5 has less false positives)

___

I implemented next upgrades: - Different radii: I implemented the LBP algorithm so that it takes a parameter for radius with which I calculated where to start and stop the LBP iteration and calculate how long the value from neighbors is; - uniform LBP: i check the binarry pattern for every pixel and count the bit transitions for adjacent pixels. if the number is 2 or less, i add a specific bit that indicates an uniform pattern; -histogram: after calculating LBP, i created a histogram of these values and then normalized it by its sum. This represented the frequency off occurances of each pattern
