# IBB - Assignment #2

pip install -r yolov5/requirements.txt

### Haarcascades

In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import torch

In [2]:
# Test-set containers, filled in lock-step and later paired purely by position.
image_list = []         # grayscale images as returned by cv2.cvtColor
image_paths = []        # file name of each image in image_list
image_annotations = []  # first line of each label file parsed into floats;
                        # fields 1-4 are read elsewhere as center x, center y, width, height
folder = os.path.join(os.getcwd(), "ear_data", "test")

# os.listdir() returns entries in arbitrary order. Sorting makes the traversal
# reproducible so that the n-th image and the n-th annotation stay aligned.
for filename in sorted(os.listdir(folder)):
    path = os.path.join(folder, filename)
    image = cv2.imread(path)
    if image is not None:
        image_list.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
        image_paths.append(filename)
        continue

    # Anything OpenCV cannot decode is treated as an annotation text file.
    # The context manager closes the file even if parsing fails.
    with open(path, 'r') as txt:
        line = txt.readline().strip()  # remove leading/trailing white spaces
    annotations = [float(item.strip()) for item in line.split(' ')]
    image_annotations.append(annotations)
            

In [3]:
# Build the per-image table: pixel data first, then file name and label row.
ear_df = pd.DataFrame(data=image_list, columns=["Image"])
ear_df.insert(loc=1, column="Image label", value=image_paths, allow_duplicates=False)
ear_df.insert(loc=2, column="Image annotations", value=image_annotations, allow_duplicates=True)

  values = np.array([convert(v) for v in values])


In [28]:
def detect_ears(img, l_cascade, r_cascade):
    """Detect one ear in *img* and return its box normalised to [0, 1].

    The left-ear cascade is tried first; the right-ear cascade is only run
    when the left one finds nothing. Only the first rectangle returned by the
    cascade is used.

    Args:
        img: Image array; ``img.shape[0]`` is the height and ``img.shape[1]``
            the width.
        l_cascade: Object exposing ``detectMultiScale(img)`` for left ears.
        r_cascade: Object exposing ``detectMultiScale(img)`` for right ears.

    Returns:
        ``[x / width, y / height, w / width, h / height]`` as floats for the
        first detection, or ``[0, 0, 0, 0]`` when neither cascade detects
        anything.
    """
    height = img.shape[0]
    width = img.shape[1]

    def _normalise(rect):
        # Scale the rectangle's four values by the image size.
        return [
            float(rect[0] / width),
            float(rect[1] / height),
            float(rect[2] / width),
            float(rect[3] / height),
        ]

    # len() rather than truthiness: the cascade may return a NumPy array,
    # whose truth value is ambiguous.
    ear_rect_l = l_cascade.detectMultiScale(img)
    if len(ear_rect_l) != 0:
        return _normalise(ear_rect_l[0])

    ear_rect_r = r_cascade.detectMultiScale(img)
    if len(ear_rect_r) != 0:
        return _normalise(ear_rect_r[0])

    return [0, 0, 0, 0]


def get_accuracy(predictions, annotations=None):
    """Score each prediction against its annotation, coordinate by coordinate.

    For every predicted value ``v`` at position ``i`` the score is
    ``1 - abs(v - annotation[i + 1])`` (the annotation's first field is
    skipped). A predicted value equal to 0 scores 0, which is how the
    "no detection" box ``[0, 0, 0, 0]`` ends up with accuracy 0. Note that a
    genuine coordinate of exactly 0 is scored the same way.

    Args:
        predictions: Iterable of 4-value boxes, one per image, in the same
            order as the annotations.
        annotations: Sequence of annotation rows indexed by image position.
            Defaults to the module-level ``image_annotations``.

    Returns:
        List with the mean coordinate score of every image.
    """
    if annotations is None:
        annotations = image_annotations

    final_acc = []
    for index, values in enumerate(predictions):
        ground_truth = annotations[index]
        accuracies = [
            1 - abs(v - ground_truth[i + 1]) if v != 0 else 0
            for i, v in enumerate(values)
        ]
        final_acc.append(np.mean(accuracies))
    return final_acc


def test_detect_ears_parameters(img, l_cascade, r_cascade, scales, neighbours, images=None):
    """Run the ear cascades over all images for every (scale, neighbour) pair.

    Each image is normalised by its own height and width. The previous
    version normalised every image by the size of the single ``img``
    argument, which is wrong whenever the images differ in size.

    Args:
        img: Unused; kept so existing positional calls keep working.
        l_cascade: Object exposing ``detectMultiScale(image, scale, neighbour)``
            for left ears.
        r_cascade: Same, for right ears; only consulted when the left cascade
            finds nothing.
        scales: Iterable of scale factors to try (outer loop).
        neighbours: Iterable of neighbour counts to try (inner loop).
        images: Images to process. Defaults to the module-level
            ``image_list``.

    Returns:
        One list per (scale, neighbour) combination, in loop order. Each
        holds one ``[x, y, w, h]`` box per image, normalised to the image
        size, or ``[0, 0, 0, 0]`` when nothing was detected.
    """
    if images is None:
        images = image_list

    image_predictions = []

    for scale in scales:
        for neighbour in neighbours:
            images_predictions = []
            for image in images:
                height = image.shape[0]
                width = image.shape[1]

                # Try the left cascade first; fall back to the right one.
                rects = l_cascade.detectMultiScale(image, scale, neighbour)
                if len(rects) == 0:
                    rects = r_cascade.detectMultiScale(image, scale, neighbour)

                if len(rects) != 0:
                    rect = rects[0]
                    predictions_processed = [
                        float(rect[0] / width),
                        float(rect[1] / height),
                        float(rect[2] / width),
                        float(rect[3] / height),
                    ]
                else:
                    predictions_processed = [0, 0, 0, 0]

                images_predictions.append(predictions_processed)
            image_predictions.append(images_predictions)

    return image_predictions

In [12]:
# Read in the cascade classifiers for ears
left_ear_cascade = cv2.CascadeClassifier('haarcascade_mcs_leftear.xml')
right_ear_cascade = cv2.CascadeClassifier('haarcascade_mcs_rightear.xml')

# get predictions: one normalised box per test image, in image_list order
# NOTE: the loop variable `i` stays bound to the last image after the loop and
# is passed to test_detect_ears_parameters in a later cell; do not rename it
# without updating that call.
predictions_vj = []
for i in image_list:
    predictions_vj.append(detect_ears(i, left_ear_cascade, right_ear_cascade))

# get accuracy of predictions: per-image scores, then their mean
vj_accuracy_list = get_accuracy(predictions_vj)
vj_accuracy = np.mean(vj_accuracy_list)

In [31]:
# Parameter grid swept while fine-tuning the cascade detections.
# The last scale is deliberately the int 2: the values are turned into column
# names with str(), so 2.0 would produce a different name ("S=2.0").
scales = [
    1.1, 1.2, 1.3, 1.4, 1.5,
    1.6, 1.7, 1.8, 1.9, 2,
]
neighbours = list(range(7))

In [None]:
# fine-tuning predictions
# Pass the last test image explicitly rather than the leftover loop variable
# `i`, which other cells rebind to an integer.
fine_tune_runs_vj = test_detect_ears_parameters(image_list[-1], left_ear_cascade, right_ear_cascade, scales, neighbours)

In [3]:
# Save results
# NOTE(review): this commented-out save writes "predictions_vj_ft.pickle",
# while the load below reads "fine_tune_runs_vj.pickle" - confirm which name is intended.
# with open("predictions_vj_ft.pickle","wb") as f:
#    pickle.dump(fine_tune_runs_vj, f)

# Load the stored fine-tuning predictions (a nested list, not a dataframe).
# pickle.load can execute arbitrary code; only load files you created yourself.
with open("fine_tune_runs_vj.pickle","rb") as f:
    fine_tune_runs_vj = pickle.load(f)

In [6]:
# Attach the baseline detections and their per-image accuracy to the table.
ear_df.insert(loc=3, column="VJ", value=predictions_vj, allow_duplicates=True)
ear_df.insert(loc=4, column="VJ Accuracy", value=vj_accuracy_list, allow_duplicates=False)

In [4]:
# Save dataframe
# with open("ear_df.pickle","wb") as f:
#    pickle.dump(ear_df, f)

# Load dataframe: replaces any in-memory ear_df with the pickled one.
# pickle.load can execute arbitrary code; only load files you created yourself.
with open("ear_df.pickle","rb") as f:
    ear_df = pickle.load(f)

In [34]:
# Create column names, one per (scale, neighbour) combination, in the same
# nested order in which the fine-tuning runs were produced.
column_names = []
name_base = "VJ "
for s in scales:
    for n in neighbours:
        column_names.append(name_base + "S=" + str(s) + " N=" + str(n))

# Insert predictions in dataframe
for run_index, (column_name, run_predictions) in enumerate(zip(column_names, fine_tune_runs_vj)):
    # Replace any None coordinate by 0 so later arithmetic does not fail.
    for prediction in run_predictions:
        for k in range(4):
            if prediction[k] is None:
                prediction[k] = 0

    # Drop a stale copy only if it exists; an unconditional `del` raises
    # KeyError the first time this cell runs.
    if column_name in ear_df.columns:
        del ear_df[column_name]
    ear_df.insert(5 + run_index, column_name, run_predictions, False)


In [12]:
def get_IoU(predictions, annotations=None):
    """Compute the notebook's overlap score for every prediction.

    For a prediction whose first value is non-zero::

        intersection = max(0, 1 - |p[0] - a[1]|) * max(0, 1 - |p[1] - a[2]|)
        score        = intersection / (2 - intersection)

    where ``p`` is the prediction and ``a`` the annotation row. A prediction
    whose first value is 0 (the "no detection" box) scores 0.

    NOTE(review): only the first two coordinates take part; box widths and
    heights are ignored, so this is not the standard box IoU. Confirm that
    this is intended before comparing with other work.

    Args:
        predictions: Iterable of 4-value boxes, one per image, in annotation
            order.
        annotations: Sequence of annotation rows indexed by image position.
            Defaults to the module-level ``image_annotations``.

    Returns:
        List with one score per prediction.
    """
    if annotations is None:
        annotations = image_annotations

    IoU = []
    for index, values in enumerate(predictions):
        if values[0] == 0:
            # No detection: intersection 0 over union 2.
            IoU.append(0 / 2)
            continue

        ground_truth = annotations[index]
        overlap_x = max(0, 1 - abs(values[0] - ground_truth[1]))
        overlap_y = max(0, 1 - abs(values[1] - ground_truth[2]))
        intersection = overlap_x * overlap_y
        IoU.append(intersection / (2 - intersection))

    return IoU


In [38]:
# Insert all image accuracies and IoUs into new dataframe
acc_column_name_list = []
IoU_column_name_list = []
column_accuracies = []  # mean accuracy per parameter combination
column_IoUs = []        # mean IoU per parameter combination
total_IoUs = []         # every per-image IoU of every combination, flattened

for c in column_names:
    column_acc_results = get_accuracy(ear_df[c])
    column_acc = np.mean(column_acc_results)
    column_accuracies.append(column_acc)
    acc_column_name = "Acc " + c
    acc_column_name_list.append(acc_column_name)
    # Only drop the column when it exists; an unconditional `del` raises
    # KeyError the first time this cell runs.
    if acc_column_name in ear_df.columns:
        del ear_df[acc_column_name]
    ear_df.insert(len(ear_df.columns), acc_column_name, column_acc_results, False)

    IoU_column_name = "IoU " + c
    IoU_column_name_list.append(IoU_column_name)
    column_IoU_results = get_IoU(ear_df[c])
    total_IoUs.extend(column_IoU_results)
    column_IoU_mean = np.mean(column_IoU_results)
    column_IoUs.append(column_IoU_mean)
    if IoU_column_name in ear_df.columns:
        del ear_df[IoU_column_name]
    ear_df.insert(len(ear_df.columns), IoU_column_name, column_IoU_results, False)


In [136]:
# Summary table: one row per parameter combination with its mean scores.
ear_params_df = pd.DataFrame(data=column_names, columns=["Image parameters"])
ear_params_df.insert(loc=1, column="Accuracies", value=column_accuracies, allow_duplicates=False)
ear_params_df.insert(loc=2, column="IoUs", value=column_IoUs, allow_duplicates=False)

In [5]:
# Save dataframe
# with open("ear_params_df.pickle","wb") as f:
#    pickle.dump(ear_params_df, f)

# Load dataframe: replaces any in-memory ear_params_df with the pickled one.
# pickle.load can execute arbitrary code; only load files you created yourself.
with open("ear_params_df.pickle","rb") as f:
    ear_params_df = pickle.load(f)

In [138]:
# The five parameter combinations with the highest mean accuracy.
ear_params_df.sort_values(by="Accuracies", ascending=False).head(5)

Unnamed: 0,Image parameters,Accuracies,IoUs
0,VJ S=1.1 N=0,0.783817,0.493783
7,VJ S=1.2 N=0,0.726743,0.465292
14,VJ S=1.3 N=0,0.674724,0.439796
21,VJ S=1.4 N=0,0.6358,0.417217
28,VJ S=1.5 N=0,0.625254,0.410071


In [139]:
# The five parameter combinations with the lowest mean accuracy.
ear_params_df.sort_values(by="Accuracies", ascending=True).head(5)

Unnamed: 0,Image parameters,Accuracies,IoUs
69,VJ S=2 N=6,0.0192,0.016354
55,VJ S=1.8 N=6,0.023048,0.01948
62,VJ S=1.9 N=6,0.023132,0.019473
27,VJ S=1.4 N=6,0.026978,0.023258
48,VJ S=1.7 N=6,0.028924,0.024869


In [6]:
def get_pr_graphs(results, best_or_worst):
    """Plot, save and show one threshold curve per parameter combination.

    For every threshold 0.01, 0.02, ..., 0.99 the curve shows the fraction of
    images whose value in the ``"IoU <name>"`` column of ``ear_df`` exceeds
    that threshold. Each figure is saved as
    ``Precision-Recall-Haarcascades-<best_or_worst>-<k>`` with ``k`` counting
    from 1.

    Args:
        results: Iterable of rows whose first element is a parameter column
            name such as ``"VJ S=1.1 N=0"`` (for example the rows of
            ``ear_params_df.values``).
        best_or_worst: Label inserted into the output file name.
    """
    thresholds = [(t + 1) / 100 for t in range(99)]

    for counter, result in enumerate(results, start=1):
        ious = ear_df["IoU " + result[0]]
        # Divide by the actual number of images instead of a hard-coded 500.
        total = len(ious)
        precisions = [
            sum(1 for iou in ious if iou > threshold) / total if total else 0.0
            for threshold in thresholds
        ]

        fig, ax = plt.subplots()
        # Plot against the real thresholds so the x-axis is labelled
        # correctly without overriding tick labels by hand.
        ax.plot(thresholds, precisions)
        ax.set_title("Precision-Recall Haarcascades")
        ax.set_xlabel("IoU threshold")
        ax.set_ylabel("Fraction of images above threshold")
        fig.savefig("Precision-Recall-Haarcascades-" + best_or_worst + "-" + str(counter))
        plt.show()

In [19]:
def draw_prediction_and_gt(img,index,prediction_ear,gt):
    """Draw the predicted and ground-truth boxes on a copy of *img*.

    Nothing is drawn or shown when the prediction is the "no detection" box
    ``[0, 0, 0, 0]``. Otherwise the annotated copy is displayed in an OpenCV
    window titled with the image label and the call blocks until a key is
    pressed.

    Args:
        img: Image array; it is copied and never modified.
        index: Row of ``ear_df`` whose "Image label" is used as window title.
        prediction_ear: Normalised ``[x, y, width, height]`` with ``(x, y)``
            the top-left corner.
        gt: Annotation row; fields 1-4 are taken as normalised center x,
            center y, width and height, as noted where the annotations are
            loaded. NOTE(review): verify against the label files.

    Returns:
        The copy of *img*, with both rectangles drawn when a prediction exists.
    """
    ear_img = img.copy()
    # list(...) keeps the comparison a plain bool for array-like predictions too.
    if list(prediction_ear) != [0, 0, 0, 0]:
        img_h = ear_img.shape[0]
        img_w = ear_img.shape[1]

        # Prediction: top-left corner plus size.
        pred_x, pred_y, pred_w, pred_h = prediction_ear
        cv2.rectangle(
            ear_img,
            (int(pred_x * img_w), int(pred_y * img_h)),
            (int((pred_x + pred_w) * img_w), int((pred_y + pred_h) * img_h)),
            (255, 0, 0),
            6,
        )

        # Ground truth is center-based, so move half a box towards each
        # corner. It was previously drawn as if (gt[1], gt[2]) were the
        # top-left corner, which shifted the box.
        gt_cx, gt_cy, gt_w, gt_h = gt[1], gt[2], gt[3], gt[4]
        cv2.rectangle(
            ear_img,
            (int((gt_cx - gt_w / 2) * img_w), int((gt_cy - gt_h / 2) * img_h)),
            (int((gt_cx + gt_w / 2) * img_w), int((gt_cy + gt_h / 2) * img_h)),
            (255, 255, 255),
            2,
        )

        cv2.imshow(ear_df["Image label"][index], ear_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return ear_img

In [None]:
# Precision-Recall for the baseline VJ
# The argument is a one-row list because get_pr_graphs reads the column name
# from element 0 of each row.
# NOTE(review): "S=1.1 N=3" presumably mirrors detectMultiScale's default
# parameters used by detect_ears - confirm against the OpenCV documentation.
get_pr_graphs([["VJ S=1.1 N=3"]] ,"Baseline")

In [23]:
# Curves for the five parameter combinations with the highest mean accuracy.
top_5_results = ear_params_df.sort_values(by="Accuracies", ascending=False).head(5).values
get_pr_graphs(top_5_results, "Best")

In [None]:
# Curves for the five parameter combinations with the lowest mean accuracy.
bottom_5_results = ear_params_df.sort_values(by="Accuracies", ascending=True).head(5).values
get_pr_graphs(bottom_5_results, "Worst")

In [None]:
# The five images scored highest under the best parameter combination.
ear_df.sort_values(by="Acc VJ S=1.1 N=0", ascending=False).head(5)

# 89     0.985078 0590.png
# 163    0.982411 0664.png
# 220    0.981269 1921.png
# 229    0.981180 1930.png
# 226    0.980142 1927.png

In [49]:
# Draw the five best matches. Each call passes the image's own row index, so
# the window title shows the right file name (three calls previously passed 18).
(
    image_best_match_1,
    image_best_match_2,
    image_best_match_3,
    image_best_match_4,
    image_best_match_5,
) = [
    draw_prediction_and_gt(
        image_list[idx], idx, ear_df["VJ S=1.1 N=0"][idx], image_annotations[idx]
    )
    for idx in (89, 163, 220, 229, 226)
]

In [None]:
# Curves for the ten parameter combinations with the lowest mean accuracy.
bottom_10_results = ear_params_df.sort_values(by="Accuracies", ascending=True).head(10).values
get_pr_graphs(bottom_10_results, "Worst")

In [20]:
# (image index, prediction column) of the ten mismatches, drawn in this order.
worst_match_cases = [
    (8, "VJ S=2 N=6"),
    (35, "VJ S=2 N=6"),
    (78, "VJ S=2 N=6"),
    (101, "VJ S=2 N=6"),
    (138, "VJ S=2 N=6"),
    (152, "VJ S=2 N=6"),
    (276, "VJ S=2 N=6"),
    (334, "VJ S=2 N=6"),
    (357, "VJ S=2 N=6"),
    (18, "VJ S=1.8 N=6"),
]
(
    image_worst_match_1,
    image_worst_match_2,
    image_worst_match_3,
    image_worst_match_4,
    image_worst_match_5,
    image_worst_match_6,
    image_worst_match_7,
    image_worst_match_8,
    image_worst_match_9,
    image_worst_match_10,
) = [
    draw_prediction_and_gt(image_list[idx], idx, ear_df[column][idx], image_annotations[idx])
    for idx, column in worst_match_cases
]

# Mismatches:
# 8 S=2 N=6
# 35 ... 
# 78
# 101
# 138
# 152
# 276
# 334
# 357
# 18 S=1.8

In [None]:
# Baseline
# draw_prediction_and_gt takes (img, index, prediction_ear, gt); the index
# argument was missing here, which raised a TypeError.
image_match = draw_prediction_and_gt(image_list[18], 18, ear_df["VJ S=1.1 N=0"][18], image_annotations[18])

### YOLOv5

In [None]:
# Load a custom YOLOv5 model through torch.hub with weights from "yolo5s.pt".
# NOTE(review): with source='local' the first argument is presumably the path
# of a local yolov5 checkout - confirm against the torch.hub documentation.
yolov5_model = torch.hub.load(
    "yolov5", 'custom', path="yolo5s.pt", source='local')

# Smoke test: run the model on a single test image given by its path.
img = 'ear_data/test/0501.png'
yolov5_results = yolov5_model(img)
# Display the detections for this image.
yolov5_results.show()  # other options: .save(), .crop(), .pandas(), etc

In [7]:
# Run the model over the 500 test images in five consecutive batches of 100.
(
    yolov5_results_1st_fifth,
    yolov5_results_2nd_fifth,
    yolov5_results_3rd_fifth,
    yolov5_results_4th_fifth,
    yolov5_results_5th_fifth,
) = [yolov5_model(image_list[start:start + 100]) for start in range(0, 500, 100)]

In [8]:
# For every batch, take the per-image detection tables in xyxy form.
(
    yolov5_1st_fifth_pos,
    yolov5_2nd_fifth_pos,
    yolov5_3rd_fifth_pos,
    yolov5_4th_fifth_pos,
    yolov5_5th_fifth_pos,
) = [
    batch_results.pandas().xyxy
    for batch_results in (
        yolov5_results_1st_fifth,
        yolov5_results_2nd_fifth,
        yolov5_results_3rd_fifth,
        yolov5_results_4th_fifth,
        yolov5_results_5th_fifth,
    )
]

In [7]:
# normalize yolo results [0, 1] 
def normalize_yolo_results(starting_index, yolov5_pos, images=None):
    """Scale the first detection of every image to the [0, 1] range.

    Args:
        starting_index: Position in ``images`` of the image that belongs to
            the first entry of ``yolov5_pos``.
        yolov5_pos: Sequence of per-image detection tables with the columns
            ``xmin``, ``ymin``, ``xmax``, ``ymax`` and ``class``.
        images: Images the detections refer to; each must expose ``shape``
            as (height, width, ...). Defaults to the module-level
            ``image_list``.

    Returns:
        One ``[xmin / width, ymin / height, xmax / width, ymax / height]``
        list per table, built from the table's first row, or ``[0, 0, 0, 0]``
        when the table has no rows.
    """
    if images is None:
        images = image_list

    yolov5_norm_results = []
    for index, r in enumerate(yolov5_pos, start=starting_index):
        if r["class"].empty:
            # No detection for this image.
            yolov5_norm_results.append([0, 0, 0, 0])
            continue

        height = images[index].shape[0]
        width = images[index].shape[1]
        yolov5_norm_results.append([
            r["xmin"].values[0] / width,
            r["ymin"].values[0] / height,
            r["xmax"].values[0] / width,
            r["ymax"].values[0] / height,
        ])
    return yolov5_norm_results

In [42]:
# Normalise every batch; the offset tells the helper which images it covers.
(
    yolov5_1st_fifth_pos_normalized,
    yolov5_2nd_fifth_pos_normalized,
    yolov5_3rd_fifth_pos_normalized,
    yolov5_4th_fifth_pos_normalized,
    yolov5_5th_fifth_pos_normalized,
) = [
    normalize_yolo_results(offset, batch_pos)
    for offset, batch_pos in (
        (0, yolov5_1st_fifth_pos),
        (100, yolov5_2nd_fifth_pos),
        (200, yolov5_3rd_fifth_pos),
        (300, yolov5_4th_fifth_pos),
        (400, yolov5_5th_fifth_pos),
    )
]

In [43]:
# Concatenate the five normalised batches back into one list in image order.
all_pos_normalized = (
    yolov5_1st_fifth_pos_normalized
    + yolov5_2nd_fifth_pos_normalized
    + yolov5_3rd_fifth_pos_normalized
    + yolov5_4th_fifth_pos_normalized
    + yolov5_5th_fifth_pos_normalized
)

In [9]:
# Save normalised YOLOv5 boxes (a plain list, not a dataframe)
# with open("all_pos_normalized.pickle","wb") as f:
#    pickle.dump(all_pos_normalized, f)

# Load normalised YOLOv5 boxes: replaces any in-memory all_pos_normalized.
# pickle.load can execute arbitrary code; only load files you created yourself.
with open("all_pos_normalized.pickle","rb") as f:
    all_pos_normalized = pickle.load(f)

In [10]:
# Per-image accuracy of the YOLOv5 boxes: mean over the four coordinates of
# 1 - |prediction - annotation|, with a predicted 0 scoring 0.
yolov5_accuracies = []
for index, pos in enumerate(all_pos_normalized):
    coordinate_scores = []
    for i, v in enumerate(pos):
        if v != 0:
            coordinate_scores.append(1 - abs(v - image_annotations[index][i + 1]))
        else:
            coordinate_scores.append(0)
    yolov5_accuracies.append(np.mean(coordinate_scores))

# Kept as a one-element list because it is read back with [0] elsewhere.
yolov5_accuracies_mean = [np.mean(yolov5_accuracies)]

In [13]:
# Per-image overlap scores of the YOLOv5 boxes, and their mean over all images.
yolov5_IoU_list = get_IoU(all_pos_normalized)
mean_yolov5_IoU = np.mean(yolov5_IoU_list)

In [14]:
# Uncomment to drop the columns before re-running this cell:
# del ear_df["Acc YOLOv5"]
# del ear_df["IoU YOLOv5"]
# Append the YOLOv5 scores as the last two columns of the per-image table.
ear_df.insert(loc=len(ear_df.columns), column="Acc YOLOv5", value=yolov5_accuracies, allow_duplicates=False)
ear_df.insert(loc=len(ear_df.columns), column="IoU YOLOv5", value=yolov5_IoU_list, allow_duplicates=False)

In [16]:
# Add YOLOv5 row
# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# is the supported equivalent.
yolov5_row = pd.DataFrame([{"Image parameters": "YOLOv5", "Accuracies": yolov5_accuracies_mean[0], "IoUs": mean_yolov5_IoU}])
ear_params_df = pd.concat([ear_params_df, yolov5_row], ignore_index=True)

In [None]:
# File names of the five images with the highest YOLOv5 accuracy.
ear_df.sort_values(by="Acc YOLOv5", ascending=False).head(5)["Image label"]

# 32     0.930307 0533.png
# 430    0.906633 2131.png
# 362    0.904298 2063.png
# 148    0.895469 0649.png
# 3      0.891080 0504.png

In [9]:
def get_pr_graphs_yolo(results, best_or_worst):
    """Plot, save and show the threshold curve for a list of IoU scores.

    For every threshold 0.01, 0.02, ..., 0.99 the curve shows the fraction of
    entries in *results* that exceed that threshold. The figure is saved as
    ``Precision-Recall-YOLOv5-<best_or_worst>-1``.

    Args:
        results: Sized iterable of per-image IoU scores.
        best_or_worst: Label inserted into the output file name.
    """
    thresholds = [(t + 1) / 100 for t in range(99)]

    # Divide by the actual number of scores instead of a hard-coded 500.
    total = len(results)
    precisions = [
        sum(1 for iou in results if iou > threshold) / total if total else 0.0
        for threshold in thresholds
    ]

    fig, ax = plt.subplots()
    # Plot against the real thresholds so the x-axis is labelled correctly
    # without overriding tick labels by hand.
    ax.plot(thresholds, precisions)
    ax.set_title("Precision-Recall YOLOv5")
    ax.set_xlabel("IoU threshold")
    ax.set_ylabel("Fraction of images above threshold")
    # Only one figure is produced, so the numeric suffix is always 1.
    fig.savefig("Precision-Recall-YOLOv5-" + best_or_worst + "-" + str(1))
    plt.show()

In [None]:
# Threshold curve for the YOLOv5 IoU scores; "Base" becomes part of the saved file name.
get_pr_graphs_yolo(yolov5_IoU_list, "Base")

In [12]:
# Paths of the five images on which YOLOv5 scored best.
(
    best_img_1_yolo,
    best_img_2_yolo,
    best_img_3_yolo,
    best_img_4_yolo,
    best_img_5_yolo,
) = (
    'ear_data/test/0533.png',
    'ear_data/test/2131.png',
    'ear_data/test/2063.png',
    'ear_data/test/0649.png',
    'ear_data/test/0504.png',
)

# Re-run the model on each of them, in the same order.
(
    yolov5_best_result_1,
    yolov5_best_result_2,
    yolov5_best_result_3,
    yolov5_best_result_4,
    yolov5_best_result_5,
) = [
    yolov5_model(best_path)
    for best_path in (
        best_img_1_yolo,
        best_img_2_yolo,
        best_img_3_yolo,
        best_img_4_yolo,
        best_img_5_yolo,
    )
]

In [None]:
# Display the five best YOLOv5 detections one after another.
for yolov5_best_result in (
    yolov5_best_result_1,
    yolov5_best_result_2,
    yolov5_best_result_3,
    yolov5_best_result_4,
    yolov5_best_result_5,
):
    yolov5_best_result.show()