## **Import Libraries**

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [None]:
# np.unique(cls)

## **Define Functions**

In [None]:
def read_file_line(file_name):
    """Return the lines of a text file with newline characters removed.

    Args:
        file_name: Path of the file to read. It is opened in text mode
            with the platform's default encoding.

    Returns:
        A list holding one string per line, in file order. Blank lines
        are kept as empty strings; an empty file yields an empty list.
    """
    with open(file_name) as handle:
        # Iterating the handle yields one line at a time, newline included.
        return [raw.replace("\n", "") for raw in handle]

def get_idx(list):
    """Return the text that precedes the first " -> " in every line.

    Args:
        list: Iterable of strings, each expected to look like
            "<identifier> -> <class name>".

    Returns:
        A list of identifiers, one per input line and in the same order.
        A line without the " -> " separator is returned unchanged.
    """
    # partition()[0] is everything before the first separator, or the
    # whole string when the separator is absent.
    return [entry.partition(" -> ")[0] for entry in list]

def get_class(list):
    """Return the class name that follows the first " -> " in every line.

    Args:
        list: Iterable of strings shaped like "<identifier> -> <class name>".

    Returns:
        A list of class names, one per input line. A class name that is
        empty or a single space is reported as the string "null".

    Raises:
        IndexError: If a line does not contain the " -> " separator.
    """
    blank_names = (" ", "")
    # Element 1 of the split is the text between the first and (if any)
    # second separator.
    names = (entry.split(" -> ")[1] for entry in list)
    return ["null" if name in blank_names else name for name in names]

def get_label(list):
    """Return the integer label for the class name found in every line.

    Args:
        list: Iterable of strings shaped like "<identifier> -> <class name>".

    Returns:
        A list of integer labels, one per input line. A class name that
        is empty or a single space maps to 0, the same label as "null".

    Raises:
        IndexError: If a line does not contain the " -> " separator.
        KeyError: If the class name is not one of the known names below.
    """
    # Keys must match the class names in the input files exactly.
    label_by_name = {
        "null": 0,
        "Chaetodon Lununatus": 1,
        "Dascyllus Aruanus": 2,
        "Dascyllus Reticulatus": 3,
        "Pempheris Vanicolensis": 4,
        "Plectrogly-Phidodon Dickii": 5,
    }
    blank_names = (" ", "")
    names = (entry.split(" -> ")[1] for entry in list)
    return [0 if name in blank_names else label_by_name[name] for name in names]

## **Import Data**

In [None]:
# Raw text lines of the prediction log and of the ground-truth file; the
# cells below split each line on " -> ".
pred = read_file_line(file_name="yolov4/mock/log_yolov5_conf50_dalkti.txt")
truth = read_file_line(file_name="yolov4/mock/ground_truth_onlyFish_5species_dalkti.txt")

## **Format Data**

In [None]:
# Break every prediction line into its identifier, class name and label.
idx, cls, labels = get_idx(pred), get_class(pred), get_label(pred)

# One row per line, then exact duplicate rows collapsed to their first
# occurrence.
pred_df = pd.DataFrame(
    {"file": idx, "class": cls, "labels": labels}
).drop_duplicates(keep="first")


In [None]:
# Break every ground-truth line into its identifier, class name and label.
idx, cls, labels = get_idx(truth), get_class(truth), get_label(truth)

# One row per line, then exact duplicate rows collapsed to their first
# occurrence.
truth_df = pd.DataFrame(
    {"file": idx, "class": cls, "labels": labels}
).drop_duplicates(keep="first")


In [None]:
# Inner join on "file": only identifiers present in both frames survive.
# The shared "class"/"labels" columns get pandas' default suffixes,
# _x for pred_df (left) and _y for truth_df (right).
df = pred_df.merge(truth_df, how="inner", on="file").drop_duplicates()
df

## **Print Confusion Matrix**

In [None]:
# labels_x comes from pred_df (left side of the merge), labels_y from truth_df.
predictions, actual_values = df["labels_x"], df["labels_y"]

In [None]:
# Confusion matrix over the six integer labels (0-5); rows are the true
# labels and columns the predicted ones.
cf = confusion_matrix(
    y_true=actual_values,
    y_pred=predictions,
    labels=[0, 1, 2, 3, 4, 5],
)
cf_display = ConfusionMatrixDisplay(confusion_matrix=cf, display_labels=[0, 1, 2, 3, 4, 5])
# Create the high-resolution figure BEFORE plotting and draw into its axes.
# Calling plt.figure() after plot() would open a second, empty figure and
# leave the confusion matrix at the default DPI.
plt.figure(dpi=1000)
cf_display.plot(ax=plt.gca())
plt.show()