In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import warnings
import json
from pathlib import Path
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Import functions from error_analysis.py
from error_analysis import crop_image, show_image_pairs, generate_confusion_matrices, load_config

from importlib import reload

# Silence every warning raised during the session so cell output stays readable.
warnings.filterwarnings("ignore")

# Pandas rendering for this notebook:
#   - never truncate columns,
#   - show at most 50 rows,
#   - allow a 1000-character wide text representation.
pd.options.display.max_columns = None
pd.options.display.max_rows = 50
pd.options.display.width = 1000

In [None]:
# Model name and dataset version under analysis; both are used to build paths
# in this cell and in the cells below.
model = "rtdert_2.0"
dataset_version = "test1_v1"

# Root folder that holds one "run_*" sub-folder per prediction run of the model.
model_path = f"prediction/{model}"

# Collect the run folders, newest first.
# - Only directories are accepted: a stray file such as "run_notes.txt" would
#   otherwise sort above the real runs and be picked as the "latest run".
# - Names are sorted lexicographically in descending order, which yields the
#   newest run first as long as the part after "run_" is a sortable timestamp
#   (assumption - confirm against the folder naming scheme).
run_folders = sorted(
    (
        entry
        for entry in os.listdir(model_path)
        if entry.startswith("run_") and os.path.isdir(os.path.join(model_path, entry))
    ),
    reverse=True,
)

if run_folders:
    run_folder = run_folders[0]  # Use the latest run by default
    print(f"Using latest run: {run_folder}")
else:
    # Best-effort fallback; loading the CSV will fail if this folder does not exist.
    run_folder = "run_latest"
    print("No run folders found. Using default name.")

# Path to the evaluation CSV file of the selected run
result_file = f"{model_path}/{run_folder}/{model}_evaluation.csv"

In [None]:
# Load the evaluation CSV of the selected run into a DataFrame.
# Later cells read its 'gt', 'pred', 'eval', 'confidence', 'filename' and
# 'gt_location' columns.
df_eval = pd.read_csv(result_file)

## Optimize the confidence threshold

In [None]:
# True-positive "impression" rows, lowest confidence first (top 5 shown).
df_eval.loc[
    df_eval['gt'].eq('impression') & df_eval['eval'].eq("TP")
].sort_values("confidence").head()

In [None]:
# Histogram of the confidence values of true-positive "einriss" rows.
df_eval.loc[
    df_eval['gt'].eq('einriss') & df_eval['eval'].eq("TP"),
    'confidence',
].hist()

## Confusion Matrix

In [None]:
# Clean up repeated ground-truth entries in df_eval.
#
# A row counts as "repeated" when another row has the same 'gt', 'gt_location'
# and 'filename'. keep=False flags every member of such a group, not only the
# later occurrences.
is_repeated = df_eval.duplicated(subset=['gt', 'gt_location', 'filename'], keep=False)

# Rows that are unique stay untouched; within the repeated groups only the rows
# that actually carry a prediction ('pred' not null) are kept.
# The result lists the unique rows first, followed by the surviving repeated rows.
df_eval = pd.concat(
    [
        df_eval[~is_repeated],
        df_eval[is_repeated].dropna(subset=['pred']),
    ]
)

## Error analysis

In [None]:
# Re-import the helper module so that edits made to error_analysis.py are
# picked up without restarting the kernel.
import error_analysis
reload(error_analysis)

# Specify the directory for images
directory = f'images/{dataset_version}'

# Get the list of file names
file_names = os.listdir(directory)

# Keep regular files only (sub-directories are skipped) and strip just the final
# extension. os.path.splitext is used instead of split(".")[0] so that a name
# containing dots in its stem (e.g. "part_1.5mm.bmp") is not truncated and still
# matches df_eval['filename'], which later cells treat as the full stem
# (they build paths as filename + ".bmp").
full_list = [
    os.path.splitext(f)[0]
    for f in file_names
    if os.path.isfile(os.path.join(directory, f))
]

# Class labels handed to the confusion-matrix helper.
classes = ["impression", "einriss", "abriss", "asperity", "ausseinriss"]

# Analysis artefacts of the selected run go below "<model_path>/<run_folder>/analysis".
output_dir = os.path.join(f"{model_path}/{run_folder}", "analysis")

# show=True / save=False: presumably renders the matrices inline without writing
# anything to output_dir - confirm in error_analysis.generate_confusion_matrices.
result = error_analysis.generate_confusion_matrices(df_eval, full_list, classes, output_dir, model, show = True, save = False)

In [None]:
# Visual inspection of false negatives (rows whose 'eval' is "FN"), per ground-truth class.
import error_analysis
reload(error_analysis)

def_names =  [ "einriss", "abriss", "ausseinriss", "impression", "asperity" ]
# def_names =  [ "abriss"]

# Evaluation label to inspect. Named eval_type (not `type`) so the builtin
# type() is not shadowed for the rest of the kernel session.
eval_type = "FN" # FN, FP

# Supported modes:
#   "merge"     - a prediction exists and equals the ground truth, yet the row is an FN
#   "wrong"     - a prediction exists but differs from the ground truth
#   "notdetect" - no prediction at all ('pred' is null)
# modes = ["merge", "wrong", "notdetect"]
modes = ["notdetect"]

for def_name in def_names:

    for mode in modes:

        # Rows of the current ground-truth class with the requested evaluation label.
        is_target = (df_eval['gt'] == def_name) & (df_eval['eval'] == eval_type)

        if mode == "merge":
            selection = is_target & (df_eval['pred'] == df_eval['gt'])
        elif mode == "wrong":
            selection = is_target & (df_eval['pred'] != df_eval['gt']) & df_eval['pred'].notnull()
        elif mode == "notdetect":
            selection = is_target & df_eval['pred'].isnull()
        else:
            # Fail loudly instead of silently reusing the previous iteration's frame.
            raise ValueError(f"Unknown mode {mode!r}; expected 'merge', 'wrong' or 'notdetect'")

        fil_df_eval = df_eval[selection]
        if fil_df_eval.empty:
            continue

        # assign() returns a new frame, so no column is written onto a filtered
        # slice of df_eval (avoids pandas' chained-assignment pitfalls).
        fil_df_eval = fil_df_eval.assign(
            gt_path=f"{model_path}/{run_folder}/image_unfilter_crop/" + fil_df_eval['filename'] + ".bmp"
        ).sort_values("pred")
        image_pairs = list(zip(fil_df_eval['filename'], fil_df_eval['gt_path'], fil_df_eval['gt'], fil_df_eval['pred']))

        # In notebook, we use show_plot=True to display the images interactively
        # NOTE(review): the FP and TP cells pass an extra positional None after
        # run_folder; confirm against the show_image_pairs signature which form is intended.
        error_analysis.show_image_pairs(image_pairs, mode, def_name, model_path, run_folder, save_images=False, show_plot=True)

In [None]:
# Visual inspection of false positives (rows whose 'eval' is "FP"), per predicted class.
import error_analysis
reload(error_analysis)

# def_names =  [ "einriss", "abriss", "ausseinriss", "impression", "asperity" ]
def_names = ['abriss']

# Evaluation label to inspect. Named eval_type (not `type`) so the builtin
# type() is not shadowed for the rest of the kernel session.
eval_type = "FP" # FN, FP

# Supported modes:
#   "wrong"     - a ground truth exists but differs from the prediction
#   "redundant" - no ground truth at all ('gt' is null)
# modes = ["wrong", "redundant"]
modes = [ "redundant"]

for def_name in def_names:
    for mode in modes:

        # Rows predicted as the current class with the requested evaluation label.
        is_target = (df_eval['pred'] == def_name) & (df_eval['eval'] == eval_type)

        if mode == "wrong":
            selection = is_target & (df_eval['pred'] != df_eval['gt']) & df_eval['gt'].notnull()
        elif mode == "redundant":
            selection = is_target & df_eval['gt'].isnull()
        else:
            # Fail loudly instead of silently reusing the previous iteration's frame.
            raise ValueError(f"Unknown mode {mode!r}; expected 'wrong' or 'redundant'")

        fil_df_eval = df_eval[selection]
        if fil_df_eval.empty:
            continue

        # assign() returns a new frame, so no column is written onto a filtered
        # slice of df_eval (avoids pandas' chained-assignment pitfalls).
        fil_df_eval = fil_df_eval.assign(
            gt_path=f"{model_path}/{run_folder}/image_unfilter_crop/" + fil_df_eval['filename'] + ".bmp"
        ).sort_values("gt")
        # Note the label order: the prediction comes first, the ground truth second.
        image_pairs = list(zip(fil_df_eval['filename'], fil_df_eval['gt_path'], fil_df_eval['pred'], fil_df_eval['gt']))

        # In notebook, we use show_plot=True to display the images interactively
        error_analysis.show_image_pairs(image_pairs, mode, def_name, model_path, run_folder, None, save_images=False, show_plot=True)

In [None]:
# Visual inspection of true positives (rows whose 'eval' is "TP"), per predicted class.
import error_analysis
reload(error_analysis)

# def_names =  [ "einriss", "abriss", "ausseinriss", "impression", "asperity" ]
def_names = ['abriss']

# Evaluation label to inspect. Named eval_type (not `type`) so the builtin
# type() is not shadowed for the rest of the kernel session.
eval_type = "TP"

# A single pass; the mode string is only forwarded to show_image_pairs and does
# not affect the row filter below.
modes = ["TP"]

for def_name in def_names:
    for mode in modes:

        fil_df_eval = df_eval[(df_eval['pred'] == def_name) & (df_eval['eval'] == eval_type)]
        if fil_df_eval.empty:
            continue

        # assign() returns a new frame, so no column is written onto a filtered
        # slice of df_eval (avoids pandas' chained-assignment pitfalls).
        fil_df_eval = fil_df_eval.assign(
            gt_path=f"{model_path}/{run_folder}/image_unfilter_crop/" + fil_df_eval['filename'] + ".bmp"
        ).sort_values("gt")
        # Note the label order: the prediction comes first, the ground truth second.
        image_pairs = list(zip(fil_df_eval['filename'], fil_df_eval['gt_path'], fil_df_eval['pred'], fil_df_eval['gt']))

        # In notebook, we use show_plot=True to display the images interactively
        error_analysis.show_image_pairs(image_pairs, mode, def_name, model_path, run_folder, None, save_images=False, show_plot=True)