In [None]:
import os
from pathlib import Path
from tqdm import tqdm
import pandas as pd
import seaborn as sns

import pv_vision.defective_cell_detection.result_analysis as analysis

# Get labels from YOLO annotations

In [None]:
# Collect the ground-truth label of every cell in the training set.
train_dir = Path('grayscale_train/train')
train_ann_dir = train_dir/'ann'

# Directory listings come back in arbitrary order, so sort them to keep the
# resulting table identical from run to run. Sub-directories and hidden
# entries (e.g. .ipynb_checkpoints, .DS_Store) are not annotation files and
# are skipped. `train_anns` stays a list of file names (str).
train_anns = sorted(
    entry.name
    for entry in train_ann_dir.iterdir()
    if entry.is_file() and not entry.name.startswith('.')
)

# Annotation label name -> defect name; passed to collect_all_cells as `labels_true`.
label2defect = {
    "crack_bbox": "crack",
    "oxygen_bbox": "oxygen",
    "solder_bbox": "solder",
    "intra_bbox": "intra"
    }

# Column-wise accumulator. It has its own name so that `train_cells_info`
# is only ever bound to the final DataFrame.
train_cells_columns = {
    'module_name': [],
    'index': [],
    'labels_true': [],
    'x': [],
    'y': [],
}

for ann_file in tqdm(train_anns):
    cells_info = analysis.collect_all_cells(train_ann_dir/ann_file, labels_true = label2defect)
    # Append this module's values to the matching column.
    for key, value in cells_info.items():
        train_cells_columns[key] += value

train_cells_info = pd.DataFrame(train_cells_columns)
train_cells_info.head()

In [None]:
# Number of cells per ground-truth class in the training set
train_cells_info.groupby(by='labels_true').size()

In [None]:
# Defect-only view of the training set (intact cells are excluded).

# One empty list per output column; collect_defects receives this dict and
# its return value is carried over to the next annotation file.
defects_info = {
    column: []
    for column in ('module_name', 'index', 'defects', 'x', 'y')
}

for ann_name in tqdm(train_anns):
    ann_path = train_dir / "ann" / ann_name
    # NOTE(review): meaning of mode=0 is defined by pv_vision — confirm in its docs
    defects_info = analysis.collect_defects(ann_path, defects_info, label2defect, mode=0)

train_defects = pd.DataFrame(defects_info)
train_defects.head()

In [None]:
# Build flat label lists so that predictions can be compared with the ground truth.
# The manual (ground-truth) and YOLO-predicted annotations are stored in separate
# folders; the same annotation file name is looked up in both.

manual_val_dir = Path('grayscale_val/val/ann/')
yolo_val_dir = Path('prediction_val_grayscale_yolo_0.65/val/ann')

# Directory listings come back in arbitrary order, so sort them to keep the
# label lists (and any preview of them) identical from run to run.
# Sub-directories and hidden entries (e.g. .ipynb_checkpoints, .DS_Store)
# are not annotation files and are skipped.
ann_files_val = sorted(
    entry.name
    for entry in manual_val_dir.iterdir()
    if entry.is_file() and not entry.name.startswith('.')
)

# Map annotation labels to integers. Intact is denoted as 1 here: it is not a
# key of either mapping and is supplied through fill_label=1 below.
defects_dic_manual = {
        'crack_bbox': 0,
        'solder_bbox': 4,
        'intra_bbox': 2,
        'oxygen_bbox': 3

    }

# Same integer codes, keyed by the label names used in the YOLO prediction files.
defects_dic_yolo = {
        'crack_bbox_yolo': 0,
        'solder_bbox_yolo': 4,
        'intra_bbox_yolo': 2,
        'oxygen_bbox_yolo': 3
    }

yolo_manual_y_val = []
yolo_pred_y_val = []

# Both lists are extended in the same file order, one module at a time.
for ann_file in tqdm(ann_files_val):
    yolo_manual_y_val.extend(analysis.get_label_one_module(manual_val_dir/ann_file, defects_dic_manual, fill_label=1))
    yolo_pred_y_val.extend(analysis.get_label_one_module(yolo_val_dir/ann_file, defects_dic_yolo, fill_label=1))

# Further analysis like metric reports can be found in the tutorial of random forest

In [None]:
# Preview the first ten ground-truth labels and the first ten predicted labels
tuple(labels[:10] for labels in (yolo_manual_y_val, yolo_pred_y_val))

# Position distribution of defects

In [None]:
# Heatmap of defect positions, built from the defects dataframe.
# "crack" is only an example; any other defect name can be passed instead.

train_pv = analysis.plot_heatmap(
    train_defects,
    "crack",
    linewidths=0.3,
    cbar_size=0.8,
)