In [1]:
import json
import cv2
import matplotlib
import matplotlib.pyplot as plt
import torch
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import MultiLabelBinarizer

from pathlib import Path
from dotenv import load_dotenv

from utils.core import main

import os

# Load variables from a .env file (if one is found) into the process
# environment; DATASET_DIR is read from the environment in a later cell.
load_dotenv()
# Explicitly select the inline Matplotlib backend so figures render in the
# notebook output area.
matplotlib.use('module://matplotlib_inline.backend_inline')

# IPython magic that enables inline figure rendering as well.
%matplotlib inline



In [2]:
# Resolve the dataset directory from the DATASET_DIR environment variable.
# A relative value is interpreted relative to the parent of the current
# working directory; an absolute value is used as-is by pathlib's `/` operator.
dataset_dir_env = os.getenv('DATASET_DIR')
if not dataset_dir_env:
    # Fail with an actionable message instead of the opaque TypeError that
    # Path(None) would raise when the variable is missing.
    raise RuntimeError(
        "DATASET_DIR is not set; define it in the environment or in a .env file"
    )
data_dir = '..' / Path(dataset_dir_env)

# Load the custom detector from the local YOLO checkout. The paths are built
# with pathlib so the cell does not depend on Windows-style backslash
# separators (the previous r'.\YOLO\...' literals only resolve on Windows).
yolo_dir = Path('YOLO')
model = torch.hub.load(
    str(yolo_dir),
    'custom',
    path=str(yolo_dir / 'weights' / '8-bound.pt'),
    source='local',
)


In [3]:
# Annotation table stored next to the images inside the dataset directory.
csv_data = data_dir / 'pano-report.csv'

# Build the working frame in one chain:
#   * zero placeholders (string and numeric form) become empty strings,
#   * the free-text 'comment' column is discarded,
#   * rows that still contain NaN are removed,
#   * only the first 32 remaining rows are kept.
df = (
    pd.read_csv(csv_data)
    .replace('0', '')
    .replace(0.0, '')
    .drop(columns='comment')
    .dropna()
    .iloc[:32]
)
df


Unnamed: 0,filename,R.R,caries,crown,endo,...,filling,Imp,embedded,impacted,missing
0,00006145.jpg,,25.0,,,...,24,,,,
1,00008026.jpg,,46.0,11122122263236,112136,...,17,,38.0,,
2,00008075.jpg,,,,,...,3637,,,,
3,00008113.jpg,,11124135.0,36,36,...,14151617222425262737384647,,,,
4,00008117.jpg,,162426.0,,,...,274647,,3848.0,,
5,00008131.jpg,,151734.0,,44,...,,,,,36374647.0
6,00008136.jpg,,161744.0,3233,,...,28,,,,34353637454647.0
7,00008137.jpg,,2226.0,3637,,...,27313441444547,,,,
8,00008140.jpg,,16214547.0,,1121,...,1621252728,,18.0,,363746.0
9,00008145.jpg,,273445.0,46,,...,16262747,46.0,,,


In [4]:
# Every column after the first one is treated as a label class.
df_columns = df.columns[1:]

# The binarizer is fitted on an iterable of label collections, so each class
# name is wrapped in its own 1-tuple.
x_for_possible = list(zip(df_columns))
mlb = MultiLabelBinarizer().fit(x_for_possible)

# Quick sanity check: a two-label sample, a one-label sample and an empty one.
mlb.transform([['R.R', 'missing'], ['Imp'], []])


array([[0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [5]:
# The 32 two-digit tooth codes 11-18, 21-28, 31-38 and 41-48
# (presumably FDI notation for the permanent dentition -- TODO confirm).
teeth_number_possible = [
    quadrant * 10 + position
    for quadrant in range(1, 5)
    for position in range(1, 9)
]

# Row position of each tooth code inside a per-image label matrix.
tooth_idx_dict = {tooth_number: idx for idx, tooth_number in enumerate(teeth_number_possible)}


def _parse_tooth_numbers(cell):
    """Return the tooth numbers stored in one annotation cell as a list of ints.

    Parameters
    ----------
    cell : str, number or missing value
        A comma-separated string such as ``'11,12,36'``, a single number
        (pandas parses a column numerically when every entry is a lone
        number), or an empty / zero / NaN placeholder.

    Returns
    -------
    list[int]
        The parsed tooth numbers; empty when the cell holds no annotation.

    Raises
    ------
    ValueError
        If a non-empty token of a string cell is not an integer literal.
    """
    if isinstance(cell, str):
        # Skip blank tokens so '' and stray separators ('11,,12', '11,') do
        # not reach int('').
        return [int(token) for token in cell.split(',') if token.strip()]
    if pd.isna(cell) or not cell:
        return []
    # Numeric cell holding exactly one tooth number (e.g. 46 or 46.0).
    return [int(cell)]


# One (n_teeth x n_classes) indicator matrix per row of df.
y_encode = []
for index, row in df.iterrows():
    # labels_per_tooth[i] collects the column names annotated for tooth i.
    labels_per_tooth = [[] for _ in teeth_number_possible]

    for col_name in df_columns:
        for tooth_number in _parse_tooth_numbers(row[col_name]):
            if tooth_number not in tooth_idx_dict:
                # Still a KeyError, as before, but it now says where the
                # offending annotation lives.
                raise KeyError(
                    f"row {index!r}, column {col_name!r}: "
                    f"unknown tooth number {tooth_number}"
                )
            labels_per_tooth[tooth_idx_dict[tooth_number]].append(col_name)

    y_encode.append(mlb.transform(labels_per_tooth))

y_encode[:2]


[array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],


In [6]:
# Image paths inside the 'phase-2' folder, in the same order as the rows of df.
image_names = [data_dir / 'phase-2' / name for name in df['filename']]

# Run the project pipeline on all images at once.
# NOTE(review): the result is used below as a mapping keyed by the file name
# without its last four characters, whose values map tooth number -> anomalies.
anomaly_dict = main(image_names, iou_threshold=0.3)

# One (n_teeth x n_classes) indicator matrix per image, mirroring y_encode.
y_predict = []
for filename in df['filename']:
    teeth = anomaly_dict[filename[:-4]]

    temp = [[] for _ in teeth_number_possible]
    for tooth_number, anomalies in teeth.items():
        # Tooth numbers outside the known set are ignored.
        if tooth_number not in tooth_idx_dict:
            continue

        idx = tooth_idx_dict[tooth_number]
        temp[idx] = list(anomalies)

    y_predict.append(mlb.transform(temp))

y_predict[:2]


KeyError: 50

In [None]:
# Number of per-image prediction matrices held in y_predict.
len(y_predict)


In [None]:
# Collapse each per-image (tooth x class) matrix to one row holding, for every
# class, the number of teeth that carry it.
y_predict_sum = np.vstack([np.sum(i, axis=0) for i in y_predict])
y_encode_sum = np.vstack([np.sum(i, axis=0) for i in y_encode])

# Header row with the class names. reshape((1, -1)) derives the width from the
# fitted binarizer instead of hard-coding 10 classes, so the cell keeps working
# when the CSV has a different number of label columns.
class_header = mlb.classes_.reshape((1, -1))

# First column: a 0 placeholder above the file names.
# Remaining columns: class names above (annotated - predicted) counts.
np.c_[
    np.r_[[0], df['filename'].to_numpy()],
    np.r_[class_header, y_encode_sum - y_predict_sum],
]


In [None]:
# Stack the per-image matrices so that every row is one tooth of one image.
y_encode_stack = np.vstack(y_encode)
y_predict_stack = np.vstack(y_predict)

# Overall accuracy on the stacked indicator rows (computed but not displayed).
acc = accuracy_score(y_encode_stack, y_predict_stack)

# Per-class scores: average=None returns one value per label column.
precision, recall, f1 = (
    score(y_encode_stack, y_predict_stack, average=None)
    for score in (precision_score, recall_score, f1_score)
)

# One row per class: name, precision, recall, F1.
np.c_[mlb.classes_, precision, recall, f1]
