In [1]:
import seaborn as sns
import matplotlib.style as style 
import matplotlib.pyplot as plt
style.use('ggplot')
# style.use('seaborn-poster') #sets the size of the charts

import cv2
import os 
import pandas as pd
from tqdm import tqdm
import numpy as np
import sys
import re
from sklearn.metrics import f1_score, accuracy_score, balanced_accuracy_score, \
                            precision_score, recall_score, auc, roc_auc_score, \
                            confusion_matrix, roc_curve
from utils.print_pretty_confusion_matrix import plot_confusion_matrix_from_data , pretty_plot_confusion_matrix
sys.path.insert(0, 'dataloaders/')
from ast import literal_eval
from os.path import join
from os import listdir 

from binary_data_loader import LAD_MPR_Loader

In [2]:
# Temporary override: enlarge the default matplotlib figure to 18 x 18 inches.
from pylab import rcParams
rcParams.update({'figure.figsize': (18, 18)})

In [5]:
# Split and trained model whose predictions are analysed in this notebook.
data_split = 'test'
model_name = 'retrained_resnet18_balanced_data_without_25' #'retrained_resnet18_balanced_data_without_25_text_removed'

# p: folder with this model's predictions for the split; p_data: dataset root.
p = 'prediction_results/{}/{}/'.format(model_name, data_split)
p_data = 'data/binary_classification_only_lad/'

data_df = pd.read_csv(join(p_data, 'test_without_25_text_removed.csv'))
preds_df = pd.read_csv(join(p, '{}.csv'.format(data_split)))

# Raw arrays of the three prediction columns.
labels, preds, probas = (
    preds_df[column].values for column in ('LABELS', 'PREDS', 'PREDS_PROBAS')
)

# IMG_NAME is split on '_': field 2 becomes PATIENT_ID, field 0 becomes SECTION.
img_name_fields = [img_name.split('_') for img_name in preds_df['IMG_NAME']]
preds_df['PATIENT_ID'] = [fields[2] for fields in img_name_fields]
preds_df['SECTION'] = [fields[0] for fields in img_name_fields]

# Distinct patients and artery sections present in the predictions.
patients_ids = preds_df['PATIENT_ID'].unique()
sections_ids = preds_df['SECTION'].unique()

In [6]:
# Number of (non-null) predictions for every patient / section pair.
preds_df.groupby(['PATIENT_ID', 'SECTION'])['PREDS'].count()

PATIENT_ID       SECTION
CTCAPHD16081938  D-1        17
                 LAD        17
CTCASIP30041975  D-1        17
                 D-2        17
                 LAD        17
CTCASTR17021954  D-1        17
CTCASTS01111969  D-1        17
                 LAD        17
CTCASTW15121946  LAD        17
CTCATHJ17011957  D-1        17
                 LAD        17
CTCATKR01031953  D-1        17
                 D-2        17
                 LAD        17
CTCAVAH09071948  D-1        17
                 LAD        17
CTCAWUK05041963  D-1        17
CTCAYOA13121966  D-1        17
                 D-2        17
CTCAYOG08091955  D-1        17
                 D-2        17
                 LAD        17
CTCAZDV13081958  D-1        17
                 D-2        17
                 LAD        17
CTCAZHX30011957  D-1        17
                 LAD        17
Name: PREDS, dtype: int64

In [7]:
# Ordinal rank of every stenosis bucket token, plus the inverse mapping.
d = {'0':1, '25':2, '25-50':3, '50':4,'50-70':5, '70':6}
rank_to_bucket = {rank: bucket for bucket, rank in d.items()}

# Blank out the '>', '<', '%' and ',' characters, turn the '___' separators
# into spaces and spell NORMAL as '0'; what is left is a list of bucket tokens
# for each row.
tokens_per_row = [
    re.sub('[>,%<]', ' ', raw_score).replace('___', ' ').replace('NORMAL', '0').split()
    for raw_score in preds_df['STENOSIS_SCORE'].values
]

# Keep the highest-ranked bucket of each row and store its token as STENOSIS.
temp = [max(d[token] for token in tokens) for tokens in tokens_per_row]
s = [rank_to_bucket[rank] for rank in temp]
preds_df['STENOSIS'] = s

In [8]:
# PREDS_PROBAS stores each probability vector as text, e.g. '[0.45027855 0.5497214 ]'.
# Strip the surrounding brackets and split on any run of whitespace. The earlier
# `len(el) > 2` filter was only meant to drop empty strings, but it also dropped
# valid short tokens such as '1.' or '0.' (how a saturated output is printed),
# which made the `x[1]` lookup below fail or pick the wrong value.
all_probas = [
    [float(token) for token in arr.strip().strip('[]').split()]
    for arr in probas
]
# PROBABILITY is the second entry of each parsed vector.
preds_df['PROBABILITY'] = [x[1] for x in all_probas]

In [9]:
# Per patient / section: sum of LABELS and sum of PREDS over that group's rows.
preds_df.groupby(['PATIENT_ID', 'SECTION'], as_index=False)[['LABELS', 'PREDS']].sum()

Unnamed: 0,PATIENT_ID,SECTION,LABELS,PREDS
0,CTCAPHD16081938,D-1,0,0
1,CTCAPHD16081938,LAD,0,3
2,CTCASIP30041975,D-1,0,0
3,CTCASIP30041975,D-2,0,0
4,CTCASIP30041975,LAD,17,9
5,CTCASTR17021954,D-1,0,0
6,CTCASTS01111969,D-1,0,0
7,CTCASTS01111969,LAD,17,6
8,CTCASTW15121946,LAD,17,17
9,CTCATHJ17011957,D-1,0,0


In [10]:
# Rows where the predicted class differs from the ground-truth label.
mistakes = preds_df.loc[preds_df['LABELS'].ne(preds_df['PREDS'])]

In [11]:
# Show the second misclassified row (all columns) as a spot check.
mistakes.iloc[1]

Unnamed: 0                                    76
IMG_NAME          LAD_762_CTCAZHX30011957_22.png
LABELS                                         0
PATIENT_NAME                     CTCAZHX30011957
PREDS                                          1
PREDS_PROBAS             [0.45027855 0.5497214 ]
STENOSIS_SCORE          NORMAL___NORMAL___NORMAL
PATIENT_ID                       CTCAZHX30011957
SECTION                                      LAD
STENOSIS                                       0
PROBABILITY                             0.549721
Name: 76, dtype: object

## Save all mistakes

In [12]:
# Root of the test images; files are looked up under <PATIENT_NAME>/<SECTION>/.
p_data = 'data/binary_classification_only_lad/test'
folder_viz = p + 'visualization'
folder_imag = p + 'visualization/images'
folder_imag_mistakes = p + 'visualization/images/mistakes'
# makedirs creates the whole visualization/images/mistakes chain in one call
# and is a no-op when the folders already exist, so the cell can be re-run.
os.makedirs(folder_imag_mistakes, exist_ok=True)

font = cv2.FONT_HERSHEY_COMPLEX
for index, a in mistakes.iterrows():
    section_dir = join(p_data, a['PATIENT_NAME'], a['SECTION'])
    # The file on disk is the first directory entry whose name contains IMG_NAME.
    matches = [x for x in listdir(section_dir) if a['IMG_NAME'] in x]
    if not matches:
        raise FileNotFoundError(
            'No file containing {!r} found in {}'.format(a['IMG_NAME'], section_dir))
    im_name = matches[0]

    # Flag 0 loads the image as single-channel grayscale. cv2.imread signals
    # failure by returning None rather than raising, so check it explicitly.
    img = cv2.imread(join(section_dir, im_name), 0)
    if img is None:
        raise OSError('Could not read image {}'.format(join(section_dir, im_name)))

    # Caption: ground truth (label + stenosis bucket) | prediction + PROBABILITY in %.
    label_text = 'Label: ' + str(a['LABELS']) + '  stenosis: ' + a['STENOSIS'] + '%'
    pred_text = '    |   Prediction: ' + str(a['PREDS']) + '  prob: ' + str(round(a['PROBABILITY']*100, 3))+ '%'

    # Text origin is (x=10, y=500) -- assumes images are taller than 500 px; TODO confirm.
    cv2.putText(img,label_text + pred_text,(10,500), font, 0.4,(255,255,255),1,cv2.LINE_AA)
    cv2.imwrite( join(folder_imag_mistakes, im_name), img )

## Save correct predictions by group

In [27]:
# Rows where the prediction matches the label, capped at the first 10 rows of
# every (SECTION, LABELS) group.
is_correct = preds_df['LABELS'].eq(preds_df['PREDS'])
corrects = preds_df.loc[is_correct].groupby(['SECTION', 'LABELS']).head(10)

In [29]:
# Root of the test images; files are looked up under <PATIENT_NAME>/<SECTION>/.
p_data = 'data/binary_classification_only_lad/test'
folder_viz = p + 'visualization'
folder_imag = p + 'visualization/images'
folder_imag_correct = p + 'visualization/images/correct'
# makedirs creates the whole visualization/images/correct chain in one call
# and is a no-op when the folders already exist, so the cell can be re-run.
os.makedirs(folder_imag_correct, exist_ok=True)

font = cv2.FONT_HERSHEY_COMPLEX
for index, a in corrects.iterrows():
    section_dir = join(p_data, a['PATIENT_NAME'], a['SECTION'])
    # The file on disk is the first directory entry whose name contains IMG_NAME.
    matches = [x for x in listdir(section_dir) if a['IMG_NAME'] in x]
    if not matches:
        raise FileNotFoundError(
            'No file containing {!r} found in {}'.format(a['IMG_NAME'], section_dir))
    im_name = matches[0]

    # Flag 0 loads the image as single-channel grayscale. cv2.imread signals
    # failure by returning None rather than raising, so check it explicitly.
    img = cv2.imread(join(section_dir, im_name), 0)
    if img is None:
        raise OSError('Could not read image {}'.format(join(section_dir, im_name)))

    # Caption: ground truth (label + stenosis bucket) | prediction + PROBABILITY in %.
    label_text = 'Label: ' + str(a['LABELS']) + '  stenosis: ' + a['STENOSIS'] + '%'
    pred_text = '    |   Prediction: ' + str(a['PREDS']) + '  prob: ' + str(round(a['PROBABILITY']*100, 3))+ '%'

    # Text origin is (x=10, y=500) -- assumes images are taller than 500 px; TODO confirm.
    cv2.putText(img,label_text + pred_text,(10,500), font, 0.4,(255,255,255),1,cv2.LINE_AA)
    cv2.imwrite( join(folder_imag_correct, im_name), img )