# IF5200 - Evaluation Notebook
___
Group: 8<br>
Project: Automated Chest X-Ray Report Generator in Bahasa Indonesia with the Use of Deep Learning<br>
Team members: Arief Purnama Muharram, Hollyana Puteri Haryono, Abassi Haji Juma

Last update: April 5th, 2023

## A. Print library versions

In [1]:
# Report the installed version of each library, keyed by the label shown in
# the output and the module name used to import it.
for label, module_name in [
    ('tqdm', 'tqdm'),
    ('matplotlib', 'matplotlib'),
    ('seaborn', 'seaborn'),
    ('pandas', 'pandas'),
    ('scikit-learn', 'sklearn'),
    ('imblearn', 'imblearn'),
    ('pillow', 'PIL'),
    ('torch', 'torch'),
    ('torchvision', 'torchvision'),
]:
    print(f'{label} version:', __import__(module_name).__version__)

tqdm version: 4.64.1
matplotlib version: 3.5.3
seaborn version: 0.12.2
pandas version: 1.3.5
scikit-learn version: 1.0.2
imblearn version: 0.10.1
pillow version: 9.4.0
torch version: 1.13.1+cu117
torchvision version: 0.14.1+cu117


## B. Load all helpers

In [2]:
from src.report.utils.inference import InferenceUtils
from src.utils.image import read_image, get_segment, ToTensorTransform

## C. Load dataset

In [3]:
import pandas as pd


# Load the label table; a comma is already read_csv's default separator
df = pd.read_csv('datasets/labels_cxr-images.csv')

# Summarise columns, dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112120 entries, 0 to 112119
Data columns (total 18 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   Image Index         112120 non-null  object
 1   Filename_Segment1   112120 non-null  object
 2   Filename_Segment2   112120 non-null  object
 3   Filename_Segment3   112120 non-null  object
 4   Atelectasis         112120 non-null  int64 
 5   Cardiomegaly        112120 non-null  int64 
 6   Effusion            112120 non-null  int64 
 7   Infiltration        112120 non-null  int64 
 8   Mass                112120 non-null  int64 
 9   Nodule              112120 non-null  int64 
 10  Pneumonia           112120 non-null  int64 
 11  Pneumothorax        112120 non-null  int64 
 12  Consolidation       112120 non-null  int64 
 13  Edema               112120 non-null  int64 
 14  Emphysema           112120 non-null  int64 
 15  Fibrosis            112120 non-null  int64 
 16  Pl

In [4]:
# Select only a portion of the data for system evaluation.
EVAL_SAMPLE_SIZE = 1000   # number of images kept for the evaluation run
EVAL_RANDOM_STATE = 42    # fixed seed so the same subset is drawn on every run

data = (
    df[['Image Index', 'Cardiomegaly', 'Effusion', 'Consolidation']]
    .rename(
        columns={
            'Image Index': 'filename',
            'Cardiomegaly': 'cardiomegaly',
            'Effusion': 'effusion',
            'Consolidation': 'consolidation',
        },
        errors='ignore',
    )
    # Drop images for which all three findings are 0
    .loc[lambda d: ~((d['cardiomegaly'] == 0) & (d['effusion'] == 0) & (d['consolidation'] == 0))]
    # Shuffle reproducibly, then keep the first EVAL_SAMPLE_SIZE rows
    .sample(frac=1, random_state=EVAL_RANDOM_STATE)
    .head(EVAL_SAMPLE_SIZE)
    # Renumber rows 0..n-1 without keeping the old index as a column
    .reset_index(drop=True)
)

In [5]:
# Preview the first three rows of the evaluation subset
data.head(3)

Unnamed: 0,filename,cardiomegaly,effusion,consolidation
0,00014879_006.png,0,1,0
1,00001437_039.png,0,1,0
2,00018865_047.png,0,1,0


## D. Run evaluation

In [6]:
# Directory holding the serialized model files (*.pth) loaded below
MODELS_DIR = 'sys/models'
# Directory holding the chest X-ray images referenced by the `filename` column
IMAGES_DIR = 'datasets/data_cxr-images_512x512'

In [7]:
import os
import torch


# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load one serialized model per finding onto the selected device.
# NOTE: torch.load unpickles its input, so these files must come from a
# trusted source.
model_cardiomegaly, model_effusion, model_consolidation = (
    torch.load(os.path.join(MODELS_DIR, model_file), map_location=device)
    for model_file in (
        'model_cardiomegaly.pth',
        'model_effusion.pth',
        'model_consolidation.pth',
    )
)
# -- Define another models here, if necessary --

# Wrap every model in its own inference helper bound to the same device
inference_cardiomegaly, inference_effusion, inference_consolidation = (
    InferenceUtils(model=loaded_model, device=device)
    for loaded_model in (model_cardiomegaly, model_effusion, model_consolidation)
)
# -- Define another inferences here, if necessary --


In [8]:
import os


# Prediction function
def make_prediction(filename):
    """Run the three finding models on a single image.

    Args:
        filename: Image file name, resolved relative to ``IMAGES_DIR``.

    Returns:
        ``[cardiomegaly, effusion, consolidation]``: one scalar per model,
        obtained via ``.item()`` on each inference result.
    """
    image = read_image(os.path.join(IMAGES_DIR, filename))

    # Only segments 2 and 3 feed a model, so segment 1 is not sliced or
    # converted (it was computed but never used).
    # NOTE(review): cardiomegaly and effusion both read segment 2 — confirm
    # this matches the segment each model was trained on.
    transformer = ToTensorTransform()
    batch_segment2 = transformer.transform(get_segment(image, 2)).unsqueeze(0)
    batch_segment3 = transformer.transform(get_segment(image, 3)).unsqueeze(0)

    # Make prediction (each input carries a leading batch dimension of 1)
    result_cardiomegaly = inference_cardiomegaly.make_prediction(batch_segment2).item()
    result_effusion = inference_effusion.make_prediction(batch_segment2).item()
    result_consolidation = inference_consolidation.make_prediction(batch_segment3).item()
    # -- Make another prediction here, if necessary --

    # Return result
    # return [result_1, result_2, ..., result_n]
    return [result_cardiomegaly, result_effusion, result_consolidation]
    

In [9]:
import os
from time import sleep
from tqdm import tqdm
from pandas import DataFrame


# Evaluate function
def evaluate(data: DataFrame) -> DataFrame:
    """Predict every image in ``data`` and print an exact-match accuracy report.

    A row counts as correct only when all three predictions equal their
    ground-truth labels.

    Args:
        data: Frame with a ``filename`` column and the ground-truth columns
            ``cardiomegaly``, ``effusion`` and ``consolidation``.

    Returns:
        One row per input image with the columns ``filename``, ``filepath``,
        the three ground-truth labels, the three ``pred_*`` predictions and
        ``result`` (True when every prediction matches).
    """
    columns = [
        'filename', 'filepath',
        'cardiomegaly', 'effusion', 'consolidation',
        'pred_cardiomegaly', 'pred_effusion', 'pred_consolidation',
        'result',
    ]
    records = []

    for _, row in tqdm(data.iterrows(), total=data.shape[0]):
        # ground_truth = [ground_truth_1, ground_truth_2, ..., ground_truth_n]
        ground_truth = [row['cardiomegaly'], row['effusion'], row['consolidation']]
        predicted = make_prediction(row['filename'])

        records.append([
            row['filename'],
            os.path.join(IMAGES_DIR, row['filename']),
            *ground_truth,
            predicted[0], predicted[1], predicted[2],
            # List equality: True only if every label was predicted correctly
            predicted == ground_truth,
        ])

    result = DataFrame(records, columns=columns)

    size = len(result)
    count_true = int(result['result'].sum())
    # Guard the empty-input case, which previously raised ZeroDivisionError
    accuracy = round(count_true / size, 2) if size else float('nan')

    # Pause before printing; presumably gives the tqdm bar time to finish
    # rendering so the report is not interleaved with it — TODO confirm.
    sleep(3)

    print('\nEvaluation report')
    print('-' * 21)
    print('Total data:', size)
    print('Count true:', count_true)
    print('Accuracy:', accuracy, '\n')

    return result
    

In [10]:
# Run the evaluation on the sampled subset: prints the summary report and
# keeps the per-image outcome table in `result`
result = evaluate(data)

100%|██████████| 1000/1000 [03:38<00:00,  4.59it/s]



Evaluation report
---------------------
Total data: 1000
Count true: 151
Accuracy: 0.15 



In [11]:
# Show the first rows of the per-image result table (labels, predictions and
# the exact-match flag)
result.head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,consolidation,pred_cardiomegaly,pred_effusion,pred_consolidation,result
0,00014879_006.png,datasets/data_cxr-images_512x512/00014879_006.png,0,1,0,1,0,0,False
1,00001437_039.png,datasets/data_cxr-images_512x512/00001437_039.png,0,1,0,1,1,1,False
2,00018865_047.png,datasets/data_cxr-images_512x512/00018865_047.png,0,1,0,1,1,0,False
3,00013685_041.png,datasets/data_cxr-images_512x512/00013685_041.png,0,1,0,1,1,1,False
4,00018253_039.png,datasets/data_cxr-images_512x512/00018253_039.png,0,1,0,1,1,1,False
5,00022065_004.png,datasets/data_cxr-images_512x512/00022065_004.png,0,1,0,0,1,1,False
6,00028509_037.png,datasets/data_cxr-images_512x512/00028509_037.png,0,0,1,1,1,1,False
7,00022416_042.png,datasets/data_cxr-images_512x512/00022416_042.png,0,1,0,1,1,1,False
8,00000127_005.png,datasets/data_cxr-images_512x512/00000127_005.png,0,1,0,1,1,1,False
9,00026810_043.png,datasets/data_cxr-images_512x512/00026810_043.png,0,0,1,1,1,1,False


In [12]:
# Error analysis: rows where all three predictions match the ground truth
result.loc[result['result'].eq(True)].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,consolidation,pred_cardiomegaly,pred_effusion,pred_consolidation,result
17,00023323_001.png,datasets/data_cxr-images_512x512/00023323_001.png,0,1,0,0,1,0,True
22,00012576_012.png,datasets/data_cxr-images_512x512/00012576_012.png,0,1,0,0,1,0,True
28,00012045_040.png,datasets/data_cxr-images_512x512/00012045_040.png,0,1,0,0,1,0,True
56,00022031_010.png,datasets/data_cxr-images_512x512/00022031_010.png,0,1,0,0,1,0,True
69,00014551_010.png,datasets/data_cxr-images_512x512/00014551_010.png,0,1,0,0,1,0,True
73,00008038_002.png,datasets/data_cxr-images_512x512/00008038_002.png,0,1,0,0,1,0,True
83,00025082_006.png,datasets/data_cxr-images_512x512/00025082_006.png,0,1,0,0,1,0,True
84,00007989_000.png,datasets/data_cxr-images_512x512/00007989_000.png,1,0,0,1,0,0,True
88,00011967_003.png,datasets/data_cxr-images_512x512/00011967_003.png,0,1,0,0,1,0,True
93,00005372_027.png,datasets/data_cxr-images_512x512/00005372_027.png,0,1,0,0,1,0,True


In [13]:
# Error analysis: rows where at least one prediction differs from the ground truth
result.loc[result['result'].eq(False)].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,consolidation,pred_cardiomegaly,pred_effusion,pred_consolidation,result
0,00014879_006.png,datasets/data_cxr-images_512x512/00014879_006.png,0,1,0,1,0,0,False
1,00001437_039.png,datasets/data_cxr-images_512x512/00001437_039.png,0,1,0,1,1,1,False
2,00018865_047.png,datasets/data_cxr-images_512x512/00018865_047.png,0,1,0,1,1,0,False
3,00013685_041.png,datasets/data_cxr-images_512x512/00013685_041.png,0,1,0,1,1,1,False
4,00018253_039.png,datasets/data_cxr-images_512x512/00018253_039.png,0,1,0,1,1,1,False
5,00022065_004.png,datasets/data_cxr-images_512x512/00022065_004.png,0,1,0,0,1,1,False
6,00028509_037.png,datasets/data_cxr-images_512x512/00028509_037.png,0,0,1,1,1,1,False
7,00022416_042.png,datasets/data_cxr-images_512x512/00022416_042.png,0,1,0,1,1,1,False
8,00000127_005.png,datasets/data_cxr-images_512x512/00000127_005.png,0,1,0,1,1,1,False
9,00026810_043.png,datasets/data_cxr-images_512x512/00026810_043.png,0,0,1,1,1,1,False


In [14]:
# Error analysis: fully correct rows whose ground truth has both
# cardiomegaly and effusion
result.loc[
    result['cardiomegaly'].eq(1)
    & result['effusion'].eq(1)
    & result['result'].eq(True)
].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,consolidation,pred_cardiomegaly,pred_effusion,pred_consolidation,result
115,00030206_013.png,datasets/data_cxr-images_512x512/00030206_013.png,1,1,0,1,1,0,True
161,00012987_006.png,datasets/data_cxr-images_512x512/00012987_006.png,1,1,0,1,1,0,True
177,00018972_042.png,datasets/data_cxr-images_512x512/00018972_042.png,1,1,1,1,1,1,True
223,00015770_033.png,datasets/data_cxr-images_512x512/00015770_033.png,1,1,1,1,1,1,True
347,00002806_004.png,datasets/data_cxr-images_512x512/00002806_004.png,1,1,0,1,1,0,True
391,00022611_003.png,datasets/data_cxr-images_512x512/00022611_003.png,1,1,0,1,1,0,True
630,00029459_004.png,datasets/data_cxr-images_512x512/00029459_004.png,1,1,0,1,1,0,True
807,00001582_024.png,datasets/data_cxr-images_512x512/00001582_024.png,1,1,0,1,1,0,True
809,00028316_000.png,datasets/data_cxr-images_512x512/00028316_000.png,1,1,0,1,1,0,True
922,00014626_009.png,datasets/data_cxr-images_512x512/00014626_009.png,1,1,0,1,1,0,True


In [15]:
# Error analysis
# Cardiomegaly model performance: share of rows whose prediction equals the label

count_cardiomegaly_true = int((result['pred_cardiomegaly'] == result['cardiomegaly']).sum())

print('Evaluation report', '-' * 21, sep='\n')
print('Total data:', len(result))
print('Count true:', count_cardiomegaly_true)
print('Accuracy:', round(count_cardiomegaly_true / len(result), 2))

Evaluation report
---------------------
Total data: 1000
Count true: 451
Accuracy: 0.45


In [16]:
# Error analysis
# Effusion model performance: share of rows whose prediction equals the label

count_effusion_true = int((result['pred_effusion'] == result['effusion']).sum())

print('Evaluation report', '-' * 21, sep='\n')
print('Total data:', len(result))
print('Count true:', count_effusion_true)
print('Accuracy:', round(count_effusion_true / len(result), 2))

Evaluation report
---------------------
Total data: 1000
Count true: 717
Accuracy: 0.72


In [17]:
# Error analysis
# Consolidation model performance: share of rows whose prediction equals the label

count_consolidation_true = int((result['pred_consolidation'] == result['consolidation']).sum())

print('Evaluation report', '-' * 21, sep='\n')
print('Total data:', len(result))
print('Count true:', count_consolidation_true)
print('Accuracy:', round(count_consolidation_true / len(result), 2))

Evaluation report
---------------------
Total data: 1000
Count true: 543
Accuracy: 0.54
