# IF5200 - Evaluation Notebook
___
Group: 8<br>
Project: Automated Chest X-Ray Report Generator in Bahasa Indonesia with the Use of Deep Learning<br>
Team members: Arief Purnama Muharram, Hollyana Puteri Haryono, Abassi Haji Juma

Last update: April 5th, 2023

## A. Print library versions

In [1]:
# Print the installed version of each library listed below.
# Each pair is (display name, importable module name).
for label, module_name in [
    ('tqdm', 'tqdm'),
    ('matplotlib', 'matplotlib'),
    ('seaborn', 'seaborn'),
    ('pandas', 'pandas'),
    ('scikit-learn', 'sklearn'),
    ('imblearn', 'imblearn'),
    ('pillow', 'PIL'),
    ('torch', 'torch'),
    ('torchvision', 'torchvision'),
]:
    print(f'{label} version: {__import__(module_name).__version__}')

tqdm version: 4.64.1
matplotlib version: 3.5.3
seaborn version: 0.12.2
pandas version: 1.3.5
scikit-learn version: 1.0.2
imblearn version: 0.10.1
pillow version: 9.4.0
torch version: 1.13.1+cu117
torchvision version: 0.14.1+cu117


## B. Load all helpers

In [2]:
from src.report.utils.inference import InferenceUtils
from src.utils.image import read_image, get_segment, ToTensorTransform

## C. Load dataset

In [3]:
import pandas as pd


# Load the label table; a comma is the default separator for read_csv.
labels_csv_path = 'datasets/labels_cxr-images.csv'
df = pd.read_csv(labels_csv_path)

# Summarize columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112120 entries, 0 to 112119
Data columns (total 18 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   Image Index         112120 non-null  object
 1   Filename_Segment1   112120 non-null  object
 2   Filename_Segment2   112120 non-null  object
 3   Filename_Segment3   112120 non-null  object
 4   Atelectasis         112120 non-null  int64 
 5   Cardiomegaly        112120 non-null  int64 
 6   Effusion            112120 non-null  int64 
 7   Infiltration        112120 non-null  int64 
 8   Mass                112120 non-null  int64 
 9   Nodule              112120 non-null  int64 
 10  Pneumonia           112120 non-null  int64 
 11  Pneumothorax        112120 non-null  int64 
 12  Consolidation       112120 non-null  int64 
 13  Edema               112120 non-null  int64 
 14  Emphysema           112120 non-null  int64 
 15  Fibrosis            112120 non-null  int64 
 16  Pl

In [4]:
# Select only a portion of the data for system evaluation.
EVAL_SAMPLE_SIZE = 1000  # maximum number of images to evaluate
EVAL_RANDOM_SEED = 42    # fixed seed so a re-run evaluates the same images

data = (
    df[['Image Index', 'Cardiomegaly', 'Effusion']]
    .rename(columns={'Image Index': 'filename', 'Cardiomegaly': 'cardiomegaly', 'Effusion': 'effusion'}, errors='ignore')
    # Drop images where both labels are 0, i.e. keep those with at least one finding.
    .loc[lambda frame: ~((frame['cardiomegaly'] == 0) & (frame['effusion'] == 0))]
    # Shuffle reproducibly, then keep the first EVAL_SAMPLE_SIZE rows.
    .sample(frac=1, random_state=EVAL_RANDOM_SEED)
    .head(EVAL_SAMPLE_SIZE)
    # Discard the original row labels so the subset is indexed 0..n-1.
    .reset_index(drop=True)
)

In [5]:
# Preview the first three rows of the evaluation subset.
data.head(3)

Unnamed: 0,filename,cardiomegaly,effusion
0,00021499_013.png,0,1
1,00022526_002.png,0,1
2,00017137_012.png,0,1


## D. Run evaluation

In [6]:
# Directory holding the serialized model files (*.pth) loaded below.
MODELS_DIR = 'sys/models'
# Directory holding the chest X-ray images; file names are joined onto this path.
IMAGES_DIR = 'datasets/data_cxr-images_512x512'

In [7]:
import os
import torch


# Set up the device: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the models.
# NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
path_cardiomegaly = os.path.join(MODELS_DIR, 'model_cardiomegaly.pth')
path_effusion = os.path.join(MODELS_DIR, 'model_effusion.pth')
model_cardiomegaly = torch.load(path_cardiomegaly, map_location=device)
model_effusion = torch.load(path_effusion, map_location=device)
# -- Load additional models here, if necessary --

# Set up one inference helper per model.
inference_cardiomegaly = InferenceUtils(model=model_cardiomegaly, device=device)
inference_effusion = InferenceUtils(model=model_effusion, device=device)
# -- Define additional inference helpers here, if necessary --


In [8]:
import os


# Prediction function
def make_prediction(filename):
    """Predict the cardiomegaly and effusion labels for a single image.

    The image is read from ``IMAGES_DIR``, segments 1, 2 and 3 are extracted
    with ``get_segment`` and each one is converted with ``ToTensorTransform``.

    Args:
        filename: Name of the image file inside ``IMAGES_DIR``.

    Returns:
        ``[result_cardiomegaly, result_effusion]``: the scalar values returned
        by the two inference helpers, in that order.
    """
    image = read_image(os.path.join(IMAGES_DIR, filename))

    # Slice the image first, then convert every slice to a tensor.
    segments = [get_segment(image, number) for number in (1, 2, 3)]
    to_tensor = ToTensorTransform()
    tensors = [to_tensor.transform(segment) for segment in segments]
    _, img_segment2, _ = tensors

    # NOTE(review): both models are fed segment 2, while segments 1 and 3 are
    # prepared but never used. Confirm this is intended for each model.
    # unsqueeze(0) adds a leading dimension before the tensor is passed on.
    # -- Add further inference helpers to the tuple below, if necessary --
    predictions = [
        helper.make_prediction(img_segment2.unsqueeze(0)).item()
        for helper in (inference_cardiomegaly, inference_effusion)
    ]

    # Result order: [cardiomegaly, effusion]
    return predictions
    

In [9]:
import os
from time import sleep
from tqdm import tqdm
from pandas import DataFrame


# Evaluate function
def evaluate(data: DataFrame) -> DataFrame:
    """Run both models over every row of ``data`` and print a summary report.

    A row counts as correct only when the predictions for *all* labels
    (cardiomegaly and effusion) equal the ground truth.

    Args:
        data: Frame with the columns ``filename``, ``cardiomegaly`` and
            ``effusion``. An empty frame is accepted.

    Returns:
        A frame with one row per input row and the columns ``filename``,
        ``filepath``, ``cardiomegaly``, ``effusion``, ``pred_cardiomegaly``,
        ``pred_effusion`` and ``result`` (True when every label matched).
    """
    records = []

    for _, row in tqdm(data.iterrows(), total=data.shape[0]):
        # ground_truth = [ground_truth_1, ground_truth_2, ..., ground_truth_n]
        ground_truth = [row['cardiomegaly'], row['effusion']]
        result_code = make_prediction(row['filename'])

        # One record per image: identifiers, labels, predictions, exact-match flag.
        records.append([
            row['filename'],
            os.path.join(IMAGES_DIR, row['filename']),
            row['cardiomegaly'],
            row['effusion'],
            result_code[0],
            result_code[1],
            result_code == ground_truth,
        ])

    result = DataFrame(
        records,
        columns=['filename', 'filepath', 'cardiomegaly', 'effusion', 'pred_cardiomegaly', 'pred_effusion', 'result'],
    )

    size = len(result)
    count_true = int(result['result'].astype(bool).sum())
    # Guard against an empty input frame, which would otherwise raise
    # ZeroDivisionError; accuracy is reported as NaN in that case.
    accuracy = round(count_true / size, 2) if size else float('nan')

    # Presumably gives the tqdm progress bar time to finish rendering before
    # the report is printed. TODO confirm it is still needed.
    sleep(3)

    print('\nEvaluation report')
    print('-' * 21)
    print('Total data:', size)
    print('Count true:', count_true)
    print('Accuracy:', accuracy, '\n')

    return result
    

In [10]:
# Run the evaluation on the sampled subset: prints the summary report and
# keeps the per-image results for the error analysis below.
result = evaluate(data)

100%|██████████| 1000/1000 [03:06<00:00,  5.37it/s]



Evaluation report
---------------------
Total data: 1000
Count true: 333
Accuracy: 0.33 



In [11]:
# Show the first rows (up to 15) of the per-image evaluation results.
result.head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
0,00021499_013.png,datasets/data_cxr-images_512x512/00021499_013.png,0,1,1,1,False
1,00022526_002.png,datasets/data_cxr-images_512x512/00022526_002.png,0,1,1,1,False
2,00017137_012.png,datasets/data_cxr-images_512x512/00017137_012.png,0,1,1,1,False
3,00018592_007.png,datasets/data_cxr-images_512x512/00018592_007.png,0,1,0,1,True
4,00003989_001.png,datasets/data_cxr-images_512x512/00003989_001.png,1,0,1,0,True
5,00029052_011.png,datasets/data_cxr-images_512x512/00029052_011.png,0,1,0,1,True
6,00009326_000.png,datasets/data_cxr-images_512x512/00009326_000.png,0,1,0,0,False
7,00027378_000.png,datasets/data_cxr-images_512x512/00027378_000.png,0,1,1,1,False
8,00028523_010.png,datasets/data_cxr-images_512x512/00028523_010.png,0,1,1,1,False
9,00011463_002.png,datasets/data_cxr-images_512x512/00011463_002.png,1,0,1,1,False


In [12]:
# Error analysis: images for which every label was predicted correctly.
result[result['result'] == True].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
3,00018592_007.png,datasets/data_cxr-images_512x512/00018592_007.png,0,1,0,1,True
4,00003989_001.png,datasets/data_cxr-images_512x512/00003989_001.png,1,0,1,0,True
5,00029052_011.png,datasets/data_cxr-images_512x512/00029052_011.png,0,1,0,1,True
12,00025691_004.png,datasets/data_cxr-images_512x512/00025691_004.png,0,1,0,1,True
13,00015956_027.png,datasets/data_cxr-images_512x512/00015956_027.png,0,1,0,1,True
19,00005824_002.png,datasets/data_cxr-images_512x512/00005824_002.png,0,1,0,1,True
20,00004418_005.png,datasets/data_cxr-images_512x512/00004418_005.png,0,1,0,1,True
25,00013616_054.png,datasets/data_cxr-images_512x512/00013616_054.png,0,1,0,1,True
30,00005564_010.png,datasets/data_cxr-images_512x512/00005564_010.png,0,1,0,1,True
31,00001385_012.png,datasets/data_cxr-images_512x512/00001385_012.png,1,0,1,0,True


In [13]:
# Error analysis: images for which at least one label was predicted incorrectly.
result[result['result'] == False].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
0,00021499_013.png,datasets/data_cxr-images_512x512/00021499_013.png,0,1,1,1,False
1,00022526_002.png,datasets/data_cxr-images_512x512/00022526_002.png,0,1,1,1,False
2,00017137_012.png,datasets/data_cxr-images_512x512/00017137_012.png,0,1,1,1,False
6,00009326_000.png,datasets/data_cxr-images_512x512/00009326_000.png,0,1,0,0,False
7,00027378_000.png,datasets/data_cxr-images_512x512/00027378_000.png,0,1,1,1,False
8,00028523_010.png,datasets/data_cxr-images_512x512/00028523_010.png,0,1,1,1,False
9,00011463_002.png,datasets/data_cxr-images_512x512/00011463_002.png,1,0,1,1,False
10,00013608_022.png,datasets/data_cxr-images_512x512/00013608_022.png,0,1,1,1,False
11,00021362_004.png,datasets/data_cxr-images_512x512/00021362_004.png,0,1,1,1,False
14,00002892_008.png,datasets/data_cxr-images_512x512/00002892_008.png,0,1,1,1,False


In [14]:
# Error analysis: images labelled positive for both cardiomegaly and effusion
# for which every label was predicted correctly.
result.loc[((result['cardiomegaly'] == 1) & (result['effusion'] == 1) & (result['result'] == True))].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
48,00017504_034.png,datasets/data_cxr-images_512x512/00017504_034.png,1,1,1,1,True
56,00020819_010.png,datasets/data_cxr-images_512x512/00020819_010.png,1,1,1,1,True
74,00019508_012.png,datasets/data_cxr-images_512x512/00019508_012.png,1,1,1,1,True
127,00019924_031.png,datasets/data_cxr-images_512x512/00019924_031.png,1,1,1,1,True
132,00000013_027.png,datasets/data_cxr-images_512x512/00000013_027.png,1,1,1,1,True
142,00022572_061.png,datasets/data_cxr-images_512x512/00022572_061.png,1,1,1,1,True
160,00028433_019.png,datasets/data_cxr-images_512x512/00028433_019.png,1,1,1,1,True
185,00012670_000.png,datasets/data_cxr-images_512x512/00012670_000.png,1,1,1,1,True
199,00021796_004.png,datasets/data_cxr-images_512x512/00021796_004.png,1,1,1,1,True
202,00002763_016.png,datasets/data_cxr-images_512x512/00002763_016.png,1,1,1,1,True


In [15]:
# Error analysis
# Cardiomegaly model performance: share of images whose predicted
# cardiomegaly label equals the ground-truth label.

cardiomegaly_match = result['pred_cardiomegaly'] == result['cardiomegaly']
count_cardiomegaly_true = int(cardiomegaly_match.sum())

print('Evaluation report')
print('-' * 21)
print(f'Total data: {len(result)}')
print(f'Count true: {count_cardiomegaly_true}')
print(f'Accuracy: {round(count_cardiomegaly_true / len(result), 2)}')

Evaluation report
---------------------
Total data: 1000
Count true: 466
Accuracy: 0.47


In [16]:
# Error analysis
# Effusion model performance: share of images whose predicted
# effusion label equals the ground-truth label.

effusion_match = result['pred_effusion'] == result['effusion']
count_effusion_true = int(effusion_match.sum())

print('Evaluation report')
print('-' * 21)
print(f'Total data: {len(result)}')
print(f'Count true: {count_effusion_true}')
print(f'Accuracy: {round(count_effusion_true / len(result), 2)}')

Evaluation report
---------------------
Total data: 1000
Count true: 828
Accuracy: 0.83
