# IF5200 - Evaluation Notebook
___
Group: 8<br>
Project: Automated Chest X-Ray Report Generator in Bahasa Indonesia with the Use of Deep Learning<br>
Team members: Arief Purnama Muharram, Hollyana Puteri Haryono, Abassi Haji Juma

Last update: April 5th, 2023

## A. Print library version

In [1]:
print('tqdm version:', __import__('tqdm').__version__)
print('matplotlib version:', __import__('matplotlib').__version__)
print('seaborn version:', __import__('seaborn').__version__)
print('pandas version:', __import__('pandas').__version__)
print('scikit-learn version:', __import__('sklearn').__version__)
print('imblearn version:', __import__('imblearn').__version__)
print('pillow version:', __import__('PIL').__version__)
print('torch version:', __import__('torch').__version__)
print('torchvision version:', __import__('torchvision').__version__)

tqdm version: 4.64.1
matplotlib version: 3.5.3
seaborn version: 0.12.2
pandas version: 1.3.5
scikit-learn version: 1.0.2
imblearn version: 0.10.1
pillow version: 9.4.0
torch version: 1.13.1+cu117
torchvision version: 0.14.1+cu117


## B. Load all helpers

In [2]:
from src.report.utils.inference import InferenceUtils
from src.utils.image import read_image, get_segment, ToTensorTransform

## C. Load dataset

In [3]:
import pandas as pd


df = pd.read_csv('datasets/labels_cxr-images.csv', sep=',')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112120 entries, 0 to 112119
Data columns (total 18 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   Image Index         112120 non-null  object
 1   Filename_Segment1   112120 non-null  object
 2   Filename_Segment2   112120 non-null  object
 3   Filename_Segment3   112120 non-null  object
 4   Atelectasis         112120 non-null  int64 
 5   Cardiomegaly        112120 non-null  int64 
 6   Effusion            112120 non-null  int64 
 7   Infiltration        112120 non-null  int64 
 8   Mass                112120 non-null  int64 
 9   Nodule              112120 non-null  int64 
 10  Pneumonia           112120 non-null  int64 
 11  Pneumothorax        112120 non-null  int64 
 12  Consolidation       112120 non-null  int64 
 13  Edema               112120 non-null  int64 
 14  Emphysema           112120 non-null  int64 
 15  Fibrosis            112120 non-null  int64 
 16  Pl

In [4]:
# Select only portions of data for system evaluation
data = df[['Image Index', 'Cardiomegaly', 'Effusion']]
data = data.rename(columns={'Image Index': 'filename', 'Cardiomegaly': 'cardiomegaly', 'Effusion': 'effusion'}, errors='ignore')
data = data.loc[~((data['cardiomegaly'] == 0) & (data['effusion'] == 0))]
data = data.sample(frac=1).head(1000)
data.reset_index(inplace=True)
data.drop(['index'], axis=1, inplace=True)

In [5]:
data.head(3)

Unnamed: 0,filename,cardiomegaly,effusion
0,00027416_010.png,0,1
1,00012505_000.png,0,1
2,00013187_002.png,1,0


## D. Make evaluation

In [6]:
MODELS_DIR = 'sys/models'
IMAGES_DIR = 'datasets/data_cxr-images_512x512'

In [7]:
import os
import torch


# Setup device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load models
model_cardiomegaly = torch.load(os.path.join(MODELS_DIR, 'model_cardiomegaly.pth'), map_location=device)
model_effusion = torch.load(os.path.join(MODELS_DIR, 'model_effusion.pth'), map_location=device)
# -- Define another models here, if necessary --

# Setup inferences
inference_cardiomegaly = InferenceUtils(model=model_cardiomegaly, device=device)
inference_effusion = InferenceUtils(model=model_effusion, device=device)
# -- Define another inferences here, if necessary --


In [8]:
import os


# Prediction function
def make_prediction(filename):
    image_path = os.path.join(IMAGES_DIR, filename)
    image = read_image(image_path)
    
    # Slice the image
    img_segment1 = get_segment(image, 1)
    img_segment2 = get_segment(image, 2)
    img_segment3 = get_segment(image, 3)
    
    # Convert to tensors
    transformer = ToTensorTransform()
    img_segment1 = transformer.transform(img_segment1)
    img_segment2 = transformer.transform(img_segment2)
    img_segment3 = transformer.transform(img_segment3)
    
    # Make prediction
    result_cardiomegaly = inference_cardiomegaly.make_prediction(img_segment2.unsqueeze(0)).item()
    result_effusion = inference_effusion.make_prediction(img_segment2.unsqueeze(0)).item()
    # -- Make another prediction here, if necessary --
    
    # Return result
    # return [result_1, result_2, ..., result_n]
    return [result_cardiomegaly, result_effusion]
    

In [9]:
import os
from time import sleep
from tqdm import tqdm
from pandas import DataFrame


# Evaluate function
def evaluate(data: DataFrame) -> DataFrame:
    result = []
    
    for idx, row in tqdm(data.iterrows(), total=data.shape[0]):
        # ground_truth = [ground_truth_1, ground_truth_2, ..., ground_truth_n]
        ground_truth = [row['cardiomegaly'], row['effusion']]
        result_code = make_prediction(row['filename'])
        
        # result.append([row['filename'], row['label_1'], row['label_2'], ..., row['label_n'], result_code == ground_truth])
        result.append([row['filename'], os.path.join(IMAGES_DIR, row['filename']), row['cardiomegaly'], row['effusion'], result_code[0], result_code[1], result_code == ground_truth])
        
    result = DataFrame(result, columns=['filename', 'filepath', 'cardiomegaly', 'effusion', 'pred_cardiomegaly', 'pred_effusion', 'result'])
    
    size = len(result)
    count_true = len(result[result['result'] == True])
    
    sleep(3)
    
    print('\nEvaluation report')
    print('-' * 21)
    print('Total data:', size)
    print('Count true:', count_true)
    print('Accuracy:', round((count_true / size), 2), '\n')
    
    return result
    

In [10]:
# Make evaluation
result = evaluate(data)

100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]



Evaluation report
---------------------
Total data: 1000
Count true: 460
Accuracy: 0.46 



In [11]:
# Print result
result.head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
0,00027416_010.png,datasets/data_cxr-images_512x512/00027416_010.png,0,1,1,0,False
1,00012505_000.png,datasets/data_cxr-images_512x512/00012505_000.png,0,1,1,1,False
2,00013187_002.png,datasets/data_cxr-images_512x512/00013187_002.png,1,0,1,1,False
3,00012987_003.png,datasets/data_cxr-images_512x512/00012987_003.png,1,0,0,0,False
4,00029579_012.png,datasets/data_cxr-images_512x512/00029579_012.png,0,1,1,1,False
5,00002437_025.png,datasets/data_cxr-images_512x512/00002437_025.png,0,1,0,1,True
6,00015658_000.png,datasets/data_cxr-images_512x512/00015658_000.png,0,1,0,1,True
7,00005348_012.png,datasets/data_cxr-images_512x512/00005348_012.png,0,1,0,1,True
8,00025793_000.png,datasets/data_cxr-images_512x512/00025793_000.png,0,1,0,0,False
9,00029672_015.png,datasets/data_cxr-images_512x512/00029672_015.png,0,1,0,1,True


In [12]:
# Error analysis
result[result['result'] == True].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
5,00002437_025.png,datasets/data_cxr-images_512x512/00002437_025.png,0,1,0,1,True
6,00015658_000.png,datasets/data_cxr-images_512x512/00015658_000.png,0,1,0,1,True
7,00005348_012.png,datasets/data_cxr-images_512x512/00005348_012.png,0,1,0,1,True
9,00029672_015.png,datasets/data_cxr-images_512x512/00029672_015.png,0,1,0,1,True
14,00011548_005.png,datasets/data_cxr-images_512x512/00011548_005.png,0,1,0,1,True
17,00016291_004.png,datasets/data_cxr-images_512x512/00016291_004.png,0,1,0,1,True
26,00013231_004.png,datasets/data_cxr-images_512x512/00013231_004.png,0,1,0,1,True
27,00008295_011.png,datasets/data_cxr-images_512x512/00008295_011.png,0,1,0,1,True
32,00000766_017.png,datasets/data_cxr-images_512x512/00000766_017.png,0,1,0,1,True
33,00021495_002.png,datasets/data_cxr-images_512x512/00021495_002.png,0,1,0,1,True


In [13]:
# Error analysis
result[result['result'] == False].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
0,00027416_010.png,datasets/data_cxr-images_512x512/00027416_010.png,0,1,1,0,False
1,00012505_000.png,datasets/data_cxr-images_512x512/00012505_000.png,0,1,1,1,False
2,00013187_002.png,datasets/data_cxr-images_512x512/00013187_002.png,1,0,1,1,False
3,00012987_003.png,datasets/data_cxr-images_512x512/00012987_003.png,1,0,0,0,False
4,00029579_012.png,datasets/data_cxr-images_512x512/00029579_012.png,0,1,1,1,False
8,00025793_000.png,datasets/data_cxr-images_512x512/00025793_000.png,0,1,0,0,False
10,00016086_003.png,datasets/data_cxr-images_512x512/00016086_003.png,0,1,1,1,False
11,00016083_004.png,datasets/data_cxr-images_512x512/00016083_004.png,0,1,1,1,False
12,00027789_005.png,datasets/data_cxr-images_512x512/00027789_005.png,0,1,0,0,False
13,00012413_018.png,datasets/data_cxr-images_512x512/00012413_018.png,0,1,1,1,False


In [14]:
# Error analysis
result.loc[((result['cardiomegaly'] == 1) & (result['effusion'] == 1) & (result['result'] == True))].head(15)

Unnamed: 0,filename,filepath,cardiomegaly,effusion,pred_cardiomegaly,pred_effusion,result
82,00011367_004.png,datasets/data_cxr-images_512x512/00011367_004.png,1,1,1,1,True
100,00017138_042.png,datasets/data_cxr-images_512x512/00017138_042.png,1,1,1,1,True
130,00005641_012.png,datasets/data_cxr-images_512x512/00005641_012.png,1,1,1,1,True
139,00002575_000.png,datasets/data_cxr-images_512x512/00002575_000.png,1,1,1,1,True
143,00015278_005.png,datasets/data_cxr-images_512x512/00015278_005.png,1,1,1,1,True
168,00000211_014.png,datasets/data_cxr-images_512x512/00000211_014.png,1,1,1,1,True
241,00005026_013.png,datasets/data_cxr-images_512x512/00005026_013.png,1,1,1,1,True
245,00013249_018.png,datasets/data_cxr-images_512x512/00013249_018.png,1,1,1,1,True
554,00009667_002.png,datasets/data_cxr-images_512x512/00009667_002.png,1,1,1,1,True
646,00013670_147.png,datasets/data_cxr-images_512x512/00013670_147.png,1,1,1,1,True


In [15]:
# Error analysis
# Cardiomegaly model performance

count_cardiomegaly_true = len(result[result['pred_cardiomegaly'] == result['cardiomegaly']])

print('Evaluation report')
print('-' * 21)
print('Total data:', len(result))
print('Count true:', count_cardiomegaly_true)
print('Accuracy:', round((count_cardiomegaly_true / len(result)), 2))

Evaluation report
---------------------
Total data: 1000
Count true: 577
Accuracy: 0.58


In [16]:
# Error analysis
# Effusion model performance

count_effusion_true = len(result[result['pred_effusion'] == result['effusion']])

print('Evaluation report')
print('-' * 21)
print('Total data:', len(result))
print('Count true:', count_effusion_true)
print('Accuracy:', round((count_effusion_true / len(result)), 2))

Evaluation report
---------------------
Total data: 1000
Count true: 823
Accuracy: 0.82
