In [1]:

import json
import pandas as pd


In [2]:

# Load the per-image metadata table and preview its first rows.
df_info = pd.read_csv(filepath_or_buffer="../dataset/info.csv")
df_info.head(n=5)


Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,0.143,
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143,
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168,
3,00000002_000.png,No Finding,0,2,81,M,PA,2500,2048,0.171,0.171,
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,0.143,


In [3]:

# Keep only the image file name and its "|"-separated finding labels.
df_index_label = df_info.loc[:, ["Image Index", "Finding Labels"]]
df_index_label.head(n=5)


Unnamed: 0,Image Index,Finding Labels
0,00000001_000.png,Cardiomegaly
1,00000001_001.png,Cardiomegaly|Emphysema
2,00000001_002.png,Cardiomegaly|Effusion
3,00000002_000.png,No Finding
4,00000003_000.png,Hernia


In [4]:

# Append one 0/1 indicator column per distinct label found in the
# "|"-separated "Finding Labels" strings.
df_index_label_encoded = pd.concat(
    objs=[
        df_index_label,
        df_index_label["Finding Labels"].str.get_dummies(sep="|"),
    ],
    axis="columns",
)
df_index_label_encoded.head(n=5)


Unnamed: 0,Image Index,Finding Labels,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00000001_000.png,Cardiomegaly,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,00000001_001.png,Cardiomegaly|Emphysema,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0
2,00000001_002.png,Cardiomegaly|Effusion,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
3,00000002_000.png,No Finding,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,00000003_000.png,Hernia,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [5]:

# The raw label string is redundant once the indicator columns exist.
df_index_encoded = df_index_label_encoded.drop(columns=["Finding Labels"])
df_index_encoded.head(n=5)


Unnamed: 0,Image Index,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00000001_000.png,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,00000001_001.png,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0
2,00000001_002.png,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
3,00000002_000.png,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,00000003_000.png,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [6]:

# Persist the encoded labels without the positional index column.
# NOTE(review): ChestXRDataset below reads "../dataset/csvs/labels_encoded.csv",
# which is not the path written here -- confirm which location is intended.
df_index_encoded.to_csv(path_or_buf="../dataset/labels_encoded.csv", index=False)


In [7]:

# Map 0-based class id -> label column name. The first column
# ("Image Index") is the image key, not a class, so it is skipped.
class_map = dict(enumerate(df_index_encoded.columns[1:]))

class_map


{0: 'Atelectasis',
 1: 'Cardiomegaly',
 2: 'Consolidation',
 3: 'Edema',
 4: 'Effusion',
 5: 'Emphysema',
 6: 'Fibrosis',
 7: 'Hernia',
 8: 'Infiltration',
 9: 'Mass',
 10: 'No Finding',
 11: 'Nodule',
 12: 'Pleural_Thickening',
 13: 'Pneumonia',
 14: 'Pneumothorax'}

In [8]:

# Serialise the id -> name mapping. JSON object keys are always strings, so
# the integer ids come back as "0", "1", ... when the file is loaded again.
with open('../dataset/class_map.json', 'w') as fp:
    fp.write(json.dumps(class_map))


In [19]:
# Scratch check: label vector of a single image, looked up by file name.
a = (
    df_index_encoded
    .loc[df_index_encoded["Image Index"] == "00000001_000.png"]
    .drop(columns=["Image Index"])
    .values[0]
)

In [17]:
# Scratch check: last "/"-separated component of a path string.
'../dataset/class_map.json'.rsplit("/", 1)[-1]

'class_map.json'

In [76]:
import os
import torch
import pandas as pd
import imageio.v3 as iio
from torchvision import transforms
from torch.utils.data import Dataset


class ChestXRDataset(Dataset):
    """
    Map-style dataset over the ChestXR-14 images named in a split file.

    Files read below ``root``:

    * ``images/``                  -- the image files
    * ``csvs/labels_encoded.csv``  -- one row per image: "Image Index" plus
      the label columns
    * ``txts/train_val_list.txt``  -- image names used when ``is_train``
    * ``txts/test.txt``            -- image names used otherwise
    """

    def __init__(self, root: str = "../dataset", is_train: bool = False):
        """

        Load the label table and collect the image paths of one split.

        Names listed in the split file that do not resolve to an existing
        file under ``images/`` are skipped silently, as are blank lines.

        :param root     : dataset root directory
        :param is_train : use ``train_val_list.txt`` when True, ``test.txt``
                          when False

        """

        self.root      = root
        self.is_train  = is_train

        self.path_images      = os.path.join(self.root, "images")
        self.path_labels_csv  = os.path.join(self.root, "csvs", "labels_encoded.csv")
        self.labels_encoded   = pd.read_csv(self.path_labels_csv)

        # Index the labels by image name once so __getitem__ does a single
        # lookup instead of scanning the whole table for every sample.
        # drop_duplicates keeps the first row per name, i.e. the row the
        # previous boolean-mask + ``.values[0]`` lookup would have returned.
        self._labels_by_name = (
            self.labels_encoded
            .drop_duplicates(subset="Image Index")
            .set_index("Image Index")
        )

        split_file = "train_val_list.txt" if self.is_train else "test.txt"
        self.path_txt = os.path.join(self.root, "txts", split_file)

        # Both splits currently share the same pipeline; add train-only
        # augmentation here if it is ever needed.
        self.transforms = transforms.Compose(
            [
                transforms.ToPILImage(),
                transforms.ToTensor(),
            ]
        )

        self.image_paths: list = []

        with open(self.path_txt, 'r') as file:

            for line in file:
                name = line.strip()

                # A blank line would join to the images directory itself,
                # which exists, and would be queued as if it were an image.
                if not name:
                    continue

                path_image = os.path.join(self.path_images, name)

                if os.path.isfile(path_image):
                    self.image_paths.append(path_image)

    def __len__(self) -> int:
        """

        Return length of dataset.

        :return: number of split entries whose image file was found on disk
        """
        return len(self.image_paths)

    def __getitem__(self, item: int) -> tuple:
        """

        Return a single item from dataset.

        :param item : index of the item
        :return     : a pair ``(image, label)``; the image is the output of
                      ``self.transforms``, the label is a tensor holding the
                      row's label columns
        :raises KeyError: if the image has no row in the label table

        """
        image_path = self.image_paths[item]
        # basename() matches the separator os.path.join used to build the path.
        image_name = os.path.basename(image_path)

        try:
            label = self._labels_by_name.loc[image_name].values
        except KeyError:
            # An IndexError here would be read as "end of data" by the legacy
            # sequence-iteration protocol, so report the real problem instead.
            raise KeyError(
                f"no label row for image '{image_name}' in {self.path_labels_csv}"
            ) from None

        image = iio.imread(image_path)
        # NOTE(review): unsqueeze(0) adds a leading channel axis, which assumes
        # imread returned a 2-D (H, W) array -- confirm for all images.
        image = torch.tensor(image).unsqueeze(0)

        return self.transforms(image), torch.tensor(label)



In [77]:
# Build the dataset with its built-in configuration (root "../dataset", test split).
dset = ChestXRDataset()

In [78]:
# Number of split-file entries whose image file was found on disk.
len(dset)

967

In [79]:
# Fetch the first (image, label) pair; subscripting dispatches to __getitem__.
dset[0]

(tensor([[[0.2706, 0.2275, 0.1922,  ..., 0.0706, 0.0392, 0.0000],
          [0.2667, 0.2275, 0.1922,  ..., 0.0706, 0.0392, 0.0000],
          [0.2745, 0.2275, 0.1922,  ..., 0.0667, 0.0392, 0.0000],
          ...,
          [0.1725, 0.1686, 0.1686,  ..., 0.8510, 0.9059, 0.5451],
          [0.2039, 0.1961, 0.1961,  ..., 0.8392, 0.9020, 0.5373],
          [0.2392, 0.2314, 0.2314,  ..., 0.8431, 0.9176, 0.5451]]]),
 tensor([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]))