In [56]:
import sys
# NOTE(review): this is a machine-specific absolute path (Homebrew miniforge,
# Python 3.9); the notebook will not find these packages on another machine.
SITE_PACKAGES_DIR = "/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages"
# Guarded so that re-running the cell does not append the same entry repeatedly.
if SITE_PACKAGES_DIR not in sys.path:
    sys.path.append(SITE_PACKAGES_DIR)

In [57]:
import pandas as pd
import data_constants
import os
import tensorflow as tf
import numpy as np

In [58]:
# Dataset root, taken from the project's data_constants module.
data_folder = data_constants.DATA_FOLDER
# Sub-directory whose path is later prefixed onto each listed file name.
images_folder = os.path.join(data_folder, '2A_images')
# One listing file per split, all located directly under the dataset root.
train_file, test_file, val_file = (
    os.path.join(data_folder, listing)
    for listing in ('train_COVIDx_CT-2A.txt',
                    'test_COVIDx_CT-2A.txt',
                    'val_COVIDx_CT-2A.txt')
)

In [59]:
# Fail fast, before anything is loaded, if the dataset is not where it is expected.
assert os.path.exists(data_folder), 'Data folder must be downloaded first'
assert os.path.exists(images_folder), 'Images was not downloaded properly'

# Include the offending path in the message so a failure says which listing is missing.
for split_file in (train_file, test_file, val_file):
    assert os.path.exists(split_file), f'Split file not found: {split_file}'

In [60]:
# Column names are supplied explicitly through ``names``; fields are space-separated.
col_names = ["filename", "class", "xmin", "ymin", "xmax", "ymax"]
# Read the three listings with identical parsing options.
train_ds, test_ds, val_ds = (
    pd.read_csv(listing_path, sep=' ', names=col_names)
    for listing_path in (train_file, test_file, val_file)
)

In [61]:
# Preview the training listing, selecting the columns in their declared order.
train_ds.loc[:, col_names]

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax
0,NCP_96_1328_0032.png,2,9,94,512,405
1,NCP_96_1328_0035.png,2,10,106,512,405
2,NCP_96_1328_0036.png,2,10,105,512,406
3,NCP_96_1328_0037.png,2,11,104,512,406
4,NCP_96_1328_0038.png,2,11,103,512,406
...,...,...,...,...,...,...
143773,HUST-Patient97-0288.png,2,56,118,470,412
143774,HUST-Patient97-0289.png,2,56,117,470,412
143775,HUST-Patient97-0290.png,2,56,117,469,412
143776,HUST-Patient97-0291.png,2,57,117,469,412


In [62]:
# Turn the listed file names into paths under ``images_folder``.
# The trailing-separator prefix check keeps this cell idempotent: with a relative
# ``images_folder`` a second run would otherwise prepend the folder again.
images_prefix = os.path.join(images_folder, '')
train_ds['filename'] = train_ds['filename'].map(
    lambda name: name if name.startswith(images_prefix)
    else os.path.join(images_folder, name)
)

In [63]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding image batches with class and bounding-box targets.

    Each item is ``(X, (classes, boxes))`` where ``X`` is the stacked, 0-1 scaled
    image array for the batch, ``classes`` is the label array and ``boxes`` holds
    the ``xmin, ymin, xmax, ymax`` columns, one row per sample.

    Args:
        df: DataFrame with one row per image. It is copied, so the caller's
            frame is never reordered or modified.
        X_col: Mapping with key ``'path'`` naming the column of image paths.
        y_col: Mapping with keys ``'class'``, ``'xmin'``, ``'ymin'``, ``'xmax'``
            and ``'ymax'`` naming the corresponding target columns.
        batch_size: Number of rows per batch.
        input_size: Stored on the instance but not applied; images are loaded at
            their native resolution.
            NOTE(review): if resizing is ever wired in, the bounding boxes must
            be rescaled accordingly.
        shuffle: When true, rows are shuffled at construction and again at the
            end of every epoch (unseeded, so the order differs between runs).
        num_classes: When given, class labels are one-hot encoded to
            ``(batch, num_classes)``; when ``None`` (default) the raw label
            values are returned with shape ``(batch,)``.
    """

    def __init__(self, df, X_col, y_col,
                 batch_size,
                 input_size=(512, 512),
                 shuffle=True,
                 num_classes=None):
        super().__init__()

        self.df = df.copy()
        self.X_col = X_col
        self.y_col = y_col
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle
        self.num_classes = num_classes

        self.n = len(self.df)

        # Apply the requested shuffling before the first epoch as well; the
        # epoch-end hook alone would leave epoch one in file order.
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reshuffle the rows when ``shuffle`` is enabled (Keras epoch-end hook)."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return self.n // self.batch_size

    def __get_input(self, path):
        """Load the image at ``path`` and return it as an array divided by 255."""
        image = tf.keras.preprocessing.image.load_img(path)
        image_arr = tf.keras.preprocessing.image.img_to_array(image)
        return image_arr / 255

    def __get_output(self, label, num_classes):
        """One-hot encode ``label`` into ``num_classes`` columns."""
        return tf.keras.utils.to_categorical(label, num_classes=num_classes)

    def __get_data(self, batches):
        """Build ``(X, (classes, boxes))`` from a slice of the DataFrame."""
        path_batch = batches[self.X_col['path']]
        box_columns = [self.y_col[key] for key in ('xmin', 'ymin', 'xmax', 'ymax')]

        # Return plain NumPy arrays so the targets carry no pandas index/columns.
        class_batch = batches[self.y_col['class']].to_numpy()
        bb_batch = batches[box_columns].to_numpy()

        if self.num_classes is not None:
            class_batch = self.__get_output(class_batch, self.num_classes)

        X_batch = np.asarray([self.__get_input(x_path) for x_path in path_batch])
        return X_batch, (class_batch, bb_batch)

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, (classes, boxes))``."""
        start = index * self.batch_size
        # Positional slice: independent of whatever index labels the frame carries.
        batches = self.df.iloc[start:start + self.batch_size]
        return self.__get_data(batches)

In [64]:
batch_size = 64
# The target columns keep their DataFrame names, so every key maps to itself.
traingen = DataGenerator(
    train_ds,
    X_col={'path': 'filename'},
    y_col={key: key for key in ('class', 'xmin', 'ymin', 'xmax', 'ymax')},
    batch_size=batch_size,
)

In [65]:
# Sanity check: pull one batch and report the shape of each of its parts.
for t in traingen:
    X, y = t
    y_class, y_bbox = y
    for caption, part in (('Image shape', X),
                          ('Label shape', y_class),
                          ('Bounding box shape', y_bbox)):
        print(caption, part.shape)
    break  # a single batch is enough

(64, 4)
Image shape (64, 512, 512, 3)
Label shape (64,)
Bounding box shape (64, 4)
