In [1]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
import gc # Garbage collection to clear RAM

In [2]:
path = '/Users/gavmm/Documents/G/Projects_/Ritcher/CS/Main/Pneuno-new'
trainpath = path + '/train'
validpath = path + '/valid'
testpath = path + '/test'

In [3]:

bacterial = [
    os.path.join(trainpath, 'BACTERIAL'),
    os.path.join(validpath, 'BACTERIAL'),
    os.path.join(testpath, 'BACTERIAL')
]

normal = [
    os.path.join(trainpath, 'NORMAL'),
    os.path.join(validpath, 'NORMAL'),
    os.path.join(testpath, 'NORMAL')
]

viral = [
    os.path.join(trainpath, 'VIRAL'),
    os.path.join(validpath, 'VIRAL'),
    os.path.join(testpath, 'VIRAL')
]
splits = ['Train', 'Validation', 'Test']

In [4]:
#balance_gen = ImageDataGenerator(
 #   rotation_range = 20,
  #  width_shift_range= 0.1,
   # height_shift_range= 0.1,
    #shear_range= 0.1,
    #zoom_range= 0.1,
    #horizontal_flip= True,
    #vertical_flip= False,
    #fill_mode= 'nearest')

In [5]:
balance_gen = ImageDataGenerator(
    rotation_range=10,          # 10Â° is more realistic.
    width_shift_range=0.1,      # Keep: Patients aren't always centered.
    height_shift_range=0.1,     # Keep: Good.
    shear_range=0.1,            # Keep: Good, but keep it low to avoid distorting ribs.
    zoom_range=0.1,             # Keep: Good.
    horizontal_flip=False,      # CHANGED: Keeps anatomy correct (heart on left).
    vertical_flip=False,        # Keep: People are never upside down.
    fill_mode='constant',       # CHANGED: Fills empty space with black...
    cval=0,                     # ...which matches the X-ray background.
    brightness_range=[0.9, 1.1] # ADDED: Simulates different X-ray exposure levels.
)

In [6]:
for i in range(3):
    target_count = len(os.listdir(bacterial[i]))
    current_count = len(os.listdir(normal[i]))
    current_count_2 = len(os.listdir(viral[i]))
    needed = target_count - current_count
    needed_2 = target_count - current_count_2

    print(splits[i])
    print(f"Current Normal: {current_count} | Target: {target_count}")
    print(f"Current Viral: {current_count_2} | Target: {target_count}\n")
    
    print(f"Generating {needed} variations...")
    print(f"Generating {needed_2} variations...\n")


Train
Current Normal: 1074 | Target: 2023
Current Viral: 1077 | Target: 2023

Generating 949 variations...
Generating 946 variations...

Validation
Current Normal: 269 | Target: 507
Current Viral: 270 | Target: 507

Generating 238 variations...
Generating 237 variations...

Test
Current Normal: 11 | Target: 20
Current Viral: 11 | Target: 20

Generating 9 variations...
Generating 9 variations...



In [7]:
for i in range(len(splits)):
    bacterial_folder = bacterial[i]
    normal_folder = normal[i]
    viral_folder = viral[i]
    
    target_count = len(os.listdir(bacterial_folder))
    normal_count = len(os.listdir(normal_folder))
    viral_count = len(os.listdir(viral_folder))
    
    needed_normal = target_count - normal_count
    needed_viral = target_count - viral_count

    print(splits[i])
    print(f"Bacterial (Target): {target_count}")
    print(f"Normal: {normal_count} | Needed: {needed_normal}")
    print(f"Viral:  {viral_count} | Needed: {needed_viral}\n")

    if needed_normal <= 0 and needed_viral <= 0:
        print("You good no need tp generate more variations.\n")
        continue
    
    if needed_normal > 0:
        print(f"Generating {needed_normal} variations for Normal...")
        files = [f for f in os.listdir(normal_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        count = 0
        while count < needed_normal:
        # picking a random photo
            random_file = np.random.choice(files)
            img_path = os.path.join(normal_folder, random_file)
            
            try:
                img = load_img(img_path, target_size=(128, 128))
                x = img_to_array(img)
                x = x.reshape((1,) + x.shape)
                
                for batch in balance_gen.flow(x, batch_size=1, 
                                              save_to_dir=normal_folder, 
                                              save_prefix='aug', 
                                              save_format='jpg'):
                    count += 1
                    break
            except Exception as e:
                print(f"Error processing {img_path}: {e}")
                
                if count % 50 == 0:
                    gc.collect()  # Clear RAM every 50 iterations
            print("Normal Aug Complete")
            
    
    if needed_viral > 0:
        print(f"Generating {needed_viral} variations for Viral...")
        files = [f for f in os.listdir(viral_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        count = 0
        while count < needed_viral:
        # picking a random photo
            random_file = np.random.choice(files)
            img_path = os.path.join(viral_folder, random_file)
            
            try:
                img = load_img(img_path, target_size=(128, 128))
                x = img_to_array(img)
                x = x.reshape((1,) + x.shape)
                
                for batch in balance_gen.flow(x, batch_size=1, 
                                              save_to_dir=viral_folder, 
                                              save_prefix='aug', 
                                              save_format='jpg'):
                    count += 1
                    break
            except Exception as e:
                print(f"Error processing {img_path}: {e}")
                
                if count % 50 == 0:
                    gc.collect()  # Clear RAM every 50 iterations
            print("Viral Aug Complete")
            print(f"Split {splits[i]} finished.\n")

print("All tasks successful.")

Train
Bacterial (Target): 2023
Normal: 1074 | Needed: 949
Viral:  1077 | Needed: 946

Generating 949 variations for Normal...
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Complete
Normal Aug Com

In [8]:
bacterial = [
    os.path.join(trainpath, 'BACTERIAL'),
    os.path.join(validpath, 'BACTERIAL'),
    os.path.join(testpath, 'BACTERIAL')
]

normal = [
    os.path.join(trainpath, 'NORMAL'),
    os.path.join(validpath, 'NORMAL'),
    os.path.join(testpath, 'NORMAL')
]

viral = [
    os.path.join(trainpath, 'VIRAL'),
    os.path.join(validpath, 'VIRAL'),
    os.path.join(testpath, 'VIRAL')
]
splits = ['Train', 'Validation', 'Test']

for i in range(3):
    target_count = len(os.listdir(bacterial[i]))
    current_count = len(os.listdir(normal[i]))
    current_count_2 = len(os.listdir(viral[i]))
    needed = target_count - current_count
    needed_2 = target_count - current_count_2

    print(splits[i])
    print(f"Current Normal: {current_count} | Target: {target_count}")
    print(f"Current Viral: {current_count_2} | Target: {target_count}\n")
    
    print(f"Generating {needed} variations...")
    print(f"Generating {needed_2} variations...\n")

Train
Current Normal: 1977 | Target: 2023
Current Viral: 1974 | Target: 2023

Generating 46 variations...
Generating 49 variations...

Validation
Current Normal: 504 | Target: 507
Current Viral: 504 | Target: 507

Generating 3 variations...
Generating 3 variations...

Test
Current Normal: 20 | Target: 20
Current Viral: 20 | Target: 20

Generating 0 variations...
Generating 0 variations...

