In [26]:
# Scan the raw image folder with difPy for duplicate/similar images before
# the data is split; the search result object is kept in `search`.
from difPy import dif
search = dif("Original Images")

DifPy preparing files: [956/956] [100%]
DifPy comparing images: [954/954] [100%]
Found 0 images with one or more duplicate/similar images in 106.9404 seconds.


In [27]:
import os
import shutil
from Augmentor import Pipeline
import splitfolders

# Root folder: holds the raw class folders and, below it, the split folders
base_dir = './Original Images'

# Raw (unsplit) class folders
monkeypox_dir, others_dir = (
    os.path.join(base_dir, label) for label in ('Monkeypox', 'Others')
)

# One folder per dataset split
train_dir, test_dir, val_dir = (
    os.path.join(base_dir, split) for split in ('train', 'test', 'val')
)

# One folder per class inside each split
train_monkeypox_dir, train_others_dir = (
    os.path.join(train_dir, label) for label in ('Monkeypox', 'Others')
)
test_monkeypox_dir, test_others_dir = (
    os.path.join(test_dir, label) for label in ('Monkeypox', 'Others')
)
val_monkeypox_dir, val_others_dir = (
    os.path.join(val_dir, label) for label in ('Monkeypox', 'Others')
)

In [28]:
# Split the photo data randomly into training, validation and testing sets.
# splitfolders reads `ratio` as (train, val, test), so this yields
# 60% training, 10% validation and 30% testing (seeded for reproducibility).
#
# The split is written into the source folder itself. It is therefore skipped
# when the `train` folder already exists: re-running the split would copy the
# source images in again next to the files that later cells have already
# renamed/augmented.
if not os.path.isdir(train_dir):
    splitfolders.ratio(base_dir, output=base_dir, seed=42, ratio=(.6, 0.1, 0.3))

Copying files: 954 files [00:01, 739.39 files/s]


In [29]:
# Rename files in a systematic way
def rename_sequentially(directory, prefix, ext='.jpg'):
    """Rename every regular file in `directory` to `<prefix><n><ext>`, n = 1, 2, ...

    Args:
        directory: Folder whose files are renamed in place.
        prefix: Text placed in front of the running number.
        ext: Suffix appended after the running number.

    Returns:
        The number of files that were renamed.

    Files are numbered in sorted name order so the result is reproducible
    (os.listdir returns entries in arbitrary order). The rename runs in two
    passes through unique temporary names, so a target name that is currently
    held by another file of the same folder (e.g. when the cell is re-run) is
    never overwritten. Sub-directories are left untouched.

    NOTE(review): every file receives `ext` regardless of its original
    extension -- confirm that all source images really are JPEGs.
    """
    names = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

    # Pick a temporary prefix that no existing file name starts with.
    tag = '__renaming__'
    while any(name.startswith(tag) for name in names):
        tag += '_'

    # Pass 1: move every file out of the way so no final name is occupied.
    staged = []
    for number, name in enumerate(names, start=1):
        temp_name = tag + str(number)
        os.rename(os.path.join(directory, name), os.path.join(directory, temp_name))
        staged.append((number, temp_name))

    # Pass 2: assign the final, sequential names.
    for number, temp_name in staged:
        final_name = prefix + str(number) + ext
        os.rename(os.path.join(directory, temp_name), os.path.join(directory, final_name))

    return len(staged)


rename_sequentially(train_monkeypox_dir, 'monkeypox_train_')
rename_sequentially(train_others_dir, 'others_train_')
rename_sequentially(test_monkeypox_dir, 'monkeypox_test_')
rename_sequentially(test_others_dir, 'others_test_')
rename_sequentially(val_monkeypox_dir, 'monkeypox_val_')
rename_sequentially(val_others_dir, 'others_val_')

In [30]:
# Create augmentations on training dataset
# The same randomised pipeline (rotate, zoom, tilt skew, corner skew) is built
# for each training class folder, and 600 augmented samples are drawn per class.
for class_dir in (train_monkeypox_dir, train_others_dir):
    p = Pipeline(class_dir)
    p.rotate(probability=0.7, max_left_rotation=10, max_right_rotation=10)
    p.zoom(probability=0.5, min_factor=1.1, max_factor=1.5)
    p.skew_tilt(probability=0.5)
    p.skew_corner(probability=0.5)
    p.sample(600)

Initialised with 209 image(s) found.
Output directory set to ./Original Images\train\Monkeypox\output.

Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x2B6BDCC1390>: 100%|██████████| 600/600 [00:03<00:00, 195.00 Samples/s]                


Initialised with 363 image(s) found.
Output directory set to ./Original Images\train\Others\output.

Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x2B6BDCEF700>: 100%|██████████| 600/600 [00:02<00:00, 215.59 Samples/s]                  


In [31]:
import re

# Rename the augmented images in the output folder, keeping *monkeypox_* and *others_* in the name
augmented_m_dir = "./Original Images/train/Monkeypox/output"
augmented_o_dir = "./Original Images/train/Others/output"
augmented_m_files = os.listdir(augmented_m_dir)
augmented_o_files = os.listdir(augmented_o_dir)

# New name = <prefix><first number found in the old name>_<position in listing>.jpg
for out_dir, names, prefix in (
    (augmented_m_dir, augmented_m_files, "monkeypox_train_"),
    (augmented_o_dir, augmented_o_files, "others_train_"),
):
    for i, old_name in enumerate(names):
        first_number = re.findall(r'\d+', old_name)[0]
        new_name = prefix + first_number + "_" + str(i) + ".jpg"
        os.rename(os.path.join(out_dir, old_name), os.path.join(out_dir, new_name))


In [32]:
# List the output folders again to pick up their current file names
augmented_m_files = os.listdir(augmented_m_dir)
augmented_o_files = os.listdir(augmented_o_dir)

# Move the augmented images to the training subfolders
for out_dir, names, target_dir in (
    (augmented_m_dir, augmented_m_files, train_monkeypox_dir),
    (augmented_o_dir, augmented_o_files, train_others_dir),
):
    for name in names:
        shutil.move(os.path.join(out_dir, name), os.path.join(target_dir, name))

# Remove the output folders
for out_dir in (augmented_m_dir, augmented_o_dir):
    shutil.rmtree(out_dir)

In [33]:
# Check if directories have correct number of files
# Each entry pairs the printed label with the folder whose entries are counted.
for label, folder in (
    ('Training Monkeypox images:', train_monkeypox_dir),
    ('Training Others images:', train_others_dir),
    ('Testing Monkeypox images:', test_monkeypox_dir),
    ('Testing Others images:', test_others_dir),
    ('Validation Monkeypox images:', val_monkeypox_dir),
    ('Validation Others images:', val_others_dir),
):
    print(label, len(os.listdir(folder)))

Training Monkeypox images: 809
Training Others images: 963
Testing Monkeypox images: 106
Testing Others images: 182
Validation Monkeypox images: 34
Validation Others images: 60
