In [9]:
import os
import cv2
import shutil
import random
import numpy as np
import glob

In [6]:
shutil.rmtree("./Dataset_BUSI_with_GT/normal/") #Delete folder normal

FileNotFoundError: [WinError 3] The system cannot find the path specified: './Dataset_BUSI_with_GT/normal/'

In [10]:
# Delete files with "mask" in their name from the benign and malignant directories
for file in glob.glob("./Dataset_BUSI_with_GT/benign/*mask*"):
    os.remove(file)
for file in glob.glob("./Dataset_BUSI_with_GT/malignant/*mask*"):
    os.remove(file)

In [12]:
#Mirror Horizontally all images
folder_path_ben = "./Dataset_BUSI_with_GT/benign/"
folder_path_mal = "./Dataset_BUSI_with_GT/malignant/"

# List all files in the folder
image_files_ben = os.listdir(folder_path_ben)
image_files_mal = os.listdir(folder_path_mal)

# Function to mirror image horizontally
def mirror_image(image_path):
    image = cv2.imread(image_path)
    mirrored_image = cv2.flip(image, 1)  # 1 for horizontal flip
    return mirrored_image

# Apply mirror transformation to each image and save
for image_file in image_files_ben:
    image_path = os.path.join(folder_path_ben, image_file)
    mirrored_image = mirror_image(image_path)
    mirrored_image_path = os.path.join(folder_path_ben, "mirrored_" + image_file)
    cv2.imwrite(mirrored_image_path, mirrored_image)

for image_file in image_files_mal:
    image_path = os.path.join(folder_path_mal, image_file)
    mirrored_image = mirror_image(image_path)
    mirrored_image_path = os.path.join(folder_path_mal, "mirrored_" + image_file)
    cv2.imwrite(mirrored_image_path, mirrored_image)

In [13]:
#Zoom, Lower Contrast, CLAHE, Gaussian Blur and Salt and pepper noise addition functions
#Zoom image on the center function
def zoom_image_center(image, zoom_factor=1.2):
    height, width = image.shape[:2]
    new_height, new_width = int(height / zoom_factor), int(width / zoom_factor)
    top = (height - new_height) // 2
    left = (width - new_width) // 2
    bottom, right = top + new_height, left + new_width
    cropped_image = image[top:bottom, left:right]
    zoomed_image = cv2.resize(cropped_image, (width, height), interpolation=cv2.INTER_LINEAR)
    return zoomed_image

#Lower Contrast function
def lower_contrast(image_path):
    image = cv2.imread(image_path)
    img_float = image.astype(float)
    b, g, r = cv2.split(img_float)
    factor = 1.6  #Contrast factor (factor < 1 for darker picture, factor > 1 for brighter picture and lower contrast)
    b_low_contrast = b * factor
    g_low_contrast = g * factor
    r_low_contrast = r * factor
    low_contrast_img = cv2.merge((b_low_contrast, g_low_contrast, r_low_contrast)) #Convert image back to [0,255]
    low_contrast_img = cv2.convertScaleAbs(low_contrast_img)
    return low_contrast_img
    
#Contrast Limited Adaptive Histogram Equalization Function
def clahe_image(image_path):
    image = cv2.imread(image_path)
    gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
    cl_img = clahe.apply(gray_img)
    return cl_img
    
#Gaussian Blur Addition function
def add_blur(image_path):
    image = cv2.imread(image_path)
    blurred_img = cv2.GaussianBlur(image, (13, 13), 0)
    return blurred_img
    
#Salt and Pepper addition function
def add_salt_and_pepper_noise(image_path, salt_prob=0.03, pepper_prob=0.03):
    image = cv2.imread(image_path)
    noisy_img = np.copy(image)
    num_salt = np.ceil(salt_prob * image.size)
    num_pepper = np.ceil(pepper_prob * image.size)
    coords = [np.random.randint(0, i - 1, int(num_salt)) for i in image.shape]#Salt noise (white pixels)
    noisy_img[coords[0], coords[1], :] = 1
    coords = [np.random.randint(0, i - 1, int(num_pepper)) for i in image.shape]#Pepper noise (black pixels)
    noisy_img[coords[0], coords[1], :] = 0
    return noisy_img

In [15]:
#Functions applied on benign images
folder_path = "./Dataset_BUSI_with_GT/benign/"

image_files = os.listdir(folder_path)

selected_images_zoom = random.sample(image_files, 150)
selected_images_contrast = random.sample(image_files, 125)
selected_images_clahe = random.sample(image_files, 125)
selected_images_blur = random.sample(image_files, 125)
selected_images_snp = random.sample(image_files, 125)

for image_file in selected_images_zoom:
    image_path = os.path.join(folder_path, image_file)
    image = cv2.imread(image_path)
    zoomed_image = zoom_image_center(image)
    output_path = os.path.join(folder_path, "zoomed_" + image_file)
    cv2.imwrite(output_path, zoomed_image)

for image_file in selected_images_contrast:
    image_path = os.path.join(folder_path, image_file)
    low_contrast_image = lower_contrast(image_path)
    lower_contrast_image_path = os.path.join(folder_path, "lower_contrast_" + image_file)
    cv2.imwrite(lower_contrast_image_path, low_contrast_image)
    
for image_file in selected_images_clahe:
    image_path = os.path.join(folder_path, image_file)
    cl_image = clahe_image(image_path)
    cl_image_path = os.path.join(folder_path, "clahe_" + image_file)
    cv2.imwrite(cl_image_path, cl_image)
    
for image_file in selected_images_blur:
    image_path = os.path.join(folder_path, image_file)
    blurred_image = add_blur(image_path)
    blurred_image_path = os.path.join(folder_path, "blurred_" + image_file)
    cv2.imwrite(blurred_image_path, blurred_image)

for image_file in selected_images_snp:
    image_path = os.path.join(folder_path, image_file)
    snp_image = add_salt_and_pepper_noise(image_path)
    snp_image_path = os.path.join(folder_path, "snp_" + image_file)
    cv2.imwrite(snp_image_path, snp_image)

In [16]:
#Functions applied on malignant images
folder_path = "./Dataset_BUSI_with_GT/malignant/"

image_files = os.listdir(folder_path)

selected_images_zoom = random.sample(image_files, 315)
selected_images_contrast = random.sample(image_files, 190)
selected_images_clahe = random.sample(image_files, 190)
selected_images_blur = random.sample(image_files, 190)
selected_images_snp = random.sample(image_files, 190)

for image_file in selected_images_zoom:
    image_path = os.path.join(folder_path, image_file)
    image = cv2.imread(image_path)
    zoomed_image = zoom_image_center(image)
    output_path = os.path.join(folder_path, "zoomed_" + image_file)
    cv2.imwrite(output_path, zoomed_image)

for image_file in selected_images_contrast:
    image_path = os.path.join(folder_path, image_file)
    low_contrast_image = lower_contrast(image_path)
    lower_contrast_image_path = os.path.join(folder_path, "lower_contrast_" + image_file)
    cv2.imwrite(lower_contrast_image_path, low_contrast_image)
    
for image_file in selected_images_clahe:
    image_path = os.path.join(folder_path, image_file)
    cl_image = clahe_image(image_path)
    cl_image_path = os.path.join(folder_path, "clahe_" + image_file)
    cv2.imwrite(cl_image_path, cl_image)
    
for image_file in selected_images_blur:
    image_path = os.path.join(folder_path, image_file)
    blurred_image = add_blur(image_path)
    blurred_image_path = os.path.join(folder_path, "blurred_" + image_file)
    cv2.imwrite(blurred_image_path, blurred_image)

for image_file in selected_images_snp:
    image_path = os.path.join(folder_path, image_file)
    snp_image = add_salt_and_pepper_noise(image_path)
    snp_image_path = os.path.join(folder_path, "snp_" + image_file)
    cv2.imwrite(snp_image_path, snp_image)

In [18]:
# Train Validation and Test sets creation
base_dir = os.path.join(os.path.expanduser('~'), 'Desktop', 'data', 'Dataset_BUSI_with_GT')
benign_dir = os.path.join(base_dir, 'benign')
malignant_dir = os.path.join(base_dir, 'malignant')

train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Subdirectories for benign and malignant
for category in ['benign', 'malignant']:
    os.makedirs(os.path.join(train_dir, category), exist_ok=True)
    os.makedirs(os.path.join(val_dir, category), exist_ok=True)
    os.makedirs(os.path.join(test_dir, category), exist_ok=True)

# Function to split images on a 70-15-15 percentage and copy them on the train,val and test folders
def split_and_copy_images(category, src_dir, train_dir, val_dir, test_dir, train_split=0.7, val_split=0.15, test_split=0.15):
    images = glob.glob(os.path.join(src_dir, '*'))  # * to include all files in the directory
    random.shuffle(images) #Shuffle images randomly
    total_images = len(images)
    train_count = int(total_images * train_split)
    val_count = int(total_images * val_split)
    train_images = images[:train_count]
    val_images = images[train_count:train_count + val_count]
    test_images = images[train_count + val_count:]
    
    for img in train_images:
        shutil.copy(img, os.path.join(train_dir, category))
        
    for img in val_images:
        shutil.copy(img, os.path.join(val_dir, category))
        
    for img in test_images:
        shutil.copy(img, os.path.join(test_dir, category))

# Split and copy benign images
split_and_copy_images('benign', benign_dir, train_dir, val_dir, test_dir)

# Split and copy malignant images
split_and_copy_images('malignant', malignant_dir, train_dir, val_dir, test_dir)

print("Images have been split and copied on the folders successfully.")

Images have been split and copied on the folders successfully.
