# **Data Augmentation**

## **1. Install Dependencies**

In [8]:
%pip install tensorflow scipy

Collecting scipy
  Downloading scipy-1.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Downloading scipy-1.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (40.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hInstalling collected packages: scipy
Successfully installed scipy-1.15.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [9]:
%pip list

Package                 Version
----------------------- -----------
absl-py                 2.1.0
asttokens               2.4.1
astunparse              1.6.3
certifi                 2024.8.30
charset-normalizer      3.4.0
comm                    0.2.2
contourpy               1.3.1
cycler                  0.12.1
debugpy                 1.8.9
decorator               5.1.1
executing               2.1.0
flatbuffers             24.3.25
fonttools               4.55.0
gast                    0.6.0
google-pasta            0.2.0
grpcio                  1.68.0
h5py                    3.12.1
idna                    3.10
ipykernel               6.29.5
ipython                 8.29.0
jedi                    0.19.2
jupyter_client          8.6.3
jupyter_core            5.7.2
keras                   3.6.0
kiwisolver              1.4.7
libclang                18.1.1
Markdown                3.7
markdown-it-py          3.0.0
MarkupSafe              3.0.2
matplotlib              3.9.2
matplotlib-inline    

## **2. Data Augmentation**

### **2.1. Single Image**

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.utils import img_to_array, load_img

# Create data generator
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Load and preprocess the image
img = load_img('Dataset/New/Original/NailMelanoma/NailMelanoma (1).jpg')
x = img_to_array(img)
x = tf.expand_dims(x, 0)  # Add batch dimension

# Generate and save augmented images
i = 0
for batch in datagen.flow(
    x, 
    batch_size=1,
    save_to_dir='Dataset/New/Augmented/NailMelanoma',
    save_prefix='NailMelanoma',
    save_format='jpg'
):
    i += 1
    if i >= 20:
        break  # Stop after generating 20 images

### **2.2. Batch Images**

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.utils import img_to_array, load_img
import os
from pathlib import Path

def augment_images(
    input_dir,
    output_dir,
    augmentations_per_image=20,
    seed=42,
):
    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    
    # Create data generator
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    # Get list of all images in input directory
    valid_extensions = {'.jpg', '.jpeg', '.png'}
    image_files = [
        f for f in os.listdir(input_dir) 
        if os.path.splitext(f.lower())[1] in valid_extensions
    ]
    
    print(f"Found {len(image_files)} images to process")
    
    # Process each image
    for idx, image_file in enumerate(image_files, 1):
        try:
            # Load and preprocess the image
            input_path = os.path.join(input_dir, image_file)
            img = load_img(input_path)
            x = img_to_array(img)
            x = tf.expand_dims(x, 0)
            
            # Generate and save augmented versions
            i = 0
            base_filename = os.path.splitext(image_file)[0]
            
            for batch in datagen.flow(
                x,
                batch_size=1,
                save_to_dir=output_dir,
                save_prefix=f"{base_filename}",
                save_format='jpg'
            ):
                i += 1
                if i >= augmentations_per_image:
                    break
            
            print(f"Processed image {idx}/{len(image_files)}: {image_file}")
            
        except Exception as e:
            print(f"Error processing {image_file}: {str(e)}")
            continue

# Augment NailMelanoma Dataset
input_directory = 'Dataset/New/Original/NailMelanoma'
output_directory = 'Dataset/New/Augmented/NailMelanoma'

augment_images(
    input_dir=input_directory,
    output_dir=output_directory,
    augmentations_per_image=20,  # Number of augmented versions per image
)

# Augment Healthy Dataset
input_directory = 'Dataset/New/Original/Healthy'
output_directory = 'Dataset/New/Augmented/Healthy'

augment_images(
    input_dir=input_directory,
    output_dir=output_directory,
    augmentations_per_image=20,  # Number of augmented versions per image
)

Found 100 images to process
Processed image 1/100: Healthy (92).jpg
Processed image 2/100: Healthy (75).jpg
Processed image 3/100: Healthy (18).jpg
Processed image 4/100: Healthy (68).jpg
Processed image 5/100: Healthy (51).jpg
Processed image 6/100: Healthy (87).jpg
Processed image 7/100: Healthy (94).jpg
Processed image 8/100: Healthy (31).jpg
Processed image 9/100: Healthy (64).jpg
Processed image 10/100: Healthy (48).jpg
Processed image 11/100: Healthy (15).jpg
Processed image 12/100: Healthy (42).jpg
Processed image 13/100: Healthy (2).jpg
Processed image 14/100: Healthy (43).jpg
Processed image 15/100: Healthy (99).jpg
Processed image 16/100: Healthy (91).jpg
Processed image 17/100: Healthy (61).jpg
Processed image 18/100: Healthy (12).jpg
Processed image 19/100: Healthy (71).jpg
Processed image 20/100: Healthy (65).jpg
Processed image 21/100: Healthy (53).jpg
Processed image 22/100: Healthy (67).jpg
Processed image 23/100: Healthy (32).jpg
Processed image 24/100: Healthy (60).jp

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.utils import img_to_array, load_img
import os
from pathlib import Path
import numpy as np

def augment_images(
    input_dir,
    output_dir,
    augmentations_per_image=20,
    seed=42  # Added seed for reproducibility
):
    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    
    # Create data generator with fixed seed
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    # Get list of all images in input directory
    valid_extensions = {'.jpg', '.jpeg', '.png'}
    image_files = [
        f for f in os.listdir(input_dir) 
        if os.path.splitext(f.lower())[1] in valid_extensions
    ]
    
    total_images = len(image_files)
    print(f"Found {total_images} images to process")
    expected_total = total_images * augmentations_per_image
    print(f"Expected output: {expected_total} augmented images")
    
    processed_count = 0
    
    # Process each image
    for idx, image_file in enumerate(image_files, 1):
        try:
            # Load and preprocess the image
            input_path = os.path.join(input_dir, image_file)
            img = load_img(input_path)
            x = img_to_array(img)
            x = tf.expand_dims(x, 0)
            
            base_filename = os.path.splitext(image_file)[0]
            current_count = 0
            
            # Set a specific seed for each image to ensure reproducibility
            image_seed = seed + idx
            np.random.seed(image_seed)
            tf.random.set_seed(image_seed)
            
            # Generate augmented images one at a time
            while current_count < augmentations_per_image:
                # Generate the augmented image
                aug_img = next(datagen.flow(
                    x,
                    batch_size=1,
                    seed=image_seed + current_count
                ))
                
                # Save the augmented image with a guaranteed unique filename
                output_filename = f"{base_filename}_{current_count+1:03d}.jpg"
                output_path = os.path.join(output_dir, output_filename)
                tf.keras.preprocessing.image.save_img(output_path, aug_img[0])
                
                current_count += 1
                processed_count += 1
            
            print(f"Processed image {idx}/{total_images}: {image_file} - Generated {current_count} augmentations")
            
        except Exception as e:
            print(f"Error processing {image_file}: {str(e)}")
            continue
    
    print(f"\nProcessing complete!")
    print(f"Total augmented images generated: {processed_count}")
    assert processed_count == expected_total, f"Expected {expected_total} images but generated {processed_count}"

# Augment NailMelanoma
input_directory = 'Dataset/New/Original/NailMelanoma'
output_directory = 'Dataset/New/Augmented/NailMelanoma'

augment_images(
    input_dir=input_directory,
    output_dir=output_directory,
    augmentations_per_image=20,
    seed=42  # Add seed for reproducibility
)

# Augment Healthy
input_directory = 'Dataset/New/Original/Healthy'
output_directory = 'Dataset/New/Augmented/Healthy'

augment_images(
    input_dir=input_directory,
    output_dir=output_directory,
    augmentations_per_image=20,
    seed=42  # Add seed for reproducibility
)

Found 100 images to process
Expected output: 2000 augmented images
Processed image 1/100: Healthy (92).jpg - Generated 20 augmentations
Processed image 2/100: Healthy (75).jpg - Generated 20 augmentations
Processed image 3/100: Healthy (18).jpg - Generated 20 augmentations
Processed image 4/100: Healthy (68).jpg - Generated 20 augmentations
Processed image 5/100: Healthy (51).jpg - Generated 20 augmentations
Processed image 6/100: Healthy (87).jpg - Generated 20 augmentations
Processed image 7/100: Healthy (94).jpg - Generated 20 augmentations
Processed image 8/100: Healthy (31).jpg - Generated 20 augmentations
Processed image 9/100: Healthy (64).jpg - Generated 20 augmentations
Processed image 10/100: Healthy (48).jpg - Generated 20 augmentations
Processed image 11/100: Healthy (15).jpg - Generated 20 augmentations
Processed image 12/100: Healthy (42).jpg - Generated 20 augmentations
Processed image 13/100: Healthy (2).jpg - Generated 20 augmentations
Processed image 14/100: Healthy (