# Connect to Drive & Import Library

In [None]:
# Mount Google Drive at /content/drive; every dataset path in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import numpy as np
import shutil
import random

# Augmenting additional data and combining it with the split dataset

In [None]:
# Drive locations for the augmentation step. The three split folders point at
# the 'Mature' class sub-folder of each split; MATURE_PATH holds the source images.
ROOT_PATH = "/content/drive/MyDrive/Bangkit 2024"
TRAINING_PATH = "/".join((ROOT_PATH, "Split_Augmentation", "Dataset", "train", "Mature"))
TEST_PATH = "/".join((ROOT_PATH, "Split_Augmentation", "Dataset", "test", "Mature"))
VALIDATION_PATH = "/".join((ROOT_PATH, "Split_Augmentation", "Dataset", "valid", "Mature"))
MATURE_PATH = "/".join((ROOT_PATH, "Mature"))

In [None]:
# Function for image augmentation
def augment_image(image_path, save_to_dir, datagen, prefix, num_augmented=5):
    """Save randomly augmented variants of a single image into ``save_to_dir``.

    Args:
        image_path: Path of the source image to load.
        save_to_dir: Directory handed to ``datagen.flow`` as ``save_to_dir``;
            the generated images are written there.
        datagen: Object exposing ``flow(...)`` (an ``ImageDataGenerator`` in
            this notebook) that produces the augmented batches.
        prefix: Filename prefix handed to ``datagen.flow`` as ``save_prefix``.
        num_augmented: How many augmented images to generate. Defaults to 5.
            Values <= 0 generate nothing.

    Returns:
        None. The only effect is the files written by ``datagen.flow``.
    """
    img = load_img(image_path)  # Read image
    x = img_to_array(img)  # Image to array conversion
    x = np.expand_dims(x, axis=0)  # add a leading batch axis of size 1

    batches = datagen.flow(
        x,
        batch_size=1,
        save_to_dir=save_to_dir,
        save_prefix=prefix,
        save_format='jpeg',
    )
    # Each batch drawn from flow() writes one image, and the iterator never
    # ends on its own. zip() consults range() first, so exactly
    # `num_augmented` batches are drawn. The previous `i > 5` check only
    # stopped after a sixth image had already been written.
    for _ in zip(range(num_augmented), batches):
        pass


In [None]:
# Shared augmentation generator: random rotation, shifts, shear, zoom and
# horizontal flips; pixels exposed by a transform are filled with 'nearest'.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [None]:
# Scratch folder (under ROOT_PATH) that receives the augmented images before
# they are distributed over the split folders; created if it does not exist yet.
TEMP_AUGMENT_DIR = "/".join((ROOT_PATH, "Temp_Augment"))
os.makedirs(TEMP_AUGMENT_DIR, exist_ok=True)

In [None]:
# Image augmentation in 'mature' class
# sorted() makes the processing order deterministic across runs.
for filename in sorted(os.listdir(MATURE_PATH)):
    file_path = os.path.join(MATURE_PATH, filename)
    if not os.path.isfile(file_path):
        # os.listdir also returns sub-folders, which load_img cannot open.
        continue
    # Give every source image its own prefix. With one shared prefix, saved
    # files from different sources can receive the same name and silently
    # overwrite each other.
    # NOTE(review): Keras appears to add only a sample index and a small
    # random number to the prefix - confirm against the installed version.
    source_stem = os.path.splitext(filename)[0]
    augment_image(file_path, TEMP_AUGMENT_DIR, datagen, f"Mature_aug_{source_stem}")

In [None]:
# Randomly pick the destination split for one file (80% train / 10% valid / 10% test)
def assign_to_set():
    """Return the split directory chosen by a single uniform random draw.

    A draw below 0.8 selects TRAINING_PATH, a draw in [0.8, 0.9) selects
    VALIDATION_PATH, and anything else selects TEST_PATH. The paths are read
    from the notebook's globals at call time.
    """
    draw = random.random()
    if draw >= 0.9:
        return TEST_PATH
    if draw >= 0.8:
        return VALIDATION_PATH
    return TRAINING_PATH

In [None]:
# Distribute every file in the scratch folder into a randomly chosen split folder.
# NOTE(review): each file is assigned independently, so augmented variants of
# the same source image can end up in different splits - confirm this is intended.
for augmented_file in os.listdir(TEMP_AUGMENT_DIR):
    target_dir = assign_to_set()
    # Make sure the split folder exists before moving the file into it.
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(
        os.path.join(TEMP_AUGMENT_DIR, augmented_file),
        os.path.join(target_dir, augmented_file),
    )

In [None]:
# Delete the temporary augmentation directory and anything still left inside it.
# Run this only after the move cell has emptied it into the split folders.
shutil.rmtree(TEMP_AUGMENT_DIR)

In [None]:
# Completion marker for the augmentation / merge section
print("Data augmentation and merging is complete.")

Data augmentation and merging is complete.


# Analyze Dataset

In [None]:
# Split folders at dataset level (one sub-folder per class inside each).
# NOTE(review): this cell rebinds ROOT_PATH, TRAINING_PATH, TEST_PATH and
# VALIDATION_PATH, which the augmentation section set to the 'Mature'
# sub-folders. Re-running those earlier cells after this one without resetting
# the paths would target the class-level folders instead.
ROOT_PATH = "/content/drive/MyDrive/Bangkit 2024/Split_Augmentation/Dataset"
TRAINING_PATH = "/".join((ROOT_PATH, "train"))
TEST_PATH = "/".join((ROOT_PATH, "test"))
VALIDATION_PATH = "/".join((ROOT_PATH, "valid"))

In [None]:
# Class names; check_dataset joins each one onto a split path, so they must match the sub-folder names
classes = ['Normal', 'Immature', 'Mature']

In [None]:
def check_dataset(PATH, classes):
    """Print how many entries each class folder holds, then the grand total.

    Args:
        PATH: Directory that contains one sub-folder per class.
        classes: Iterable of class (sub-folder) names to inspect.

    Every directory entry returned by ``os.listdir`` is counted, so anything
    stored next to the images is included in the numbers.
    """
    per_class_counts = []
    for label in classes:
        class_dir = os.path.join(PATH, label)
        n_entries = len(os.listdir(class_dir))
        print(f"Number of images for {label}: {n_entries}")
        per_class_counts.append(n_entries)
    print("=" * 15)
    print("Number of images total:", sum(per_class_counts))

In [None]:
# Per-class and total counts for the training split
check_dataset(TRAINING_PATH, classes)

Number of images for Normal: 1304
Number of images for Immature: 1125
Number of images for Mature: 1582
Number of images total: 4011


In [None]:
# Per-class and total counts for the test split
check_dataset(TEST_PATH, classes)

Number of images for Normal: 458
Number of images for Immature: 378
Number of images for Mature: 454
Number of images total: 1290


In [None]:
# Per-class and total counts for the validation split
check_dataset(VALIDATION_PATH, classes)

Number of images for Normal: 421
Number of images for Immature: 387
Number of images for Mature: 476
Number of images total: 1284
