# 0. Import

In [1]:
import cv2
import numpy as np
import os
from shutil import copyfile
import random
import uuid

# 1. Resize data

In [2]:
# Raw images are read from `dataset_path`; resized copies are written under `resized_path`.
dataset_path, resized_path = "dataset", "resized_dataset"

# exist_ok=True keeps this cell re-runnable when the folder is already there.
os.makedirs(resized_path, exist_ok=True)

In [3]:
def resizeImage(img, size=(224,224)):
    """Pad an image to a square with black borders, then scale it to `size`.

    The padding is split as evenly as possible between the two sides of the
    shorter axis, so the original content stays centred. An image that is
    already square is resized directly.

    Args:
        img: Image array (e.g. the result of `cv2.imread`). Only
            `img.shape[:2]`, read as (height, width), is inspected.
        size: Target size forwarded to `cv2.resize`. Its first element is also
            the threshold used to choose the interpolation method.

    Returns:
        The array returned by `cv2.resize`.

    Raises:
        ValueError: If `img` is None, which is what `cv2.imread` yields for a
            file it cannot read or decode.
    """
    if img is None:
        raise ValueError("resizeImage received None: the image could not be read")

    height, width = img.shape[:2]
    max_dim = max(height, width)

    if height != width:
        # Centre the content: any odd leftover pixel goes to the bottom/right side.
        top = (max_dim - height) // 2
        bottom = max_dim - height - top
        left = (max_dim - width) // 2
        right = max_dim - width - left
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))

    # INTER_AREA when shrinking, INTER_CUBIC when enlarging (or keeping the size).
    interpolation = cv2.INTER_AREA if max_dim > size[0] else cv2.INTER_CUBIC
    return cv2.resize(img, size, interpolation=interpolation)

In [4]:
def process_img(folder, destination):
    """Resize every image file in `folder` and save the result into `destination`.

    Each output file is named "resized_" + the original file name. Entries
    that are not regular files (for example sub-folders) are skipped. A file
    that cannot be read, resized or written is reported on stdout and the loop
    moves on to the next file, so one bad image never aborts the whole run.

    Args:
        folder: Directory whose files are processed.
        destination: Directory that receives the resized images; created if it
            does not exist.
    """
    print('folder',folder)
    print('destination',destination)
    os.makedirs(destination, exist_ok=True)

    for filename in sorted(os.listdir(folder)):
        source_file = os.path.join(folder, filename)
        if not os.path.isfile(source_file):
            # Sub-folders are not images; skip them instead of reporting an error.
            continue
        try:
            img = cv2.imread(source_file)
            if img is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise ValueError("cv2.imread could not read " + source_file)
            resized = resizeImage(img, (224, 224))
            target_file = os.path.join(destination, "resized_" + filename)
            if not cv2.imwrite(target_file, resized):
                # cv2.imwrite reports failure through its boolean return value.
                raise IOError("cv2.imwrite could not save " + target_file)
        except Exception as e:
            print(e)
            print("Error in resizing image: ", filename)

In [5]:
# Resize both classes, keeping the same class folder name under `resized_path`.
for class_name in ('freshoranges', 'rottenoranges'):
    process_img(
        os.path.join(dataset_path, class_name),
        destination=os.path.join(resized_path, class_name),
    )

print('Images resized successfully!')

folder dataset\freshoranges
destination resized_dataset\freshoranges
folder dataset\rottenoranges
destination resized_dataset\rottenoranges
Images resized successfully!


# 2. Split dataset into Train/Test/Validation

In [6]:
# Folder layout of the split dataset: <source_folder>/<split>/<class>.
source_folder = 'fruit_dataset'

train_dir = os.path.join(source_folder, 'train')
test_dir = os.path.join(source_folder, 'test')
validation_dir = os.path.join(source_folder, 'validation')

train_rotten_dir = os.path.join(train_dir, 'Rotten Oranges')
train_fresh_dir = os.path.join(train_dir, 'Fresh Oranges')

test_rotten_dir = os.path.join(test_dir, 'Rotten Oranges')
test_fresh_dir = os.path.join(test_dir, 'Fresh Oranges')

validation_rotten_dir = os.path.join(validation_dir, 'Rotten Oranges')
validation_fresh_dir = os.path.join(validation_dir, 'Fresh Oranges')

try:
    # Creating the leaf folders also creates every missing parent folder.
    for split_class_dir in (
        train_rotten_dir, train_fresh_dir,
        test_rotten_dir, test_fresh_dir,
        validation_rotten_dir, validation_fresh_dir,
    ):
        os.makedirs(split_class_dir, exist_ok=True)
except OSError as err:
    # Still best-effort (no re-raise), but the failure is reported instead of
    # hidden, because the cells that copy files into these folders depend on them.
    print(f"Could not create the folder tree under '{source_folder}': {err}")

In [7]:
#SPLIT
# Class folders under the resized dataset; these are the inputs of the split.
source_datasets = 'resized_dataset'

foranges_dir, roranges_dir = (
    os.path.join(source_datasets, class_folder)
    for class_folder in ('freshoranges', 'rottenoranges')
)

In [8]:
def split_data(SOURCE='', TRAINING='', TESTING='', VALIDATION='',  SPLIT_SIZE=(0.6, 0.2, 0.2), seed=None):
    """Shuffle the files of `SOURCE` and copy them into train/test/validation folders.

    Only non-empty regular files take part in the split; sub-folders and
    zero-byte files are ignored and do not count towards the split sizes.
    Destination folders are created when missing.

    Args:
        SOURCE: Folder containing the files to split.
        TRAINING: Folder receiving the training share. When it is '', no
            training share is taken: the first `SPLIT_SIZE[1]` fraction goes
            to `TESTING` and everything else to `VALIDATION`.
        TESTING: Folder receiving the testing share.
        VALIDATION: Folder receiving whatever is left after the other shares.
        SPLIT_SIZE: Fractions (train, test, validation). Only the first two
            are read; validation receives the remainder.
        seed: Optional seed for a reproducible shuffle. With None (default)
            the module-level `random` state is used.
    """
    candidates = []
    for filename in os.listdir(SOURCE):
        filepath = os.path.join(SOURCE, filename)
        if os.path.isfile(filepath) and os.path.getsize(filepath) > 0:
            candidates.append(filename)

    if seed is None:
        random.shuffle(candidates)
    else:
        # os.listdir order is arbitrary, so sort first to make the seed meaningful.
        candidates.sort()
        random.Random(seed).shuffle(candidates)

    train_size = int(len(candidates) * SPLIT_SIZE[0]) if TRAINING != '' else 0
    test_size = int(len(candidates) * SPLIT_SIZE[1])

    for target_dir in (TRAINING, TESTING, VALIDATION):
        if target_dir != '':
            os.makedirs(target_dir, exist_ok=True)

    for i, filename in enumerate(candidates):
        if i < train_size:
            target_dir = TRAINING
        elif i < train_size + test_size:
            target_dir = TESTING
        else:
            target_dir = VALIDATION
        copyfile(os.path.join(SOURCE, filename), os.path.join(target_dir, filename))

In [9]:
# Apply the same 60/20/20 split to each class, rotten oranges first.
for class_source, class_train, class_test, class_validation in (
    (roranges_dir, train_rotten_dir, test_rotten_dir, validation_rotten_dir),
    (foranges_dir, train_fresh_dir, test_fresh_dir, validation_fresh_dir),
):
    split_data(class_source, class_train, class_test, class_validation, (0.6, 0.2, 0.2))

## Copy directory

In [10]:
# The split dataset is copied into a separate folder that will hold the augmented data.
source_folder, destination_folder = 'fruit_dataset', 'augmented_dataset'

In [11]:
def create_directory_structure(source_folder, destination_folder, depth):
    """Mirror the sub-folders of `source_folder`, with their files, into `destination_folder`.

    For every sub-folder found, a folder of the same name is created under
    `destination_folder`, the regular files directly inside it are copied, and
    the function recurses into it with `depth - 1`. Files sitting directly in
    `source_folder` itself are not copied by this call.

    Args:
        source_folder: Folder whose sub-folders are mirrored.
        destination_folder: Folder receiving the mirrored sub-folders.
        depth: Number of folder levels still to descend; 0 returns immediately.
    """
    if depth == 0:
        return

    for entry in os.listdir(source_folder):
        src_dir = os.path.join(source_folder, entry)
        if not os.path.isdir(src_dir):
            continue

        dst_dir = os.path.join(destination_folder, entry)
        os.makedirs(dst_dir, exist_ok=True)

        # Copy the files of this level; nested folders are handled by the recursion below.
        for child in os.listdir(src_dir):
            src_file = os.path.join(src_dir, child)
            if not os.path.isfile(src_file):
                continue
            copyfile(src_file, os.path.join(dst_dir, child))

        create_directory_structure(src_dir, dst_dir, depth - 1)

In [12]:
# Create the folder tree and copy the data, descending at most three folder levels.
create_directory_structure(source_folder, destination_folder, depth=3)

# 3. Data Augmentation

In [13]:
# Root folder of the dataset copy that receives the augmented images.
# The misspelled variable name is kept as-is because a later cell refers to it.
destionation_dir = 'augmented_dataset'

In [15]:
def random_rotation():
    """Return one rotation angle picked at random from 15, 30, 45, 60 and 75."""
    return random.choice([15, 30, 45, 60, 75])

def shift(image, width_shift_range=0.2, height_shift_range=0.2):
    """Translate the image by a random offset, keeping its original size.

    Args:
        image: Image array; `image.shape[0]` and `image.shape[1]` are used as
            height and width.
        width_shift_range: Largest horizontal offset, as a fraction of the width.
        height_shift_range: Largest vertical offset, as a fraction of the height.

    Returns:
        The translated image produced by `cv2.warpAffine`.
    """
    rows, cols = image.shape[0], image.shape[1]

    # Draw the horizontal offset first, then the vertical one.
    offset_x = cols * np.random.uniform(-width_shift_range, width_shift_range)
    offset_y = rows * np.random.uniform(-height_shift_range, height_shift_range)

    affine = np.float32([[1, 0, offset_x], [0, 1, offset_y]])
    return cv2.warpAffine(image, affine, (cols, rows))

def rotation(image):
    """Rotate the image about its centre by an angle from `random_rotation()`.

    The output keeps the input's width and height, and the scale factor is 1.

    Args:
        image: Image array; `image.shape[0]` and `image.shape[1]` are used as
            height and width.

    Returns:
        The rotated image produced by `cv2.warpAffine`.
    """
    angle = random_rotation()
    rows, cols = image.shape[0], image.shape[1]
    centre = (cols / 2, rows / 2)
    affine = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(image, affine, (cols, rows))

def flip(image):
    """Flip the image with a flip code chosen at random from -1, 0 and 1.

    Args:
        image: Image array passed straight to `cv2.flip`.

    Returns:
        The flipped image produced by `cv2.flip`.
    """
    return cv2.flip(image, random.choice([-1, 0, 1]))

def salt_noise(image):
    """Return a copy of `image` that, with probability 0.5, carries salt noise.

    When noise is applied, roughly 5% of the array elements (drawn
    independently per element, so per channel for a colour image) are set to
    the brightest value of the array's dtype: the integer maximum for integer
    images (255 for uint8), or 1.0 otherwise. The input array is never modified.

    Args:
        image: NumPy image array.

    Returns:
        A new array with the same shape and dtype as `image`.
    """
    noisy = image.copy()
    if np.random.rand() < 0.5:
        mask = np.random.rand(*noisy.shape) > 0.95
        if np.issubdtype(noisy.dtype, np.integer):
            # Writing 1.0 into an integer image would give an almost black pixel.
            salt_value = np.iinfo(noisy.dtype).max
        else:
            salt_value = 1.0
        noisy[mask] = salt_value
    return noisy

In [16]:
def augment_data(folder, label='rottenOrange'):
    """Write four augmented variants of every readable image in `folder`.

    Each image is passed through `shift`, `rotation`, `flip` and `salt_noise`,
    and every result is saved back into `folder` as
    "<method>_<label>_<uuid4>.jpg". Files that `cv2.imread` cannot read are
    reported and skipped.

    Running this twice on the same folder also augments the files written by
    the first run, because they live in the same folder.

    Args:
        folder: Directory holding the source images; outputs are written here too.
        label: Class tag embedded in the output file names.
    """
    methods = (("shift", shift), ("rotation", rotation), ("flip", flip), ("salt_noise", salt_noise))

    # os.listdir returns a list, so files written below are not visited in this run.
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # cv2.imread returns None for anything it cannot decode (e.g. a sub-folder).
            print("Skipping unreadable image: ", filename)
            continue
        for method_name, method_func in methods:
            # Give every method its own copy so an in-place edit cannot leak into the next one.
            augmented_img = method_func(img.copy())
            new_filename = f"{method_name}_{label}_{str(uuid.uuid4())}.jpg"
            save_path = os.path.join(folder, new_filename)
            cv2.imwrite(save_path, augmented_img)

In [17]:
# Augment only the rotten-orange training images; the outputs are written into the same folder.
augment_data(os.path.join(destionation_dir, 'train', 'Rotten Oranges') )

### Copy the existing fresh-orange training set to augmented_dataset/train/Fresh Oranges

In [18]:
# Source: the 'train' sub-folder of the fresh-orange class under `dataset_path`.
# NOTE(review): this reads from the raw dataset, not from 'resized_dataset' —
# confirm that these images already have the expected size.
source_folder = os.path.join(dataset_path, 'freshoranges', 'train')
# Destination: the fresh-orange class folder of the augmented training set.
destination_folder = os.path.join('augmented_dataset', 'train', 'Fresh Oranges')

In [19]:

def copyFolder(source_folder, destination_folder):
    """Copy every regular file of `source_folder` into `destination_folder`.

    The copy is flat: sub-folders of `source_folder` are skipped rather than
    copied. Files already present in the destination under the same name are
    overwritten.

    Args:
        source_folder: Directory whose files are copied.
        destination_folder: Directory receiving the copies; created if it does
            not exist.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(destination_folder, exist_ok=True)

    for filename in os.listdir(source_folder):
        source_file = os.path.join(source_folder, filename)
        if not os.path.isfile(source_file):
            # copyfile cannot copy a directory; skip anything that is not a regular file.
            continue
        copyfile(source_file, os.path.join(destination_folder, filename))

In [20]:
# Copy the fresh-orange training images into the augmented training set.
copyFolder(source_folder, destination_folder)

##### Convert images whose file names end in '.jpg' to '.png'

In [2]:
from PIL import Image
import os


def convert_jpg_to_png(folder_path):
    """Re-save the JPEG images of `folder_path` as PNG and delete the originals.

    A file is converted when its name ends in ".jpg", ".jpeg" or ".png" in any
    letter case, except names that already end in lowercase ".png". The output
    is written next to the input as "<name without extension>_.png". The input
    file is deleted only after the PNG has been saved. Failures are reported
    on stdout and the remaining files are still processed.

    Args:
        folder_path: Directory containing the images to convert.
    """
    convertible_suffixes = (".jpg", ".jpeg", ".png")

    for filename in os.listdir(folder_path):
        if filename.endswith(".png") or not filename.lower().endswith(convertible_suffixes):
            continue

        # Full path of the input image
        input_path = os.path.join(folder_path, filename)
        if not os.path.isfile(input_path):
            # A folder whose name merely looks like an image is not convertible.
            continue
        # Same base name with a trailing "_" and a .png extension
        output_path = os.path.join(folder_path, os.path.splitext(filename)[0] + "_.png")

        try:
            with Image.open(input_path) as img:
                img.save(output_path, "PNG")
            print(f"Converted {filename} to PNG")

            # Remove the source only after the PNG has been written
            os.remove(input_path)
            print(f"Deleted old file: {input_path}")
        except Exception as e:
            print(f"Failed to convert {filename}: {e}")

# Thư mục chứa ảnh JPG cần chuyển đổi
# Folder containing the images to convert.
# Note: convert_jpg_to_png deletes each source file after a successful conversion.
path__ = 'rotten'
# Run the conversion
convert_jpg_to_png(path__)


Converted rottenFresh (1).jpg to PNG
Deleted old file: rotten\rottenFresh (1).jpg
Converted rottenFresh (10).jpg to PNG
Deleted old file: rotten\rottenFresh (10).jpg
Converted rottenFresh (11).jpg to PNG
Deleted old file: rotten\rottenFresh (11).jpg
Converted rottenFresh (12).jpg to PNG
Deleted old file: rotten\rottenFresh (12).jpg
Converted rottenFresh (13).jpg to PNG
Deleted old file: rotten\rottenFresh (13).jpg
Converted rottenFresh (14).jpg to PNG
Deleted old file: rotten\rottenFresh (14).jpg
Converted rottenFresh (15).jpg to PNG
Deleted old file: rotten\rottenFresh (15).jpg
Converted rottenFresh (16).jpg to PNG
Deleted old file: rotten\rottenFresh (16).jpg
Converted rottenFresh (17).jpg to PNG
Deleted old file: rotten\rottenFresh (17).jpg
Converted rottenFresh (18).jpg to PNG
Deleted old file: rotten\rottenFresh (18).jpg
Converted rottenFresh (2).jpg to PNG
Deleted old file: rotten\rottenFresh (2).jpg
Converted rottenFresh (3).jpg to PNG
Deleted old file: rotten\rottenFresh (3).jp

# DataGenerator