# Augmnetation of Dataset

In [1]:
# Importing Libraries

import cv2
import os
import numpy as np
import random
from tqdm import tqdm
from skimage import transform as tf
import shutil

In [2]:
# Define the input and output directories
input_dir = 'C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset'  # Directory containing original images
output_dir = 'C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/refined_dataset'  # Directory where refined images will be saved

In [3]:
# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Define the list of categories or classes
categories = os.listdir(input_dir)

In [4]:
# Function to perform data augmentation and save refined images with unique names
def augment_and_save_image(image_path, category, output_dir, count):
    # Attempt to load the image
    image = cv2.imread(image_path)
    
    # Check if the image was loaded successfully
    if image is not None:
        # Resize the image
        resized_image = cv2.resize(image, (256, 256))  # Adjust the size as needed
        
        # Flip the image horizontally with a 50% probability
        if random.random() < 0.5:
            resized_image = np.fliplr(resized_image)
        
        # Flip the image vertically with a 50% probability
        if random.random() < 0.5:
            resized_image = np.flipud(resized_image)
        
        # Apply shear transformation
        shear_factor = random.uniform(-0.1, 0.1)
        shear_tf = tf.AffineTransform(shear=shear_factor)
        resized_image = tf.warp(resized_image, shear_tf, preserve_range=True).astype(np.uint8)
        
        # Rotate the image by 90, 180, or 270 degrees with a 25% probability each
        rotate_angle = random.choice([90, 180, 270])
        if rotate_angle > 0:
            rows, cols, _ = resized_image.shape
            M = cv2.getRotationMatrix2D((cols / 2, rows / 2), rotate_angle, 1)
            resized_image = cv2.warpAffine(resized_image, M, (cols, rows))
        
        # Save the refined image with a unique name
        filename = f"{category}{count}.jpg"
        output_path = os.path.join(output_dir, filename)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        cv2.imwrite(output_path, resized_image)
    
    else:
        print(f"Error loading image: {image_path}")

In [5]:
# Define the desired number of images per category
desired_images_per_category = 500

# Loop through the categories and process images
for category in categories:
    category_dir = os.path.join(input_dir, category)
    output_category_dir = os.path.join(output_dir, category)
    
    # Create the output directory if it doesn't exist
    os.makedirs(output_category_dir, exist_ok=True)
    
    # Get the current image count in the output directory
    current_images_count = len(os.listdir(output_category_dir))
    
    # Continue augmenting until we reach the desired number of images
    while current_images_count < desired_images_per_category:
        # Use tqdm to display progress
        with tqdm(total=desired_images_per_category - current_images_count, desc=category) as pbar:
            # Loop through the images in the category directory
            for image_name in os.listdir(category_dir):
                image_path = os.path.join(category_dir, image_name)
                
                # Augment and save the image with a unique name
                augment_and_save_image(image_path, category, output_category_dir, current_images_count)
                pbar.update(1)
                current_images_count += 1
                
                # Check if the desired count is reached and break the loop
                if current_images_count >= desired_images_per_category:
                    break

print("Image refinement and data augmentation complete.")

BEAN:   0%|          | 0/500 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\BEAN\.DS_Store


BEAN:  98%|█████████▊| 491/500 [00:11<00:00, 44.57it/s]
BEAN: 100%|██████████| 9/9 [00:00<00:00, 62.19it/s]


Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\BEAN\.DS_Store


BEETROOT:  18%|█▊        | 88/500 [00:05<00:26, 15.35it/s]
BEETROOT:  21%|██▏       | 88/412 [00:05<00:21, 15.00it/s]
BEETROOT:  27%|██▋       | 88/324 [00:05<00:14, 16.43it/s]
BEETROOT:  37%|███▋      | 88/236 [00:04<00:08, 18.26it/s]
BEETROOT:  59%|█████▉    | 88/148 [00:05<00:03, 17.45it/s]
BEETROOT: 100%|██████████| 60/60 [00:03<00:00, 17.04it/s]
BITTER GOURD:   1%|▏         | 7/500 [00:00<00:08, 60.89it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\BITTER GOURD\.DS_Store


BITTER GOURD:  91%|█████████ | 454/500 [00:09<00:00, 47.54it/s]
BITTER GOURD:  15%|█▌        | 7/46 [00:00<00:00, 65.55it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\BITTER GOURD\.DS_Store


BITTER GOURD: 100%|██████████| 46/46 [00:00<00:00, 54.51it/s]
BOTTLE GOURD:   1%|          | 6/500 [00:00<00:09, 52.39it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\BOTTLE GOURD\.DS_Store


BOTTLE GOURD:  99%|█████████▉| 494/500 [00:10<00:00, 47.56it/s]
BOTTLE GOURD: 100%|██████████| 6/6 [00:00<00:00, 63.27it/s]


Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\BOTTLE GOURD\.DS_Store


CABBAGE:   0%|          | 0/500 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CABBAGE\.DS_Store


CABBAGE:  16%|█▋        | 82/500 [00:05<00:28, 14.66it/s]
CABBAGE:   1%|          | 4/418 [00:00<00:14, 29.46it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CABBAGE\.DS_Store


CABBAGE:  20%|█▉        | 82/418 [00:05<00:21, 15.98it/s]
CABBAGE:   1%|          | 4/336 [00:00<00:10, 31.07it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CABBAGE\.DS_Store


CABBAGE:  24%|██▍       | 82/336 [00:05<00:16, 15.84it/s]
CABBAGE:   2%|▏         | 4/254 [00:00<00:07, 31.69it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CABBAGE\.DS_Store


CABBAGE:  32%|███▏      | 82/254 [00:05<00:10, 16.36it/s]
CABBAGE:   2%|▏         | 4/172 [00:00<00:05, 31.31it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CABBAGE\.DS_Store


CABBAGE:  48%|████▊     | 82/172 [00:04<00:05, 16.46it/s]
CABBAGE:   4%|▍         | 4/90 [00:00<00:02, 32.91it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CABBAGE\.DS_Store


CABBAGE:  91%|█████████ | 82/90 [00:05<00:00, 16.09it/s]
CABBAGE:  50%|█████     | 4/8 [00:00<00:00, 32.06it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CABBAGE\.DS_Store


CABBAGE: 100%|██████████| 8/8 [00:00<00:00, 24.62it/s]
CAPSICUM:  18%|█▊        | 89/500 [00:04<00:22, 18.25it/s]
CAPSICUM:  22%|██▏       | 89/411 [00:04<00:17, 18.75it/s]
CAPSICUM:  28%|██▊       | 89/322 [00:04<00:12, 18.89it/s]
CAPSICUM:  38%|███▊      | 89/233 [00:04<00:07, 19.43it/s]
CAPSICUM:  62%|██████▏   | 89/144 [00:04<00:02, 18.89it/s]
CAPSICUM: 100%|██████████| 55/55 [00:03<00:00, 17.39it/s]
CARROT:   1%|          | 4/500 [00:00<00:24, 20.32it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CARROT\.DS_Store


CARROT:  67%|██████▋   | 337/500 [00:20<00:09, 16.42it/s]
CARROT:   0%|          | 0/163 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CARROT\.DS_Store


CARROT: 100%|██████████| 163/163 [00:07<00:00, 21.41it/s]
CAULIFLOWER:  58%|█████▊    | 290/500 [00:11<00:08, 25.93it/s]
CAULIFLOWER: 100%|██████████| 210/210 [00:08<00:00, 24.67it/s]
CHILLI:   1%|          | 5/500 [00:00<00:10, 45.16it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CHILLI\.DS_Store


CHILLI:  26%|██▌       | 131/500 [00:03<00:09, 38.24it/s]
CHILLI:   1%|▏         | 5/369 [00:00<00:08, 45.14it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CHILLI\.DS_Store


CHILLI:  36%|███▌      | 131/369 [00:03<00:06, 38.87it/s]
CHILLI:   2%|▏         | 5/238 [00:00<00:05, 43.43it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CHILLI\.DS_Store


CHILLI:  55%|█████▌    | 131/238 [00:03<00:02, 36.86it/s]
CHILLI:   4%|▎         | 4/107 [00:00<00:02, 36.05it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CHILLI\.DS_Store


CHILLI: 100%|██████████| 107/107 [00:03<00:00, 35.48it/s]
CORN:   0%|          | 2/500 [00:00<00:26, 18.91it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CORN\.DS_Store


CORN:  17%|█▋        | 86/500 [00:06<00:31, 13.08it/s]
CORN:   1%|          | 3/414 [00:00<00:20, 20.08it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CORN\.DS_Store


CORN:  21%|██        | 86/414 [00:06<00:24, 13.54it/s]
CORN:   1%|          | 3/328 [00:00<00:15, 20.32it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CORN\.DS_Store


CORN:  26%|██▌       | 86/328 [00:06<00:18, 13.16it/s]
CORN:   1%|          | 3/242 [00:00<00:10, 22.82it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CORN\.DS_Store


CORN:  36%|███▌      | 86/242 [00:06<00:11, 13.95it/s]
CORN:   2%|▏         | 3/156 [00:00<00:06, 24.26it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CORN\.DS_Store


CORN:  55%|█████▌    | 86/156 [00:06<00:05, 13.80it/s]
CORN:   4%|▍         | 3/70 [00:00<00:02, 23.87it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CORN\.DS_Store


CORN: 100%|██████████| 70/70 [00:04<00:00, 15.10it/s]
CUCUMBER:   1%|          | 6/500 [00:00<00:10, 49.29it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CUCUMBER\.DS_Store


CUCUMBER:  90%|████████▉ | 449/500 [00:22<00:02, 19.75it/s]
CUCUMBER:  12%|█▏        | 6/51 [00:00<00:00, 53.49it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\CUCUMBER\.DS_Store


CUCUMBER: 100%|██████████| 51/51 [00:01<00:00, 26.43it/s]
EGG:   1%|          | 5/500 [00:00<00:10, 48.31it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\EGG\.DS_Store


EGG:   3%|▎         | 16/500 [00:00<00:11, 40.68it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\EGG\109.jpg


EGG:  82%|████████▏ | 409/500 [00:11<00:02, 35.68it/s]
EGG:   7%|▋         | 6/91 [00:00<00:01, 48.19it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\EGG\.DS_Store


EGG:  18%|█▊        | 16/91 [00:00<00:01, 39.48it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\EGG\109.jpg


EGG: 100%|██████████| 91/91 [00:03<00:00, 29.41it/s]
EGGPLANT:  17%|█▋        | 84/500 [00:05<00:28, 14.78it/s]
EGGPLANT:  20%|██        | 84/416 [00:05<00:21, 15.35it/s]
EGGPLANT:  25%|██▌       | 84/332 [00:05<00:16, 15.04it/s]
EGGPLANT:  34%|███▍      | 84/248 [00:05<00:10, 15.03it/s]
EGGPLANT:  51%|█████     | 84/164 [00:05<00:05, 15.30it/s]
EGGPLANT: 100%|██████████| 80/80 [00:05<00:00, 14.87it/s]
GARLIC:   1%|          | 6/500 [00:00<00:09, 51.42it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GARLIC\.DS_Store


GARLIC:  96%|█████████▌| 479/500 [00:16<00:00, 28.66it/s]
GARLIC:  33%|███▎      | 7/21 [00:00<00:00, 62.62it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GARLIC\.DS_Store


GARLIC: 100%|██████████| 21/21 [00:00<00:00, 48.40it/s]
GINGER:   0%|          | 0/500 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER:  13%|█▎        | 67/500 [00:03<00:25, 17.02it/s]
GINGER:   0%|          | 0/433 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER:  15%|█▌        | 67/433 [00:03<00:20, 17.72it/s]
GINGER:   0%|          | 0/366 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER:  18%|█▊        | 67/366 [00:03<00:17, 17.13it/s]
GINGER:   1%|          | 3/299 [00:00<00:18, 15.71it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER:  22%|██▏       | 67/299 [00:03<00:13, 17.81it/s]
GINGER:   1%|▏         | 3/232 [00:00<00:14, 15.74it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER:  29%|██▉       | 67/232 [00:03<00:09, 17.99it/s]
GINGER:   2%|▏         | 3/165 [00:00<00:10, 15.14it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER:  41%|████      | 67/165 [00:03<00:05, 17.71it/s]
GINGER:   3%|▎         | 3/98 [00:00<00:06, 15.62it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER:  68%|██████▊   | 67/98 [00:03<00:01, 17.74it/s]
GINGER:   0%|          | 0/31 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\GINGER\.DS_Store


GINGER: 100%|██████████| 31/31 [00:01<00:00, 17.03it/s]
LEMON:   0%|          | 0/500 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LEMON\.DS_Store


LEMON:  16%|█▌        | 80/500 [00:06<00:31, 13.25it/s]
LEMON:   0%|          | 0/420 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LEMON\.DS_Store


LEMON:  19%|█▉        | 80/420 [00:06<00:26, 13.02it/s]
LEMON:   0%|          | 0/340 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LEMON\.DS_Store


LEMON:  24%|██▎       | 80/340 [00:06<00:19, 13.10it/s]
LEMON:   0%|          | 0/260 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LEMON\.DS_Store


LEMON:  31%|███       | 80/260 [00:06<00:14, 12.52it/s]
LEMON:   0%|          | 0/180 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LEMON\.DS_Store


LEMON:  44%|████▍     | 80/180 [00:06<00:07, 13.17it/s]
LEMON:   0%|          | 0/100 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LEMON\.DS_Store


LEMON:  80%|████████  | 80/100 [00:06<00:01, 13.20it/s]
LEMON:   0%|          | 0/20 [00:00<?, ?it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LEMON\.DS_Store


LEMON: 100%|██████████| 20/20 [00:01<00:00, 10.68it/s]
LETTUCE:   1%|          | 3/500 [00:00<00:22, 22.11it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LETTUCE\.DS_Store


LETTUCE:  19%|█▉        | 94/500 [00:07<00:34, 11.94it/s]
LETTUCE:   1%|          | 3/406 [00:00<00:18, 21.36it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LETTUCE\.DS_Store


LETTUCE:  23%|██▎       | 94/406 [00:08<00:27, 11.44it/s]
LETTUCE:   1%|          | 3/312 [00:00<00:16, 18.56it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LETTUCE\.DS_Store


LETTUCE:  30%|███       | 94/312 [00:08<00:18, 11.48it/s]
LETTUCE:   1%|▏         | 3/218 [00:00<00:09, 22.12it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LETTUCE\.DS_Store


LETTUCE:  43%|████▎     | 94/218 [00:07<00:10, 12.30it/s]
LETTUCE:   2%|▏         | 3/124 [00:00<00:05, 22.43it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LETTUCE\.DS_Store


LETTUCE:  76%|███████▌  | 94/124 [00:07<00:02, 11.94it/s]
LETTUCE:  10%|█         | 3/30 [00:00<00:01, 22.82it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\LETTUCE\.DS_Store


LETTUCE: 100%|██████████| 30/30 [00:03<00:00,  9.47it/s]
OKRA:   1%|          | 6/500 [00:00<00:09, 53.91it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\OKRA\.DS_Store


OKRA:  43%|████▎     | 216/500 [00:05<00:08, 34.61it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\OKRA\471.gif


OKRA:  50%|█████     | 251/500 [00:06<00:06, 40.51it/s]
OKRA:   3%|▎         | 7/249 [00:00<00:04, 57.87it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\OKRA\.DS_Store


OKRA:  87%|████████▋ | 217/249 [00:05<00:00, 42.64it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\OKRA\471.gif


OKRA: 100%|██████████| 249/249 [00:05<00:00, 42.03it/s]
ONION:   1%|          | 6/500 [00:00<00:09, 51.68it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\ONION\.DS_Store


ONION:  46%|████▌     | 229/500 [00:09<00:04, 54.37it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\ONION\41.gif


ONION:  68%|██████▊   | 342/500 [00:12<00:05, 26.96it/s]
ONION:   4%|▍         | 6/158 [00:00<00:03, 50.44it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\ONION\.DS_Store


ONION: 100%|██████████| 158/158 [00:08<00:00, 18.80it/s]
PEAS:  20%|██        | 100/500 [00:07<00:30, 12.91it/s]
PEAS:  25%|██▌       | 100/400 [00:07<00:22, 13.15it/s]
PEAS:  33%|███▎      | 100/300 [00:07<00:14, 13.54it/s]
PEAS:  50%|█████     | 100/200 [00:07<00:07, 13.34it/s]
PEAS: 100%|██████████| 100/100 [00:07<00:00, 13.47it/s]
POTATO:   1%|          | 5/500 [00:00<00:12, 40.60it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\POTATO\.DS_Store


POTATO:  15%|█▌        | 76/500 [00:03<00:19, 21.59it/s]
POTATO:   1%|          | 5/424 [00:00<00:09, 43.57it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\POTATO\.DS_Store


POTATO:  18%|█▊        | 76/424 [00:03<00:16, 21.49it/s]
POTATO:   1%|▏         | 5/348 [00:00<00:08, 39.67it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\POTATO\.DS_Store


POTATO:  22%|██▏       | 76/348 [00:03<00:12, 21.02it/s]
POTATO:   1%|▏         | 4/272 [00:00<00:07, 37.39it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\POTATO\.DS_Store


POTATO:  28%|██▊       | 76/272 [00:03<00:09, 20.87it/s]
POTATO:   3%|▎         | 5/196 [00:00<00:04, 45.31it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\POTATO\.DS_Store


POTATO:  39%|███▉      | 76/196 [00:03<00:05, 21.88it/s]
POTATO:   4%|▍         | 5/120 [00:00<00:02, 46.48it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\POTATO\.DS_Store


POTATO:  63%|██████▎   | 76/120 [00:03<00:02, 20.90it/s]
POTATO:  11%|█▏        | 5/44 [00:00<00:01, 38.91it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\POTATO\.DS_Store


POTATO: 100%|██████████| 44/44 [00:01<00:00, 22.11it/s]
PUMPKIN:   2%|▏         | 8/500 [00:00<00:06, 70.62it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\PUMPKIN\.DS_Store


PUMPKIN:  93%|█████████▎| 466/500 [00:09<00:00, 49.18it/s]
PUMPKIN:  18%|█▊        | 6/34 [00:00<00:00, 56.28it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\PUMPKIN\.DS_Store


PUMPKIN: 100%|██████████| 34/34 [00:00<00:00, 49.25it/s]
RADDISH:   6%|▌         | 30/500 [00:00<00:15, 30.35it/s]
RADDISH:   6%|▋         | 30/470 [00:01<00:15, 29.03it/s]
RADDISH:   7%|▋         | 30/440 [00:00<00:12, 32.02it/s]
RADDISH:   7%|▋         | 30/410 [00:00<00:11, 32.31it/s]
RADDISH:   8%|▊         | 30/380 [00:00<00:10, 32.64it/s]
RADDISH:   9%|▊         | 30/350 [00:00<00:09, 33.54it/s]
RADDISH:   9%|▉         | 30/320 [00:00<00:08, 34.17it/s]
RADDISH:  10%|█         | 30/290 [00:00<00:07, 33.52it/s]
RADDISH:  12%|█▏        | 30/260 [00:00<00:06, 33.58it/s]
RADDISH:  13%|█▎        | 30/230 [00:00<00:05, 35.67it/s]
RADDISH:  15%|█▌        | 30/200 [00:00<00:04, 35.62it/s]
RADDISH:  18%|█▊        | 30/170 [00:00<00:04, 32.92it/s]
RADDISH:  21%|██▏       | 30/140 [00:00<00:03, 33.01it/s]
RADDISH:  27%|██▋       | 30/110 [00:00<00:02, 31.85it/s]
RADDISH:  38%|███▊      | 30/80 [00:00<00:01, 31.11it/s]
RADDISH:  60%|██████    | 30/50 [00:00<00:00, 33.73it/s]
RADDISH: 100%|███

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\SPINACH\.DS_Store


SPINACH:  19%|█▉        | 97/500 [00:06<00:27, 14.83it/s]
SPINACH:   1%|          | 3/403 [00:00<00:17, 22.61it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\SPINACH\.DS_Store


SPINACH:  24%|██▍       | 97/403 [00:06<00:20, 14.94it/s]
SPINACH:   1%|          | 3/306 [00:00<00:12, 24.12it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\SPINACH\.DS_Store


SPINACH:  32%|███▏      | 97/306 [00:06<00:13, 15.95it/s]
SPINACH:   1%|▏         | 3/209 [00:00<00:08, 24.64it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\SPINACH\.DS_Store


SPINACH:  46%|████▋     | 97/209 [00:06<00:07, 15.30it/s]
SPINACH:   2%|▏         | 2/112 [00:00<00:05, 19.41it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\SPINACH\.DS_Store


SPINACH:  87%|████████▋ | 97/112 [00:06<00:00, 15.40it/s]
SPINACH:  13%|█▎        | 2/15 [00:00<00:00, 19.61it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\SPINACH\.DS_Store


SPINACH: 100%|██████████| 15/15 [00:01<00:00, 10.54it/s]
TOMATO:   1%|          | 3/500 [00:00<00:18, 26.89it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\TOMATO\.DS_Store


TOMATO:  18%|█▊        | 90/500 [00:05<00:24, 16.60it/s]
TOMATO:   1%|          | 3/410 [00:00<00:14, 27.68it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\TOMATO\.DS_Store


TOMATO:  22%|██▏       | 90/410 [00:05<00:18, 16.98it/s]
TOMATO:   1%|          | 3/320 [00:00<00:12, 25.87it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\TOMATO\.DS_Store


TOMATO:  28%|██▊       | 90/320 [00:05<00:13, 16.91it/s]
TOMATO:   1%|▏         | 3/230 [00:00<00:07, 29.71it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\TOMATO\.DS_Store


TOMATO:  39%|███▉      | 90/230 [00:05<00:08, 16.92it/s]
TOMATO:   2%|▏         | 3/140 [00:00<00:04, 27.94it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\TOMATO\.DS_Store


TOMATO:  64%|██████▍   | 90/140 [00:05<00:03, 16.40it/s]
TOMATO:   6%|▌         | 3/50 [00:00<00:01, 25.37it/s]

Error loading image: C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/main_dataset\TOMATO\.DS_Store


TOMATO: 100%|██████████| 50/50 [00:03<00:00, 15.94it/s]
TURNIP:  20%|█▉        | 98/500 [00:05<00:20, 19.37it/s]
TURNIP:  24%|██▍       | 98/402 [00:04<00:15, 20.19it/s]
TURNIP:  32%|███▏      | 98/304 [00:04<00:10, 19.80it/s]
TURNIP:  48%|████▊     | 98/206 [00:04<00:05, 20.33it/s]
TURNIP:  91%|█████████ | 98/108 [00:04<00:00, 21.78it/s]
TURNIP: 100%|██████████| 10/10 [00:00<00:00, 15.34it/s]

Image refinement and data augmentation complete.





## Splitting Dataset

In [6]:
# Importing required Libraries

import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import shutil

In [7]:
# Define the path to refined dataset root directory (containing categorical image folders)
dataset_root = 'C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/refined_dataset' 

# Define the directory where split dataset will be saved
split_dataset_root = 'C:/Users/Lenovo/Videos/Project/RECIPE GENERATOR/Jupyter/split_refined_data'

In [8]:
# Define the ratios for training, testing, and validation (e.g., 80% for training, 15% for testing, 15% for validation)
train_ratio = 0.8
test_ratio = 0.1
valid_ratio = 0.1

# List all the category folders in the dataset directory
categories = os.listdir(dataset_root)

# Create directories for the training, testing, and validation sets
train_dir = os.path.join(split_dataset_root, 'train')
test_dir = os.path.join(split_dataset_root, 'test')
valid_dir = os.path.join(split_dataset_root, 'valid')

os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(valid_dir, exist_ok=True)

In [9]:
# Iterate through each category folder and split the images
for category_name in tqdm(categories):
    category_dir = os.path.join(dataset_root, category_name)
    images = os.listdir(category_dir)

    # Split the images into training, testing, and validation sets
    train_images, temp_images = train_test_split(images, train_size=train_ratio, random_state=42)
    test_images, valid_images = train_test_split(temp_images, train_size=test_ratio / (test_ratio + valid_ratio), random_state=42)

    # Create directories for the category in the training, testing, and validation sets
    train_category_dir = os.path.join(train_dir, category_name)
    test_category_dir = os.path.join(test_dir, category_name)
    valid_category_dir = os.path.join(valid_dir, category_name)

    os.makedirs(train_category_dir, exist_ok=True)
    os.makedirs(test_category_dir, exist_ok=True)
    os.makedirs(valid_category_dir, exist_ok=True)

    # Move the images to the appropriate directories
    for image in train_images:
        src = os.path.join(category_dir, image)
        dst = os.path.join(train_category_dir, image)
        shutil.copy(src, dst)

    for image in test_images:
        src = os.path.join(category_dir, image)
        dst = os.path.join(test_category_dir, image)
        shutil.copy(src, dst)

    for image in valid_images:
        src = os.path.join(category_dir, image)
        dst = os.path.join(valid_category_dir, image)
        shutil.copy(src, dst)

print("Dataset split into training, testing, and validation sets.")

100%|██████████| 26/26 [01:41<00:00,  3.90s/it]

Dataset split into training, testing, and validation sets.





## Creating CSV file for dataset

In [13]:
import os
import csv
from tqdm import tqdm

In [17]:
# Define the directory where your dataset is located
dataset_dir = 'C:/Users/Lenovo/Downloads/AI Project/main/refined_dataset' 

# Define the path to save the CSV file
csv_file = 'C:/Users/Lenovo/Downloads/AI Project/main/csv file/refined_dataset.csv'

# Define categories
categories = os.listdir(dataset_dir)

# Create or open the CSV file for writing
file_exists = os.path.exists(csv_file)

In [18]:
with open(csv_file, mode='a', newline='') as csv_file:
    fieldnames = ['image_id', 'Category']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    
    if not file_exists:
        writer.writeheader()

    for category in tqdm(categories):
        category_dir = os.path.join(dataset_dir, category)

        if os.path.isdir(category_dir):
            image_files = os.listdir(category_dir)

            for image_file in tqdm(image_files):
                image_id = image_file
                category_name = category

                # Write the information to the CSV file
                writer.writerow({
                    'image_id': image_id,
                    'Category': category_name
                })

  0%|          | 0/26 [00:00<?, ?it/s]
100%|██████████| 498/498 [00:00<00:00, 166130.87it/s]

100%|██████████| 500/500 [00:00<00:00, 249958.52it/s]

100%|██████████| 498/498 [00:00<00:00, 146394.97it/s]

100%|██████████| 498/498 [00:00<00:00, 138982.19it/s]

100%|██████████| 493/493 [00:00<00:00, 246606.07it/s]

100%|██████████| 500/500 [00:00<00:00, 247189.06it/s]

100%|██████████| 498/498 [00:00<00:00, 249226.03it/s]

100%|██████████| 500/500 [00:00<00:00, 250376.31it/s]

100%|██████████| 496/496 [00:00<00:00, 494033.43it/s]

100%|██████████| 494/494 [00:00<00:00, 202421.47it/s]

100%|██████████| 498/498 [00:00<00:00, 124561.00it/s]

100%|██████████| 496/496 [00:00<00:00, 247929.30it/s]

100%|██████████| 500/500 [00:00<00:00, 224630.68it/s]

100%|██████████| 498/498 [00:00<00:00, 221337.65it/s]
 54%|█████▍    | 14/26 [00:00<00:00, 135.18it/s]
100%|██████████| 492/492 [00:00<00:00, 245812.69it/s]

100%|██████████| 493/493 [00:00<00:00, 123317.74it/s]

100%|██████████| 494/494 [00:00<0