In [3]:
import os
import zipfile
import pandas as pd
from PIL import Image
import shutil
from tqdm import tqdm

def extract_dataset(zip_path, extract_path):
    """Extract the downloaded zip file after verifying its integrity.

    Args:
        zip_path: Path to the zip archive to extract.
        extract_path: Directory the archive members are extracted into.

    Raises:
        zipfile.BadZipFile: If the file is not a valid zip archive, or if a
            member fails its integrity check. In the latter case nothing is
            extracted.
        Exception: Any other error raised while opening or extracting the
            archive is reported and re-raised.
    """
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # testzip() signals corruption through its return value (the
            # name of the first bad member, or None when all members are
            # fine). It must be checked explicitly; otherwise extraction
            # starts anyway and leaves a partially extracted dataset behind.
            corrupt_member = zip_ref.testzip()
            if corrupt_member is not None:
                raise zipfile.BadZipFile(
                    f"Corrupt member in archive: {corrupt_member}"
                )
            zip_ref.extractall(extract_path)
        print(f"Extraction completed. Files extracted to {extract_path}")
    except zipfile.BadZipFile:
        print(f"Error: The file {zip_path} is not a valid zip file.")
        print("This could be due to an incomplete download or corruption of the file.")
        raise
    except Exception as e:
        print(f"An error occurred while extracting the zip file: {e}")
        raise

def organize_images(source_dir, dest_dir):
    """Copy every image under ``source_dir`` into per-category folders.

    The category of an image is the name of the directory that directly
    contains it. Each image is copied (with metadata, via ``shutil.copy2``)
    to ``dest_dir/<category>/<category>_<original name>``. Only files ending
    in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are considered.

    Args:
        source_dir: Root directory that is walked recursively.
        dest_dir: Root directory that receives the category folders.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    for current_dir, _subdirs, filenames in os.walk(source_dir):
        pictures = [
            name for name in filenames
            if name.lower().endswith(image_suffixes)
        ]
        if not pictures:
            # No images here, so no category folder is created.
            continue

        label = os.path.basename(current_dir)
        target_dir = os.path.join(dest_dir, label)
        os.makedirs(target_dir, exist_ok=True)
        for name in pictures:
            origin = os.path.join(current_dir, name)
            destination = os.path.join(target_dir, f"{label}_{name}")
            shutil.copy2(origin, destination)
    print(f"Images organized in {dest_dir}")

def create_metadata(image_dir, output_file):
    """Create a CSV file with image metadata.

    Walks ``image_dir`` recursively and, for every ``.png``/``.jpg``/``.jpeg``
    file (case-insensitive), records its file name, the name of its parent
    directory as the category, its pixel width and height, and its path.
    Files that cannot be opened as images are reported and skipped.

    Args:
        image_dir: Root directory containing the images.
        output_file: Path of the CSV file to write.
    """
    # Fixed schema: passed to the DataFrame so the CSV always has a header
    # row, even when no image was found or every image failed to open.
    columns = ['filename', 'category', 'width', 'height', 'file_path']
    data = []
    for root, _, files in os.walk(image_dir):
        for file in files:
            if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            file_path = os.path.join(root, file)
            category = os.path.basename(os.path.dirname(file_path))
            try:
                with Image.open(file_path) as img:
                    width, height = img.size
            except Exception as e:
                # Best effort: one unreadable image must not abort the run.
                print(f"Error processing {file_path}: {e}")
                continue
            data.append({
                'filename': file,
                'category': category,
                'width': width,
                'height': height,
                'file_path': file_path
            })

    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_file, index=False)
    print(f"Metadata created and saved to {output_file}")

def main(base_dir=r"C:\Users\matim\Downloads\Agritech project"):
    """Run the dataset pipeline: extract, organize, then build metadata.

    Inside ``base_dir`` the function expects
    ``Grapevine_Annotated_Dataset.zip`` and produces ``raw_dataset/``
    (extracted archive), ``organized_dataset/`` (images grouped by category)
    and ``image_metadata.csv``.

    Args:
        base_dir: Working directory for the pipeline. Defaults to the
            original author's project folder so existing ``main()`` calls
            behave as before; pass another path to run elsewhere.

    Returns:
        None. Progress and errors are reported with ``print``. If the zip
        file is missing the function returns early without processing.
    """
    # Configuration
    zip_path = os.path.join(base_dir, "Grapevine_Annotated_Dataset.zip")
    extract_path = os.path.join(base_dir, "raw_dataset")
    organized_dir = os.path.join(base_dir, "organized_dataset")
    metadata_file = os.path.join(base_dir, "image_metadata.csv")

    # Create base directory if it doesn't exist
    os.makedirs(base_dir, exist_ok=True)

    # Check if the zip file exists before starting any work
    if not os.path.exists(zip_path):
        print(f"Error: The file {zip_path} does not exist.")
        print("Please download the dataset manually and place it in the specified location.")
        return

    try:
        # Extract dataset
        print("Extracting dataset...")
        extract_dataset(zip_path, extract_path)

        # Organize images
        print("Organizing images...")
        organize_images(extract_path, organized_dir)

        # Create metadata
        print("Creating metadata...")
        create_metadata(organized_dir, metadata_file)

        print("Data processing and organization complete!")
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print(f"An error occurred during the process: {e}")
        print("Please check the error message and try again.")

# Run the pipeline only when this file is executed as a script, so that
# importing it does not trigger extraction or file copies.
if __name__ == "__main__":
    main()

Extracting dataset...
Extraction completed. Files extracted to C:\Users\matim\Downloads\Agritech project\raw_dataset
Organizing images...
Images organized in C:\Users\matim\Downloads\Agritech project\organized_dataset
Creating metadata...
Metadata created and saved to C:\Users\matim\Downloads\Agritech project\image_metadata.csv
Data processing and organization complete!
