In [1]:
import os
import shutil
from zipfile import ZipFile

# Locations: the read-only Kaggle input dataset, the writable directory that
# receives the combined copy, and the ZIP archive produced from that copy.
input_path = '/kaggle/input/final-dataset'
output_path = '/kaggle/working/combined-dataset'
zip_output_path = '/kaggle/working/combined-dataset.zip'

# Names of the dataset split folders processed under input_path
folders = ['Train', 'Test']

# Make sure the destination root is present before anything is copied into it
os.makedirs(output_path, exist_ok=True)

# Helper: recursively mirror the contents of one directory into another
def _copy_directory_contents(src_dir, dst_dir):
    """Recursively copy everything inside ``src_dir`` into ``dst_dir``.

    ``dst_dir`` is created if missing. Files that already exist at the
    destination are overwritten.
    """
    os.makedirs(dst_dir, exist_ok=True)

    for entry in os.listdir(src_dir):
        src_entry_path = os.path.join(src_dir, entry)
        dst_entry_path = os.path.join(dst_dir, entry)

        if os.path.isdir(src_entry_path):
            # shutil.copy cannot copy a directory, so descend into it instead
            _copy_directory_contents(src_entry_path, dst_entry_path)
        else:
            shutil.copy(src_entry_path, dst_entry_path)


# Function to combine datasets based on classes
def combine_datasets(input_path, output_path, folders):
    """Copy the class folders of each dataset split into ``output_path``.

    For every name in ``folders`` the layout
    ``input_path/<folder>/<class>/...`` is mirrored to
    ``output_path/<folder>/<class>/...``. Destination directories are created
    on demand and files already present at the destination are overwritten.

    Args:
        input_path: Root directory that contains the split folders.
        output_path: Root directory that receives the combined copy.
        folders: Iterable of split folder names (e.g. ``'Train'``, ``'Test'``).

    Raises:
        FileNotFoundError: If a split folder does not exist under
            ``input_path`` (raised by ``os.listdir``).
    """
    for folder in folders:
        input_folder_path = os.path.join(input_path, folder)
        output_folder_path = os.path.join(output_path, folder)

        # Ensure the output folder exists
        os.makedirs(output_folder_path, exist_ok=True)

        # Iterate through class folders in the Train/Test folder
        for class_folder in os.listdir(input_folder_path):
            class_input_path = os.path.join(input_folder_path, class_folder)

            # Only directories are classes. A stray file next to them (e.g.
            # .DS_Store or a labels CSV) would otherwise get an output
            # directory created under its name and then fail when listed.
            if not os.path.isdir(class_input_path):
                continue

            class_output_path = os.path.join(output_folder_path, class_folder)

            # Copy the whole class folder, including nested sub-directories
            _copy_directory_contents(class_input_path, class_output_path)

# Mirror the split/class folders from the input dataset into the working copy
combine_datasets(input_path, output_path, folders)

# Pack every file of the working copy into one archive; entry names are stored
# relative to output_path so the archive unpacks without the absolute prefix.
with ZipFile(zip_output_path, 'w') as archive:
    for current_dir, _subdirs, filenames in os.walk(output_path):
        for filename in filenames:
            absolute_path = os.path.join(current_dir, filename)
            archive.write(
                absolute_path,
                os.path.relpath(absolute_path, output_path),
            )

print(f"Combined dataset saved as ZIP at: {zip_output_path}")


Combined dataset saved as ZIP at: /kaggle/working/combined-dataset.zip
