In [3]:
import zipfile
import os
from PIL import Image
import numpy as np

# Step 1: Extract the ZIP file
zip_file_path = 'German.zip'  # Replace with your ZIP file path
extract_dir = 'GermanExtractedFiles'  # Directory to extract files

# Extract the ZIP file.
# The archive is unpacked into a staging directory that is renamed to
# `extract_dir` only after extractall() has finished. If extraction is
# interrupted, `extract_dir` does not exist yet, so the next run retries
# instead of silently skipping over a half-extracted tree.
if not os.path.exists(extract_dir):  # Avoid re-extraction if already done
    staging_dir = extract_dir + '.partial'
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # Files left in the staging directory by an earlier failed attempt
        # are overwritten by members of the same name.
        zip_ref.extractall(staging_dir)
    os.rename(staging_dir, extract_dir)
    print(f"ZIP file extracted to '{extract_dir}'!")
else:
    print(f"Directory '{extract_dir}' already exists. Skipping extraction.")

ZIP file extracted to 'GermanExtractedFiles'!


In [23]:
# Step 2: Load and preprocess the images under the extracted 'TestIJCNN2013'
# folder.
#
# Produces two parallel NumPy arrays:
#   images - one entry per successfully loaded file; each image is converted
#            to RGB, resized to `image_size` and scaled by 1/255
#   labels - the name of the folder that directly contains each image
#
# NOTE(review): because the label is the containing folder's name, every file
# that sits directly in a container folder is labelled with that folder's
# name - confirm this is the labelling you want for this split.
dataset_path = os.path.join(extract_dir, 'TestIJCNN2013')

image_size = (128, 128)  # Resize all images to 128x128
image_extensions = ('.jpg', '.png', '.ppm', '.jpeg')  # Matched case-insensitively
images = []
labels = []

# Debug: Check if the dataset folder exists. This only reports the problem;
# os.walk() below yields nothing for a missing folder, so the summary then
# prints the "No images were loaded" message.
if not os.path.exists(dataset_path):
    print(f"Error: Dataset folder '{dataset_path}' not found!")

# Loop through extracted files
for root, dirs, files in os.walk(dataset_path):
    # Sort in place so os.walk descends in a reproducible order; combined with
    # sorted(files) below, `images`/`labels` no longer depend on the order in
    # which the filesystem happens to list entries.
    dirs.sort()
    print(f"Scanning directory: {root}, Found {len(files)} files")  # Debugging line
    for file_name in sorted(files):
        if not file_name.lower().endswith(image_extensions):
            continue
        file_path = os.path.join(root, file_name)

        try:
            # The context manager closes the underlying file as soon as the
            # pixels have been read, instead of leaving that to PIL internals
            # or garbage collection.
            with Image.open(file_path) as source_image:
                image = source_image.convert('RGB').resize(image_size)

            # Convert the image to a NumPy array scaled by 1/255
            image_array = np.array(image) / 255.0

            # Extract label from folder name
            label = os.path.basename(root)
        except Exception as e:
            # Best-effort loading: report the unreadable file and move on.
            print(f"Error loading image {file_path}: {e}")
        else:
            # Append both together so the two lists can never get out of step.
            images.append(image_array)
            labels.append(label)

# Convert to NumPy arrays
images = np.array(images)
labels = np.array(labels)

# Step 3: Print dataset summary
if len(images) > 0:
    print(f"Total images loaded: {len(images)}")
    print(f"Image shape: {images[0].shape} (Each image is resized to {image_size})")
    print(f"Unique labels: {set(labels)}")
else:
    print("No images were loaded. Check the dataset path and file formats.")

Scanning directory: GermanExtractedFiles\TestIJCNN2013, Found 0 files
Scanning directory: GermanExtractedFiles\TestIJCNN2013\TestIJCNN2013Download, Found 301 files
Total images loaded: 300
Image shape: (128, 128, 3) (Each image is resized to (128, 128))
Unique labels: {'TestIJCNN2013Download'}


In [25]:
# Step 2: Load and preprocess the images under the extracted 'TrainIJCNN2013'
# folder.
#
# Produces two parallel NumPy arrays:
#   images - one entry per successfully loaded file; each image is converted
#            to RGB, resized to `image_size` and scaled by 1/255
#   labels - the name of the folder that directly contains each image
#
# NOTE(review): `images` and `labels` are rebound here, so any arrays stored
# under these names by an earlier cell are no longer reachable afterwards.
# NOTE(review): because the label is the containing folder's name, every file
# that sits directly in a container folder (rather than in one of its
# sub-folders) is labelled with the container's name - confirm those files
# are meant to be part of the labelled set.
dataset_path = os.path.join(extract_dir, 'TrainIJCNN2013')

image_size = (128, 128)  # Resize all images to 128x128
image_extensions = ('.jpg', '.png', '.ppm', '.jpeg')  # Matched case-insensitively
images = []
labels = []

# Debug: Check if the dataset folder exists. This only reports the problem;
# os.walk() below yields nothing for a missing folder, so the summary then
# prints the "No images were loaded" message.
if not os.path.exists(dataset_path):
    print(f"Error: Dataset folder '{dataset_path}' not found!")

# Loop through extracted files
for root, dirs, files in os.walk(dataset_path):
    # Sort in place so os.walk descends in a reproducible order; combined with
    # sorted(files) below, `images`/`labels` no longer depend on the order in
    # which the filesystem happens to list entries.
    dirs.sort()
    print(f"Scanning directory: {root}, Found {len(files)} files")  # Debugging line
    for file_name in sorted(files):
        if not file_name.lower().endswith(image_extensions):
            continue
        file_path = os.path.join(root, file_name)

        try:
            # The context manager closes the underlying file as soon as the
            # pixels have been read, instead of leaving that to PIL internals
            # or garbage collection.
            with Image.open(file_path) as source_image:
                image = source_image.convert('RGB').resize(image_size)

            # Convert the image to a NumPy array scaled by 1/255
            image_array = np.array(image) / 255.0

            # Extract label from folder name
            label = os.path.basename(root)
        except Exception as e:
            # Best-effort loading: report the unreadable file and move on.
            print(f"Error loading image {file_path}: {e}")
        else:
            # Append both together so the two lists can never get out of step.
            images.append(image_array)
            labels.append(label)

# Convert to NumPy arrays
images = np.array(images)
labels = np.array(labels)

# Step 3: Print dataset summary
if len(images) > 0:
    print(f"Total images loaded: {len(images)}")
    print(f"Image shape: {images[0].shape} (Each image is resized to {image_size})")
    print(f"Unique labels: {set(labels)}")
else:
    print("No images were loaded. Check the dataset path and file formats.")

Scanning directory: GermanExtractedFiles\TrainIJCNN2013, Found 0 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013, Found 603 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\00, Found 4 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\01, Found 48 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\02, Found 59 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\03, Found 21 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\04, Found 31 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\05, Found 37 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\06, Found 17 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\07, Found 37 files
Scanning directory: GermanExtractedFiles\TrainIJCNN2013\TrainIJCNN2013\08, Found 47 files
Scanning directory: GermanExtrac