In [52]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import argparse
from tqdm import tqdm
from torch.utils.data import DataLoader
from multi_digit_cnn import MultiDigitCNN  # Import the CNN model
from image_preprocessing import preprocess_image  # Import preprocessing function

In [4]:
# Preprocessing pipeline applied to each image by the dataset below.
# This cell only builds the transform; it does not load any data itself.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Collapse to a single grayscale channel
    transforms.Resize((1024, 1024)),  # Resize as per the paper — NOTE(review): much larger than typical SVHN crops; confirm against the paper
    transforms.ToTensor(),  # PIL image -> float tensor
    transforms.Normalize(mean=[0.5], std=[0.5])  # (x - 0.5) / 0.5; maps [0, 1] inputs to [-1, 1]
])

In [None]:
import pandas as pd
import h5py
import numpy as np

# Define path to digitStruct.mat
mat_file_path = "data/train/digitStruct.mat"  # Update with actual path

# Open the annotation file read-only. The handle is intentionally left open:
# `bbox_dataset` is indexed later in this cell, which needs the file to
# still be open at that point.
f = h5py.File(mat_file_path, 'r')
# `.get` returns None when the key is absent, in which case the `len` call
# below fails with a TypeError.
bbox_dataset = f.get('digitStruct/bbox')
# One bbox entry per annotated image.
num_images = len(bbox_dataset)


def extract_labels(bbox_dataset, img_num):
    """Return the digit labels stored for one image of ``digitStruct.mat``.

    Args:
        bbox_dataset: The ``digitStruct/bbox`` h5py dataset. Entry
            ``[img_num][0]`` is an object reference to that image's bbox
            group, which contains a ``"label"`` dataset.
        img_num (int): Zero-based index of the image in ``bbox_dataset``.

    Returns:
        np.ndarray: 1-D array of Python-int labels, one per digit, in stored
        order. Values are returned exactly as stored (the recorded run of
        this notebook includes a label of 10).
        NOTE(review): SVHN presumably uses label 10 for the digit 0 —
        confirm before treating 10 as a padding value downstream.
    """
    # Resolve references through the file that owns the dataset instead of a
    # notebook-level global, so the function works for any open file handle.
    h5_file = bbox_dataset.file
    label_ref = h5_file[bbox_dataset[img_num][0]]["label"]
    num_digits = label_ref.shape[0]

    if num_digits == 1:
        # A lone digit is stored inline as a one-element numeric dataset,
        # not as an object reference, so read the value directly.
        raw_values = [label_ref[()].item()]
    else:
        # Several digits: each row holds a reference to a one-element dataset.
        raw_values = [h5_file[label_ref[i][0]][()].item() for i in range(num_digits)]

    return np.array([int(value) for value in raw_values])

# Print the decoded label sequence of every image, numbering images from 1
for i in range(num_images):
    image_labels = extract_labels(bbox_dataset, i)
    print(f"Num_Image: {i+1}, Labels: {image_labels}")



(2, 1)
Num_Image: 1, Labels: [1, 9]
(2, 1)
Num_Image: 2, Labels: [2, 3]
(2, 1)
Num_Image: 3, Labels: [2, 5]
(2, 1)
Num_Image: 4, Labels: [9, 3]
(2, 1)
Num_Image: 5, Labels: [3, 1]
(2, 1)
Num_Image: 6, Labels: [3, 3]
(2, 1)
Num_Image: 7, Labels: [2, 8]
(3, 1)
Num_Image: 8, Labels: [7, 4, 4]
(3, 1)
Num_Image: 9, Labels: [1, 2, 8]
(2, 1)
Num_Image: 10, Labels: [1, 6]
(2, 1)
Num_Image: 11, Labels: [2, 3]
(2, 1)
Num_Image: 12, Labels: [6, 3]
(2, 1)
Num_Image: 13, Labels: [4, 2]
(2, 1)
Num_Image: 14, Labels: [5, 8]
(2, 1)
Num_Image: 15, Labels: [1, 6]
(2, 1)
Num_Image: 16, Labels: [2, 3]
(2, 1)
Num_Image: 17, Labels: [7, 9]
(2, 1)
Num_Image: 18, Labels: [5, 3]
(3, 1)
Num_Image: 19, Labels: [2, 2, 2]
(2, 1)
Num_Image: 20, Labels: [6, 2]
(1, 1)
Num_Image: 21, Labels: 2
(3, 1)
Num_Image: 22, Labels: [5, 1, 5]
(2, 1)
Num_Image: 23, Labels: [4, 7]
(2, 1)
Num_Image: 24, Labels: [8, 9]
(3, 1)
Num_Image: 25, Labels: [6, 10, 1]
(2, 1)
Num_Image: 26, Labels: [2, 4]
(2, 1)
Num_Image: 27, Labels: [5, 6]

In [54]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

class SVHNDataset(Dataset):
    """Map-style dataset yielding ``(image, label_tensor)`` pairs.

    Each sample's variable-length digit sequence is right-padded with
    ``pad_value`` to a fixed length of ``max_digits`` so samples can be
    stacked into a batch.

    NOTE(review): the recorded label extraction in this notebook emits raw
    label values that include 10. With the default ``pad_value=10`` such a
    label cannot be told apart from padding — confirm that ``labels`` are
    remapped upstream or pass a different ``pad_value``.
    """

    def __init__(self, root_dir, annotations, transform=None, max_digits=6, pad_value=10):
        """
        Args:
            root_dir (str): Path to the directory containing images.
            annotations (DataFrame): DataFrame with 'filename' and 'labels'.
            transform (callable, optional): Image transformations.
            max_digits (int): Maximum number of digits per image.
            pad_value (int): Value used for padding shorter label sequences.
        """
        self.root_dir = root_dir
        self.annotations = annotations
        self.transform = transform
        self.max_digits = max_digits
        self.pad_value = pad_value

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Args:
            idx (int): Positional row index into ``annotations``.

        Returns:
            tuple: ``(image, label_tensor)``. ``image`` is the RGB image after
            ``transform`` (or the untransformed RGB image when no transform
            was given); ``label_tensor`` is a ``torch.long`` tensor of shape
            ``(max_digits,)``.

        Raises:
            ValueError: If the sample has more than ``max_digits`` labels.
        """
        # Fetch the row once; both the filename and the labels come from it.
        row = self.annotations.iloc[idx]

        # Load image. The context manager closes the underlying file handle
        # as soon as the pixel data has been copied by `convert`, so handles
        # are not left open across many samples / worker processes.
        img_name = os.path.join(self.root_dir, row["filename"])
        with Image.open(img_name) as img:
            image = img.convert("RGB")

        # Load multi-digit label
        labels = row["labels"]
        num_labels = len(labels)
        if num_labels > self.max_digits:
            # Fail with a clear message instead of an opaque tensor-size error.
            raise ValueError(
                f"Sample {idx} has {num_labels} labels, "
                f"which exceeds max_digits={self.max_digits}"
            )

        # Convert labels to fixed-length tensor (with padding)
        label_tensor = torch.full((self.max_digits,), self.pad_value, dtype=torch.long)
        label_tensor[:num_labels] = torch.tensor(labels, dtype=torch.long)

        # Apply transformations
        if self.transform is not None:
            image = self.transform(image)

        return image, label_tensor



In [29]:
# Define dataset directory and DataFrame (df_corrected contains filename-label mapping)
# NOTE(review): `df_corrected` is not defined in any cell visible here — the
# recorded run of this cell ends in NameError. It must be built (a DataFrame
# with 'filename' and 'labels' columns) before this cell can run.
image_dir = "data/train"  # Change this to your actual image directory

# Create dataset
svhn_dataset = SVHNDataset(root_dir=image_dir, annotations=df_corrected, transform=transform)

# Create DataLoader
batch_size = 32
train_loader = DataLoader(svhn_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# Get a batch to verify
images, labels = next(iter(train_loader))

# Display shapes
print("Image batch shape:", images.shape)  # Expected with the `transform` defined earlier (Grayscale(1) + Resize((1024, 1024))): (batch_size, 1, 1024, 1024)
print("Labels batch shape:", labels.shape)  # Expected: (batch_size, max_digits)


NameError: name 'df_corrected' is not defined