In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
from PIL import Image

In [None]:
# Compute device: use CUDA when torch reports it as available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Training hyperparameters
num_epochs = 10        # passes over the training loader
batch_size = 4         # samples per DataLoader batch
learning_rate = 5e-3   # learning rate handed to the SGD optimizer

In [None]:
# Step 1: Prepare MNIST with Synthetic Bounding Boxes
# Generate synthetic bounding boxes for MNIST
def create_synthetic_annotations(dataset, label_offset=1, image_size=(28, 28)):
    """Build one full-image detection target per sample of a classification dataset.

    Args:
        dataset: Iterable yielding ``(image, label)`` pairs. The image is
            ignored; only the label is used.
        label_offset: Value added to every label. torchvision detection models
            treat label ``0`` as background, so with the default of ``1`` the
            class ids ``0..9`` become detection labels ``1..10``. Pass ``0`` to
            keep the labels unshifted.
        image_size: ``(width, height)`` of the images; the synthetic box spans
            ``[0, 0, width, height]``.

    Returns:
        A list with one dict per sample, in iteration order, holding
        ``"boxes"`` (float32 tensor of shape ``(1, 4)``, laid out as
        ``[x_min, y_min, x_max, y_max]``) and ``"labels"`` (int64 tensor of
        shape ``(1,)``).
    """
    width, height = image_size
    full_image_box = [0, 0, width, height]  # simulated box covering the whole image

    annotations = []
    for _, label in dataset:
        annotations.append({
            "boxes": torch.tensor([full_image_box], dtype=torch.float32),
            # Shift so that no real class collides with the background id 0
            "labels": torch.tensor([int(label) + label_offset], dtype=torch.int64),
        })
    return annotations

# Load both MNIST splits (root ./data, download enabled), with images converted by ToTensor
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transforms.ToTensor())
    for is_train in (True, False)
)

# One synthetic detection target per sample, train split first
train_annotations, test_annotations = map(
    create_synthetic_annotations, (train_dataset, test_dataset)
)

# Custom Dataset Class
class CustomDataset(Dataset):
    """Pairs grayscale images with detection targets.

    Args:
        images: Indexable collection of single images. Each item may be a
            tensor (anything exposing ``.numpy()``) or an array-like, shaped
            either ``(H, W)`` or ``(C, H, W)``.
        annotations: Indexable collection of targets, aligned with ``images``.
        transforms: Optional callable applied to the RGB PIL image.

    ``__getitem__`` returns ``(image, target)`` where ``image`` is the
    (optionally transformed) RGB PIL image and ``target`` is
    ``annotations[idx]`` unchanged.
    """

    def __init__(self, images, annotations, transforms=None):
        self.images = images
        self.annotations = annotations
        self.transforms = transforms

    def __len__(self):
        """Number of samples, taken from ``images``."""
        return len(self.images)

    @staticmethod
    def _to_uint8_plane(image):
        """Return a 2-D uint8 ``(H, W)`` array for one grayscale image.

        Raises:
            ValueError: If the item is neither 2-D nor 3-D.
        """
        array = np.asarray(image)
        if array.ndim == 3:
            # (C, H, W) layout: keep the first channel only
            array = array[0]
        if array.ndim != 2:
            raise ValueError(
                f"expected an (H, W) or (C, H, W) image, got shape {np.asarray(image).shape}"
            )
        if np.issubdtype(array.dtype, np.floating):
            # Float images are taken to be in [0, 1]; rescale to 0..255
            return (array * 255).astype('uint8')
        # Integer images are taken to be 0..255 already. Multiplying them by
        # 255 would wrap around in uint8, so they are only cast.
        return array.astype('uint8')

    def __getitem__(self, idx):
        image = self.images[idx]
        if hasattr(image, "numpy"):
            image = image.numpy()

        # Indexing `[0]` unconditionally would pick the first pixel ROW of a
        # 2-D (H, W) item and yield a 1-pixel-wide image, so the rank is
        # checked before any channel is selected.
        plane = self._to_uint8_plane(image)
        image = Image.fromarray(plane).convert("RGB")
        target = self.annotations[idx]

        if self.transforms:
            image = self.transforms(image)

        return image, target

# Image transform applied by CustomDataset: PIL image -> tensor
transform = transforms.Compose([transforms.ToTensor()])

# Wrap each split's image stack and its annotations in a detection-style dataset
train_dataset_with_boxes = CustomDataset(train_dataset.data, train_annotations, transforms=transform)
test_dataset_with_boxes = CustomDataset(test_dataset.data, test_annotations, transforms=transform)

# Collate function for DataLoader
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into per-field tuples.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``;
    nothing is stacked into a single tensor.
    """
    return (*zip(*batch),)

# Batch iterators: the training split is shuffled, the test split keeps dataset order
train_loader = DataLoader(
    train_dataset_with_boxes,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
test_loader = DataLoader(
    test_dataset_with_boxes,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

In [None]:
# Step 2: Load Faster R-CNN Model
num_classes = 11  # 10 digits + 1 background class

# Start from the pre-trained Faster R-CNN (ResNet-50 FPN) detector
# NOTE(review): `pretrained=` may be deprecated in favour of `weights=` in
# newer torchvision releases — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the box-predictor head for a new one producing num_classes outputs,
# keeping the input width of the existing classification layer
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

In [None]:
# Step 3: Define Optimizer
# SGD with momentum and weight decay over all model parameters
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
)

In [None]:
# Step 4: Training Loop
for epoch in range(num_epochs):
    model.train()  # in this mode the model is called with targets and returns a dict of losses
    total_loss = 0

    for images, targets in train_loader:
        # Move every image and every target tensor onto the compute device
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Clear old gradients, then run the forward pass and add up the loss terms
        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        losses.backward()
        optimizer.step()

        # Running sum of batch losses (a total for the epoch, not a mean)
        total_loss += losses.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}")


In [None]:
# Step 5: Evaluation
# Collected per image: predicted label arrays and ground-truth label arrays
all_preds, all_targets = [], []

model.eval()
with torch.no_grad():  # no gradient tracking while predicting
    for images, targets in test_loader:
        images = [image.to(device) for image in images]
        outputs = model(images)

        for output, target in zip(outputs, targets):
            all_preds.append(output["labels"].cpu().numpy())
            all_targets.append(target["labels"].cpu().numpy())

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
from PIL import Image

# Compute device: use CUDA when torch reports it as available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training hyperparameters
num_epochs = 10        # passes over the training loader
batch_size = 4         # samples per DataLoader batch
learning_rate = 5e-3   # learning rate handed to the SGD optimizer

# Step 1: Prepare MNIST with Synthetic Bounding Boxes
# Generate synthetic bounding boxes for MNIST
def create_synthetic_annotations(dataset, label_offset=1, image_size=(28, 28)):
    """Build one full-image detection target per sample of a classification dataset.

    Args:
        dataset: Iterable yielding ``(image, label)`` pairs. The image is
            ignored; only the label is used.
        label_offset: Value added to every label. torchvision detection models
            treat label ``0`` as background, so with the default of ``1`` the
            class ids ``0..9`` become detection labels ``1..10``. Pass ``0`` to
            keep the labels unshifted.
        image_size: ``(width, height)`` of the images; the synthetic box spans
            ``[0, 0, width, height]``.

    Returns:
        A list with one dict per sample, in iteration order, holding
        ``"boxes"`` (float32 tensor of shape ``(1, 4)``, laid out as
        ``[x_min, y_min, x_max, y_max]``) and ``"labels"`` (int64 tensor of
        shape ``(1,)``).
    """
    width, height = image_size
    full_image_box = [0, 0, width, height]  # simulated box covering the whole image

    annotations = []
    for _, label in dataset:
        annotations.append({
            "boxes": torch.tensor([full_image_box], dtype=torch.float32),
            # Shift so that no real class collides with the background id 0
            "labels": torch.tensor([int(label) + label_offset], dtype=torch.int64),
        })
    return annotations

# Load both MNIST splits (root ./data, download enabled), with images converted by ToTensor
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transforms.ToTensor())
    for is_train in (True, False)
)

# One synthetic detection target per sample, train split first
train_annotations, test_annotations = map(
    create_synthetic_annotations, (train_dataset, test_dataset)
)

# Custom Dataset Class
class CustomDataset(Dataset):
    """Pairs grayscale images with detection targets.

    Args:
        images: Indexable collection of single images. Each item may be a
            tensor (anything exposing ``.numpy()``) or an array-like, shaped
            either ``(H, W)`` or ``(C, H, W)``.
        annotations: Indexable collection of targets, aligned with ``images``.
        transforms: Optional callable applied to the RGB PIL image.

    ``__getitem__`` returns ``(image, target)`` where ``image`` is the
    (optionally transformed) RGB PIL image and ``target`` is
    ``annotations[idx]`` unchanged.
    """

    def __init__(self, images, annotations, transforms=None):
        self.images = images
        self.annotations = annotations
        self.transforms = transforms

    def __len__(self):
        """Number of samples, taken from ``images``."""
        return len(self.images)

    @staticmethod
    def _to_uint8_plane(image):
        """Return a 2-D uint8 ``(H, W)`` array for one grayscale image.

        Raises:
            ValueError: If the item is neither 2-D nor 3-D.
        """
        array = np.asarray(image)
        if array.ndim == 3:
            # (C, H, W) layout: keep the first channel only
            array = array[0]
        if array.ndim != 2:
            raise ValueError(
                f"expected an (H, W) or (C, H, W) image, got shape {np.asarray(image).shape}"
            )
        if np.issubdtype(array.dtype, np.floating):
            # Float images are taken to be in [0, 1]; rescale to 0..255
            return (array * 255).astype('uint8')
        # Integer images are taken to be 0..255 already. Multiplying them by
        # 255 would wrap around in uint8, so they are only cast.
        return array.astype('uint8')

    def __getitem__(self, idx):
        image = self.images[idx]
        if hasattr(image, "numpy"):
            image = image.numpy()

        # Indexing `[0]` unconditionally would pick the first pixel ROW of a
        # 2-D (H, W) item and yield a 1-pixel-wide image, so the rank is
        # checked before any channel is selected.
        plane = self._to_uint8_plane(image)
        image = Image.fromarray(plane).convert("RGB")
        target = self.annotations[idx]

        if self.transforms:
            image = self.transforms(image)

        return image, target

# Image transform applied by CustomDataset: PIL image -> tensor
transform = transforms.Compose([transforms.ToTensor()])

# Wrap each split's image stack and its annotations in a detection-style dataset
train_dataset_with_boxes = CustomDataset(train_dataset.data, train_annotations, transforms=transform)
test_dataset_with_boxes = CustomDataset(test_dataset.data, test_annotations, transforms=transform)

# Collate function for DataLoader
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into per-field tuples.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``;
    nothing is stacked into a single tensor.
    """
    return (*zip(*batch),)

# Batch iterators: the training split is shuffled, the test split keeps dataset order
train_loader = DataLoader(
    train_dataset_with_boxes,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
test_loader = DataLoader(
    test_dataset_with_boxes,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

# Step 2: Load Faster R-CNN Model
num_classes = 11  # 10 digits + 1 background class

# Start from the pre-trained Faster R-CNN (ResNet-50 FPN) detector
# NOTE(review): `pretrained=` may be deprecated in favour of `weights=` in
# newer torchvision releases — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the box-predictor head for a new one producing num_classes outputs,
# keeping the input width of the existing classification layer
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

# Step 3: Define Optimizer
# SGD with momentum and weight decay over all model parameters
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
)

# Step 4: Training Loop
for epoch in range(num_epochs):
    model.train()  # in this mode the model is called with targets and returns a dict of losses
    total_loss = 0

    for images, targets in train_loader:
        # Move every image and every target tensor onto the compute device
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Clear old gradients, then run the forward pass and add up the loss terms
        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        losses.backward()
        optimizer.step()

        # Running sum of batch losses (a total for the epoch, not a mean)
        total_loss += losses.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}")

# Step 5: Evaluation
# Collected per image: predicted label arrays and ground-truth label arrays
all_preds, all_targets = [], []

model.eval()
with torch.no_grad():  # no gradient tracking while predicting
    for images, targets in test_loader:
        images = [image.to(device) for image in images]
        outputs = model(images)

        for output, target in zip(outputs, targets):
            all_preds.append(output["labels"].cpu().numpy())
            all_targets.append(target["labels"].cpu().numpy())

print("Evaluation complete!")


Epoch [1/10], Loss: 315.0721
Epoch [2/10], Loss: 289.7153
Epoch [3/10], Loss: 289.7585
