In [11]:
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [12]:
# Size of the classification head. torchvision detection models count the
# background as class 0, so 2 means "background + one foreground class".
# A dataset with two foreground classes would need num_classes = 3.
num_classes = 2

# Faster R-CNN (ResNet-50 + FPN backbone) initialised from pre-trained weights.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision releases
# in favour of the `weights=` argument - confirm against the installed version.
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the stock box predictor for a fresh one sized for this dataset. The new
# head must accept the same feature width the old classifier layer consumed.
roi_heads = model.roi_heads
in_features = roi_heads.box_predictor.cls_score.in_features
roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [13]:
import os
import torch
import cv2
from torch.utils.data import Dataset
import torchvision.transforms as T

class CustomDataset(Dataset):
    """Detection dataset that pairs images with YOLO-format label files.

    Every image ``<name>.<ext>`` in ``image_dir`` is matched with
    ``<name>.txt`` in ``label_dir``. Each non-blank label line must hold
    ``class_id x_center y_center width height`` with the four geometry values
    normalised to the image size. An image whose label file is missing or
    empty is returned as a negative sample (zero boxes).

    Args:
        image_dir: Directory containing ``.jpg`` / ``.png`` / ``.jpeg`` images.
        label_dir: Directory containing the matching ``.txt`` label files.
        transforms: Optional callable applied to the RGB image array only
            (the boxes are not transformed, so it must not change geometry).
    """

    def __init__(self, image_dir, label_dir, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transforms = transforms

        # Sorted so that the index -> file mapping is stable between runs.
        self.image_files = sorted([
            f for f in os.listdir(image_dir)
            if f.endswith(('.jpg', '.png', '.jpeg'))
        ])

    def __len__(self):
        """Return the number of images found in ``image_dir``."""
        return len(self.image_files)

    @staticmethod
    def _read_yolo_labels(label_path, width, height):
        """Parse one YOLO label file into pixel-space boxes and class labels.

        Args:
            label_path: Path of the ``.txt`` label file (may not exist).
            width: Image width in pixels, used to de-normalise x values.
            height: Image height in pixels, used to de-normalise y values.

        Returns:
            ``(boxes, labels)`` where ``boxes`` is a list of
            ``[x_min, y_min, x_max, y_max]`` and ``labels`` a list of ints
            shifted by +1 so that 0 stays reserved for the background.

        Raises:
            ValueError: If a non-blank line does not hold exactly five numbers.
        """
        boxes = []
        labels = []
        if not os.path.exists(label_path):
            return boxes, labels

        with open(label_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no annotation.
                    continue
                if len(parts) != 5:
                    raise ValueError(
                        f"{label_path}:{line_no}: expected 5 values "
                        f"(class x_center y_center width height), got {len(parts)}"
                    )
                class_id, x_center, y_center, w, h = map(float, parts)

                # Convert from normalised YOLO format to absolute
                # [x_min, y_min, x_max, y_max].
                x_center *= width
                y_center *= height
                w *= width
                h *= height

                boxes.append([
                    x_center - w / 2,
                    y_center - h / 2,
                    x_center + w / 2,
                    y_center + h / 2,
                ])
                labels.append(int(class_id) + 1)  # Make sure class starts from 1
        return boxes, labels

    def __getitem__(self, idx):
        """Load the ``idx``-th image and its detection target.

        Returns:
            ``(image, target)``. ``image`` is the RGB image (after
            ``transforms`` if one was given). ``target`` is a dict with
            ``"boxes"`` (float32, shape ``[N, 4]``), ``"labels"`` (int64,
            shape ``[N]``) and ``"image_id"`` (tensor holding ``idx``).

        Raises:
            OSError: If the image file cannot be read or decoded.
            ValueError: If the label file contains a malformed line.
        """
        image_filename = self.image_files[idx]
        image_path = os.path.join(self.image_dir, image_filename)
        label_path = os.path.join(self.label_dir, os.path.splitext(image_filename)[0] + ".txt")

        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        image = cv2.imread(image_path)
        if image is None:
            raise OSError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width = image.shape[:2]

        boxes, labels = self._read_yolo_labels(label_path, width, height)

        # An empty Python list would become a tensor of shape [0]; detection
        # models require [N, 4] even when N == 0, so build that case explicitly.
        if boxes:
            boxes = torch.tensor(boxes, dtype=torch.float32)
        else:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx])
        }

        if self.transforms:
            image = self.transforms(image)

        return image, target


In [14]:
import torchvision.transforms as T

# Image preprocessing pipeline. ToTensor is the only step: it turns an HxWxC
# uint8 array / PIL image into a CxHxW float tensor scaled to [0, 1]. No
# geometric augmentation is applied, so bounding boxes remain valid as-is.
transform = T.Compose([T.ToTensor()])

In [15]:
# Dataset location. NOTE(review): this is a machine-specific absolute path;
# point DATA_ROOT at your own copy of the dataset before running.
DATA_ROOT = "/Users/jin/Documents/GitHub/datasets/Fingerprint pattern/v1/standard_arch dataset v1/train"
VALID_SIZE = 50   # number of images held out for validation
SPLIT_SEED = 42   # fixed seed so the train/validation split is reproducible


def detection_collate(batch):
    """Turn a list of (image, target) pairs into (images, targets) tuples.

    Detection samples carry a variable number of boxes per image, so they
    cannot be stacked into one tensor the way the default collate does. A
    named module-level function (unlike a lambda) also stays picklable if
    the loaders are later given worker processes.
    """
    return tuple(zip(*batch))


# Load dataset
dataset = CustomDataset(
    image_dir=f"{DATA_ROOT}/images",
    label_dir=f"{DATA_ROOT}/labels",
    transforms=transform,
)

# Fail early with a clear message: slicing off VALID_SIZE items from a dataset
# that small would leave an empty training subset.
if len(dataset) <= VALID_SIZE:
    raise ValueError(
        f"Dataset has {len(dataset)} images; need more than {VALID_SIZE} "
        "to hold out a validation set."
    )

# Split into train and validation sets using a seeded random permutation.
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(len(dataset), generator=split_rng).tolist()
train_dataset = torch.utils.data.Subset(dataset, indices[:-VALID_SIZE])
valid_dataset = torch.utils.data.Subset(dataset, indices[-VALID_SIZE:])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True,
                          collate_fn=detection_collate)
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=False,
                          collate_fn=detection_collate)

In [16]:
# Run on the GPU when one is present, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Optimise only the parameters that are still trainable.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Multiply the learning rate by 0.1 every 3 scheduler steps. With only 3
# epochs below, the first decay lands after the final epoch has finished.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)


def _train_one_epoch(model, loader, optimizer, device):
    """Run one pass over `loader` and return the sum of the per-batch losses."""
    model.train()
    running_loss = 0.0
    for batch_images, batch_targets in loader:
        batch_images = [im.to(device) for im in batch_images]
        batch_targets = [
            {key: value.to(device) for key, value in tgt.items()}
            for tgt in batch_targets
        ]

        optimizer.zero_grad()

        # In train mode the model returns a dict of loss terms; their sum is
        # the quantity that gets back-propagated.
        batch_loss = sum(model(batch_images, batch_targets).values())
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss


# Train the model
num_epochs = 3
for epoch in range(num_epochs):
    train_loss = _train_one_epoch(model, train_loader, optimizer, device)

    # Advance the schedule once per epoch, then report the mean batch loss.
    lr_scheduler.step()
    print(f'Epoch: {epoch + 1}, Loss: {train_loss / len(train_loader)}')
print("Training complete!")

Epoch: 1, Loss: 0.9924055604558242
Epoch: 2, Loss: 0.5935266064970117
Epoch: 3, Loss: 0.5256115245191675
Training complete!


In [17]:
# Switch to evaluation mode: the detector now returns predictions
# (boxes / labels / scores) instead of a dict of losses.
model.eval()

# Run every validation batch through the model without tracking gradients
# and print the raw, unfiltered detections for the first image of each batch.
with torch.no_grad():
    for images, _targets in valid_loader:
        images = [img.to(device) for img in images]
        first_prediction = model(images)[0]
        print(first_prediction['boxes'])
        print(first_prediction['labels'])


tensor([[ 56.3921,   2.9022, 239.8567,  93.2407],
        [ 56.2282,  86.0404, 225.1559, 120.7215],
        [ 56.2522,  65.8210, 224.5156, 100.2525],
        [ 74.5040,  43.2841, 233.4493,  80.7620],
        [ 67.3717, 105.8419, 233.5684, 141.1762],
        [102.3403,  11.8732, 200.7585,  93.9527],
        [ 84.3384,  72.7186, 237.8387, 112.1200],
        [103.3822,  63.3907, 201.4525, 144.9570],
        [ 68.3047,  23.0465, 229.4119,  59.9495],
        [ 56.6734,   4.2122, 226.1692,  38.9015],
        [ 20.4224,  61.7952, 256.0000, 137.3971],
        [ 23.0091, 100.3154, 225.6618, 129.5054],
        [ 43.7749,  19.2752, 256.0000,  46.6501],
        [110.2022,   1.7493, 213.5853,  57.0366],
        [ 33.0565, 120.5286, 242.4099, 149.7519],
        [ 31.3008,  37.7520, 218.8332,  68.8878],
        [ 79.0136,  38.7200, 193.6298, 127.6466],
        [ 16.6159,  79.9667, 220.7029, 109.1118],
        [ 97.2839, 134.3909, 256.0000, 171.8141],
        [  0.0000, 101.2281, 256.0000, 173.3963],


In [35]:
from PIL import Image
# Load image. The context manager closes the file handle once the pixels are
# decoded. convert("RGB") forces three channels so the input matches the
# training data, which was loaded as 3-channel RGB; a TIFF may otherwise be
# grayscale, palette-based or carry an alpha channel.
with Image.open("test_images/arch/arch_3.tif") as image_file:
    pil_image = image_file.convert("RGB")

# Apply the same transformation as for training, then add a batch dimension.
img = transform(pil_image)
img = img.unsqueeze(0).to(device)

# Model prediction
model.eval()
with torch.no_grad():
    prediction = model(img)
scores = prediction[0]['scores']

# Keep only detections whose confidence exceeds the threshold.
# NOTE(review): 0.03 is very permissive and keeps many overlapping boxes -
# confirm this value is intentional.
threshold = 0.03
conf_mask = scores > threshold

# Print the predicted bounding boxes and labels
print(prediction[0]['boxes'][conf_mask])
print(prediction[0]['labels'][conf_mask])

tensor([[ 53.7582,   5.4758, 268.0063,  49.9969],
        [ 53.0059,   3.3054, 283.5370, 119.3975],
        [ 65.3791,  57.4891, 274.7841, 102.3167],
        [117.0816,   3.7758, 241.6418, 105.1724],
        [ 87.4906,  14.9046, 285.4268,  64.2436],
        [ 92.5538,  40.8661, 287.7224,  90.0456],
        [ 77.0517,  82.6359, 283.1801, 128.2363],
        [121.6125,  43.5112, 251.1219, 145.1951],
        [ 48.7955,  32.1668, 264.3596,  75.7324],
        [ 78.1005, 108.7204, 283.0779, 154.5441]])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
