In [1]:
# dataset in D:\DATA\APPLE\Roboflow\

# folder structure
# /train
#   _annotations.coco.json
#   img1.jpg
#   img2.jpg
#   ...
# /valid
#   _annotations.coco.json
#   img1.jpg
#   img2.jpg
#   ...

import os
from torchvision.datasets import CocoDetection
from torchvision import transforms
from torch.utils.data import DataLoader

# Define paths
train_images_dir = "D:/DATA/APPLE/Roboflow/train"
train_annotations_file = os.path.join(train_images_dir, "_annotations.coco.json")
valid_images_dir = "D:/DATA/APPLE/Roboflow/valid"
valid_annotations_file = os.path.join(valid_images_dir, "_annotations.coco.json")

# Define a basic transform (resizing, normalization, etc. as needed)
transform = transforms.Compose(
    [
        # transforms.Resize(
        #     (224, 224)
        # ),
        transforms.ToTensor(),
    ]
)


# Create custom dataset for COCO format
class CustomCocoDataset(CocoDetection):
    def __getitem__(self, index):
        img, target = super().__getitem__(index)
        # Preprocess target as needed

        bbox = [ann["bbox"] for ann in target]  # Get all bboxes in the target
        category_id = [ann["category_id"] for ann in target]  # Get all category IDs

        # Return image, bbox, and category information as a dictionary
        count = len(bbox)
        return img, count
    # {"bboxes": bbox, "category_ids": category_id}

    """TO DO"""
    def _view(self,index):
        """to visualize the annotated image

        Args:
            index (int): for image at specific index
        """
        pass

In [2]:
# Initialize train and validation datasets
train_dataset = CustomCocoDataset(
    root=train_images_dir, 
    annFile=train_annotations_file, 
    transform=transform
)

valid_dataset = CustomCocoDataset(
    root=valid_images_dir, 
    annFile=valid_annotations_file, 
    transform=transform
)

# Create DataLoaders
train_loader = DataLoader(
    train_dataset,
    batch_size=10,  # Adjust batch size as needed
    shuffle=True,
    # num_workers=2,  # Adjust num_workers based on your system
    # collate_fn=lambda x: tuple(
    #     zip(*x)
    # ),  # Use custom collate_fn to handle variable data sizes
)

valid_loader = DataLoader(
    valid_dataset,
    batch_size=40,
    shuffle=False,
    # num_workers=2,
    # collate_fn=lambda x: tuple(zip(*x)),
)

# Sample usage to test the DataLoader
for images, targets in train_loader:
    print("Images batch shape:", images.shape)  # Shape of each image in the batch
    print("Targets batch:", targets)  # Bounding boxes and category IDs
    
    print()

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tensor([ 4,  2,  5, 18,  4,  1,  4,  8,  3, 12])

Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tensor([2, 4, 2, 1, 8, 7, 5, 2, 1, 1])

Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tensor([1, 1, 1, 1, 5, 1, 1, 1, 1, 5])

Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tensor([ 3,  2,  2,  1,  1, 10,  3,  1,  1,  3])

Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tensor([2, 1, 6, 4, 1, 1, 4, 3, 1, 1])

Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tensor([6, 1, 1, 2, 2, 1, 6, 3, 1, 5])

Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tensor([ 1,  9, 15,  3,  1,  1,  2,  1,  1,  1])

Images batch shape: torch.Size([10, 3, 192, 192])
Targets batch: tens

In [3]:
print(train_dataset[2][1])
train_dataset[10][0].shape

6


torch.Size([3, 192, 192])

In [4]:
import torchvision
from torch import nn

model = torchvision.models.resnet101(pretrained=True, progress=True)



In [5]:
# training loop
import torch.optim

loss_fn = torch.nn.CrossEntropyLoss()

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)


optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

In [6]:
model.train()

for epoch in range(10):

    for images, targets in train_loader:

        # print(images.shape, targets.shape)  # torch.Size([4, 3, 192, 192]) torch.Size([4])
        images = images.to(device)
        targets = targets.to(device)
        output = model(images)
        # print(output.shape) # torch.Size([4, 1000])
        loss = loss_fn(output,targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(loss)

tensor(3.4770, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.0063, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2454, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.4774, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(3.1142, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3711, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.1979, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.5718, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.8413, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.1581, device='cuda:0', grad_fn=<NllLossBackward0>)


In [8]:
import torch

# Assuming you're in the training loop
correct = 0
total = 0

model.eval()  # Set the model to evaluation mode for accuracy calculation
with torch.no_grad():  # Disable gradient computation for efficiency
    for images, targets in valid_loader:
        images, targets = images.to(device), targets.to(device)

        # Forward pass to get model predictions
        output = model(images)  # Output shape is [batch_size, 1000] for classification

        # Get the predicted classes (argmax along the class dimension)
        _, predicted = torch.max(output, dim=1)

        # Update correct predictions count
        correct += (predicted == targets).sum().item()
        total += targets.size(0)

# Calculate accuracy
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 38.36%
