In [1]:
import pandas as pd
import numpy as np
from PIL import Image

import torch
import torch.utils.data

# helper functions from https://github.com/pytorch/vision/tree/main/references/detection
from engine import train_one_epoch, evaluate
import utils
import transforms as T

In [2]:
# df = pd.read_csv("data/train/_annotations.csv", header=None, names=["img", "xmin", "ymin",        
#                                                                     "xmax", "ymax", "alpha"])
# df.drop(["alpha"], axis=1, inplace=True)

Directory structure:
```text
data/
├── test/
│   └── _annotations.csv
│   └── img1.jpg
│   └── img2.jpg
│   └── img3.jpg
│   └── ...
├── train/
│   └── _annotations.csv
│   └── img1.jpg
│   └── img2.jpg
│   └── img3.jpg
│   └── ...
└── valid/
    └── _annotations.csv
    └── img1.jpg
    └── img2.jpg
    └── img3.jpg
    └── ...
```

# Creating dataloader

In [3]:
class WildfireDataset(torch.utils.data.Dataset):
    def __init__(self, df_sample, annotations="_annotations.csv", transforms=None):
            '''
            df_sample : could be 'train', 'test' or 'valid'
            '''
            self.df_sample = df_sample
            self.annotations = annotations
            self.transforms = transforms
            self.path = "data/" + df_sample + "/"
            self.df = pd.read_csv(self.path + annotations, header=None, names=["img", "xmin", "ymin",        
                                                                    "xmax", "ymax", "alpha"])
            self.df.drop(["alpha"], axis=1, inplace=True)
            self.img_names = self.df["img"].to_numpy()
            
    def __getitem__(self, idx):
        # load images and bounding boxes
        img = Image.open(self.path + self.img_names[idx])
        boxes = self.df[self.df["img"] == self.img_names[idx]][self.df.columns[1:]].values
        num_objs = len(boxes)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        labels = torch.ones((num_objs,), dtype=torch.int64) # only one lable

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {} # an array of values that will predict
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        
        
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target
    
    def __len__(self):
        return len(self.img_names)



Test dataset class

In [4]:
dataset = WildfireDataset("train")
print(dataset.__getitem__(0))
%reset_selective -f dataset

(<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7F6846E49F70>, {'boxes': tensor([[482., 296., 525., 320.]]), 'labels': tensor([1]), 'image_id': tensor([0]), 'area': tensor([1032.]), 'iscrowd': tensor([0])})


# Training the model

In [5]:
def get_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer,
                                                       num_classes)

    return model

In [6]:
def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
