In [1]:
import pandas as pd
import numpy as np
from PIL import Image

import torch
import torch.utils.data
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# helper functions from https://github.com/pytorch/vision/tree/main/references/detection
import sys
sys.path.insert(1, './lib')

from engine import train_one_epoch, evaluate
import utils
import transforms as T

In [2]:
# df = pd.read_csv("data/train/_annotations.csv", header=None, names=["img", "xmin", "ymin",        
#                                                                     "xmax", "ymax", "alpha"])
# df.drop(["alpha"], axis=1, inplace=True)

Directory structure:
```text
data/
├── test/
│   └── _annotations.csv
│   └── img1.jpg
│   └── img2.jpg
│   └── img3.jpg
│   └── ...
├── train/
│   └── _annotations.csv
│   └── img1.jpg
│   └── img2.jpg
│   └── img3.jpg
│   └── ...
└── valid/
    └── _annotations.csv
    └── img1.jpg
    └── img2.jpg
    └── img3.jpg
    └── ...
```

## Creating dataset class

In [3]:
class WildfireDataset(torch.utils.data.Dataset):
    def __init__(self, df_sample, annotations="_annotations.csv", transforms=None):
            '''
            df_sample : could be 'train', 'test' or 'valid'
            '''
            self.df_sample = df_sample
            self.annotations = annotations
            self.transforms = transforms
            self.path = "data/" + df_sample + "/"
            self.df = pd.read_csv(self.path + annotations, header=None, names=["img", "xmin", "ymin",        
                                                                    "xmax", "ymax", "alpha"])
            self.df.drop(["alpha"], axis=1, inplace=True)
            self.img_names = self.df["img"].to_numpy()
            
    def __getitem__(self, idx):
        # load images and bounding boxes
        img = Image.open(self.path + self.img_names[idx])
        boxes = self.df[self.df["img"] == self.img_names[idx]][self.df.columns[1:]].values
        num_objs = len(boxes)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        labels = torch.ones((num_objs,), dtype=torch.int64) # only one lable

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {} # an array of values that will predict
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        
        
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target
    
    def __len__(self):
        return len(self.img_names)



Test dataset class

In [4]:
dataset = WildfireDataset("train")
print(dataset.__getitem__(0))
%reset_selective -f dataset

(<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7F96208BCF40>, {'boxes': tensor([[482., 296., 525., 320.]]), 'labels': tensor([1]), 'image_id': tensor([0]), 'area': tensor([1032.]), 'iscrowd': tensor([0])})


## Configuring the model

In [5]:
def get_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new on
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

Transform function increases number of examples for training by applying random rotation 

In [6]:
def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


In [7]:
# creating dataset instances
dataset = WildfireDataset("train", transforms=get_transform(train=True))
dataset_test = WildfireDataset("test", transforms=get_transform(train=False))

# # spliting them to samples
# test_size = 40
# indices = torch.randperm(len(dataset)).tolist()
# dataset = torch.utils.data.Subset(dataset, indices[:-test_size])
# dataset_test = torch.utils.data.Subset(dataset_test, indices[-test_size:])

# defining data loaders
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4,
                                          collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1, shuffle=False, 
                                               num_workers=4,collate_fn=utils.collate_fn)


## Training the model

Checks if gpu acceleration available

In [8]:
torch.cuda.is_available()

True

In [9]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2 # wildfire and not wildfire 
model = get_model(num_classes) # creating model
model.to(device) # moving to device

# setups an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# learning rate scheduler which decreases the learning rate by `gamma` every `step_size` epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [10]:
num_epochs = 10
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=25)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [  0/258]  eta: 0:03:20  lr: 0.000024  loss: 0.9646 (0.9646)  loss_classifier: 0.7263 (0.7263)  loss_box_reg: 0.0343 (0.0343)  loss_objectness: 0.1907 (0.1907)  loss_rpn_box_reg: 0.0133 (0.0133)  time: 0.7770  data: 0.1756  max mem: 2115
Epoch: [0]  [ 25/258]  eta: 0:02:07  lr: 0.000510  loss: 0.2521 (0.4196)  loss_classifier: 0.0845 (0.2670)  loss_box_reg: 0.0220 (0.0329)  loss_objectness: 0.1087 (0.1107)  loss_rpn_box_reg: 0.0102 (0.0091)  time: 0.5364  data: 0.0095  max mem: 2380
Epoch: [0]  [ 50/258]  eta: 0:01:53  lr: 0.000996  loss: 0.2137 (0.3271)  loss_classifier: 0.0864 (0.1853)  loss_box_reg: 0.0954 (0.0626)  loss_objectness: 0.0284 (0.0714)  loss_rpn_box_reg: 0.0044 (0.0077)  time: 0.5421  data: 0.0095  max mem: 2380
Epoch: [0]  [ 75/258]  eta: 0:01:39  lr: 0.001482  loss: 0.1781 (0.2855)  loss_classifier: 0.0678 (0.1483)  loss_box_reg: 0.0954 (0.0755)  loss_objectness: 0.0070 (0.0540)  loss_rpn_box_reg: 0.0037 (0.0077)  time: 0.5467  data: 0.0098  max mem: 2380


Epoch: [2]  [  0/258]  eta: 0:02:41  lr: 0.005000  loss: 0.0729 (0.0729)  loss_classifier: 0.0194 (0.0194)  loss_box_reg: 0.0528 (0.0528)  loss_objectness: 0.0001 (0.0001)  loss_rpn_box_reg: 0.0007 (0.0007)  time: 0.6253  data: 0.1274  max mem: 2380
Epoch: [2]  [ 25/258]  eta: 0:01:57  lr: 0.005000  loss: 0.1115 (0.1130)  loss_classifier: 0.0299 (0.0318)  loss_box_reg: 0.0716 (0.0763)  loss_objectness: 0.0002 (0.0016)  loss_rpn_box_reg: 0.0020 (0.0032)  time: 0.4973  data: 0.0032  max mem: 2380
Epoch: [2]  [ 50/258]  eta: 0:01:44  lr: 0.005000  loss: 0.0908 (0.1085)  loss_classifier: 0.0263 (0.0306)  loss_box_reg: 0.0655 (0.0737)  loss_objectness: 0.0004 (0.0012)  loss_rpn_box_reg: 0.0031 (0.0030)  time: 0.4975  data: 0.0034  max mem: 2380
Epoch: [2]  [ 75/258]  eta: 0:01:31  lr: 0.005000  loss: 0.1100 (0.1120)  loss_classifier: 0.0280 (0.0309)  loss_box_reg: 0.0862 (0.0771)  loss_objectness: 0.0002 (0.0012)  loss_rpn_box_reg: 0.0021 (0.0028)  time: 0.4960  data: 0.0032  max mem: 2380


Epoch: [4]  [  0/258]  eta: 0:02:44  lr: 0.000500  loss: 0.0822 (0.0822)  loss_classifier: 0.0220 (0.0220)  loss_box_reg: 0.0588 (0.0588)  loss_objectness: 0.0000 (0.0000)  loss_rpn_box_reg: 0.0014 (0.0014)  time: 0.6370  data: 0.1262  max mem: 2380
Epoch: [4]  [ 25/258]  eta: 0:01:56  lr: 0.000500  loss: 0.0787 (0.0856)  loss_classifier: 0.0207 (0.0227)  loss_box_reg: 0.0482 (0.0585)  loss_objectness: 0.0002 (0.0006)  loss_rpn_box_reg: 0.0008 (0.0038)  time: 0.4965  data: 0.0031  max mem: 2380
Epoch: [4]  [ 50/258]  eta: 0:01:43  lr: 0.000500  loss: 0.0733 (0.0807)  loss_classifier: 0.0220 (0.0232)  loss_box_reg: 0.0491 (0.0540)  loss_objectness: 0.0001 (0.0006)  loss_rpn_box_reg: 0.0012 (0.0028)  time: 0.4967  data: 0.0034  max mem: 2380
Epoch: [4]  [ 75/258]  eta: 0:01:31  lr: 0.000500  loss: 0.0873 (0.0838)  loss_classifier: 0.0241 (0.0242)  loss_box_reg: 0.0551 (0.0563)  loss_objectness: 0.0001 (0.0005)  loss_rpn_box_reg: 0.0012 (0.0028)  time: 0.4980  data: 0.0037  max mem: 2380


Epoch: [6]  [  0/258]  eta: 0:03:20  lr: 0.000050  loss: 0.0962 (0.0962)  loss_classifier: 0.0262 (0.0262)  loss_box_reg: 0.0693 (0.0693)  loss_objectness: 0.0000 (0.0000)  loss_rpn_box_reg: 0.0006 (0.0006)  time: 0.7760  data: 0.2172  max mem: 2380
Epoch: [6]  [ 25/258]  eta: 0:02:09  lr: 0.000050  loss: 0.0531 (0.0587)  loss_classifier: 0.0165 (0.0164)  loss_box_reg: 0.0363 (0.0408)  loss_objectness: 0.0000 (0.0003)  loss_rpn_box_reg: 0.0007 (0.0012)  time: 0.5489  data: 0.0093  max mem: 2380
Epoch: [6]  [ 50/258]  eta: 0:01:55  lr: 0.000050  loss: 0.0706 (0.0689)  loss_classifier: 0.0227 (0.0197)  loss_box_reg: 0.0505 (0.0471)  loss_objectness: 0.0001 (0.0005)  loss_rpn_box_reg: 0.0015 (0.0016)  time: 0.5486  data: 0.0103  max mem: 2380
Epoch: [6]  [ 75/258]  eta: 0:01:41  lr: 0.000050  loss: 0.0652 (0.0712)  loss_classifier: 0.0174 (0.0198)  loss_box_reg: 0.0468 (0.0492)  loss_objectness: 0.0000 (0.0004)  loss_rpn_box_reg: 0.0010 (0.0019)  time: 0.5511  data: 0.0100  max mem: 2380


Epoch: [8]  [  0/258]  eta: 0:04:38  lr: 0.000050  loss: 0.0801 (0.0801)  loss_classifier: 0.0203 (0.0203)  loss_box_reg: 0.0580 (0.0580)  loss_objectness: 0.0001 (0.0001)  loss_rpn_box_reg: 0.0016 (0.0016)  time: 1.0799  data: 0.5789  max mem: 2380
Epoch: [8]  [ 25/258]  eta: 0:02:13  lr: 0.000050  loss: 0.0474 (0.0617)  loss_classifier: 0.0158 (0.0175)  loss_box_reg: 0.0342 (0.0427)  loss_objectness: 0.0001 (0.0002)  loss_rpn_box_reg: 0.0011 (0.0013)  time: 0.5499  data: 0.0095  max mem: 2380
Epoch: [8]  [ 50/258]  eta: 0:01:56  lr: 0.000050  loss: 0.0533 (0.0640)  loss_classifier: 0.0193 (0.0187)  loss_box_reg: 0.0341 (0.0437)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0012 (0.0014)  time: 0.5515  data: 0.0104  max mem: 2380
Epoch: [8]  [ 75/258]  eta: 0:01:42  lr: 0.000050  loss: 0.0755 (0.0687)  loss_classifier: 0.0185 (0.0199)  loss_box_reg: 0.0528 (0.0472)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0011 (0.0013)  time: 0.5508  data: 0.0104  max mem: 2380


In [11]:
torch.save(model.state_dict(), "models/model.pt")