In [15]:
# Mini-batch size for the training loader; the evaluation loaders use twice this value.
BATCH_SIZE = 16
# Side length, in pixels, that every image is resized to (square) by the transform pipeline.
TENSOR_SIZE = 128

In [1]:
import os

# Root folder of the dataset, relative to the notebook's working directory.
dataset_path = './D-Fire'

# Show the top-level entries of the dataset root as a quick sanity check.
top_level_entries = os.listdir(dataset_path)
print(top_level_entries)

['test', 'train']


In [3]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor, Resize, Compose, Normalize, RandomHorizontalFlip, RandomRotation, ColorJitter

# Preprocessing + augmentation pipeline; steps run in the order listed.
pipeline_steps = [
    # Bring every image to a fixed square size.
    Resize(size=(TENSOR_SIZE, TENSOR_SIZE)),
    ToTensor(),
    # Random augmentations (a different draw each time an item is fetched).
    RandomHorizontalFlip(p=0.5),
    RandomRotation(degrees=15),
    ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.02),
    # Per-channel normalisation; presumably the ImageNet statistics that the
    # pretrained backbone expects - confirm against the weights' documentation.
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = Compose(pipeline_steps)

In [4]:
import os
from torch.utils.data import Dataset
from PIL import Image

class FireClassificationDataset(Dataset):
    """Image-level fire / no-fire dataset derived from per-image label files.

    Every entry of ``img_dir`` is treated as an image. Its label file is looked
    up in ``label_dir`` under the same base name with a ``.txt`` extension.
    Each non-blank line of a label file starts with an integer class id; the
    image is labelled 1 if any line has class id 1 (fire), otherwise 0.

    Args:
        img_dir: Directory containing the image files.
        label_dir: Directory containing the matching ``.txt`` label files.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
    """

    def __init__(self, img_dir, label_dir, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> image mapping (and any seeded split built on top of it)
        # reproducible across runs and machines.
        self.image_filenames = sorted(os.listdir(img_dir))

    def __len__(self):
        """Return the number of entries found in ``img_dir``."""
        return len(self.image_filenames)

    def _read_label(self, img_filename):
        """Return 1 if the label file for ``img_filename`` lists class id 1, else 0.

        A missing label file is reported on stdout and treated as label 0.
        Blank lines in the label file are ignored.
        """
        label_path = os.path.join(self.label_dir, os.path.splitext(img_filename)[0] + '.txt')
        try:
            with open(label_path, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            print(f"Warning: Label file not found for image {img_filename}. Using default label 0.")
            return 0

        # Check if there's any fire object (class_id = 1) in the image.
        # Blank or whitespace-only lines have no fields and are skipped, so a
        # trailing empty line no longer raises IndexError.
        has_fire = any(
            int(fields[0]) == 1
            for fields in (line.split() for line in lines)
            if fields
        )
        return 1 if has_fire else 0

    def __getitem__(self, index):
        """Return ``(image, label)`` for the entry at ``index``.

        The image is converted to RGB and passed through ``transform`` when one
        was given; the label is the integer 0 or 1.
        """
        img_filename = self.image_filenames[index]
        img_path = os.path.join(self.img_dir, img_filename)
        # convert() produces an independent in-memory image, so the file handle
        # can be released immediately instead of waiting for garbage collection.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')

        label = self._read_label(img_filename)

        if self.transform:
            img = self.transform(img)

        return img, label #img_filename (returning it causes some problems in to_device())


In [5]:
# Locations of the images and their label files for each split.
train_images_path = dataset_path + "/train/images"
train_labels_path = dataset_path + "/train/labels"

test_images_path = dataset_path + "/test/images"
test_labels_path = dataset_path + "/test/labels"

# Evaluation data must be deterministic: it gets the same resize and
# normalisation as the training pipeline but none of the random augmentations
# (flip / rotation / colour jitter), which would otherwise make validation and
# test metrics vary from run to run.
eval_transform = Compose([
    Resize((TENSOR_SIZE, TENSOR_SIZE)),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = FireClassificationDataset(train_images_path, train_labels_path, transform=transform)
test_dataset = FireClassificationDataset(test_images_path, test_labels_path, transform=eval_transform)

In [10]:
# Number of entries found in the training images directory.
len(train_dataset)

17221

In [11]:
import torch
from torch.utils.data import random_split

# Re-running this cell must not split an already-split subset again: if
# `test_dataset` is the Subset produced by a previous run of this cell,
# recover the full dataset it was drawn from.
full_test_dataset = (
    test_dataset.dataset
    if isinstance(test_dataset, torch.utils.data.Subset)
    else test_dataset
)

# Half of the original test images become the validation set.
validation_size = round(0.5 * len(full_test_dataset))
test_size = len(full_test_dataset) - validation_size

# A dedicated, seeded generator makes the validation/test partition identical
# on every run, so reported metrics are comparable between runs.
split_generator = torch.Generator().manual_seed(42)
test_dataset, validation_dataset = random_split(
    full_test_dataset, [test_size, validation_size], generator=split_generator
)

In [12]:
# Size of the validation subset carved out of the original test set.
len(validation_dataset)

2136

In [13]:
# Size of the test subset that remains after the validation split.
len(test_dataset)

2137

In [16]:
from torch.utils.data.dataloader import DataLoader

# Worker / memory options shared by all three loaders.
loader_options = dict(num_workers=2, pin_memory=True)

# Training batches are drawn in shuffled order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, **loader_options)

# Evaluation loaders keep the dataset order and use double-size batches.
eval_batch_size = BATCH_SIZE * 2
validation_dataloader = DataLoader(validation_dataset, batch_size=eval_batch_size, **loader_options)
test_dataloader = DataLoader(test_dataset, batch_size=eval_batch_size, **loader_options)

In [17]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

From here on, we load a pretrained ResNet-50 and train only its final fully connected layer on our training set; all other layers stay frozen.

In [24]:
import torchvision.models as models

# Load ResNet-50 initialised with torchvision's default pretrained weights.
pretrained_weights = models.ResNet50_Weights.DEFAULT
model = models.resnet50(weights=pretrained_weights)

# eval() returns the module itself, so as the last expression of the cell it
# also displays the network architecture.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [26]:
import torch.nn as nn

num_classes = 2 # fire or no fire

# Freeze every pretrained parameter so that no gradients are computed for them.
model.requires_grad_(False)

# Replace the classification head. The new layer is created after the freeze,
# so its parameters keep the default requires_grad=True and are the only ones
# left trainable.
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)

# Move the whole network to the selected device.
model = model.to(device)

In [27]:
import torch.optim as optim

# Loss: cross-entropy over the model's class logits.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum, given only the parameters of the new head.
head_parameters = model.fc.parameters()
optimizer = optim.SGD(head_parameters, lr=1e-3, momentum=0.9)

Training phase

In [28]:
# Ten passes over the training set; after each one, report the mean of the
# per-batch losses.
for epoch in range(10):
    model.train()
    batch_losses = []
    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    mean_loss = sum(batch_losses) / len(train_dataloader)
    print('Epoch %d loss: %.3f' % (epoch + 1, mean_loss))

Validation phase

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# VALIDATION
# Put the network in evaluation mode before scoring the validation set.
model.eval()
val_predictions, val_targets = [], []
with torch.no_grad():  # no gradients are needed for inference
    for inputs, labels in validation_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit in each row.
        predicted = torch.max(outputs, 1).indices
        val_predictions.extend(predicted.cpu().numpy())
        val_targets.extend(labels.cpu().numpy())

# Accuracy expressed as a percentage.
val_acc = accuracy_score(val_targets, val_predictions) * 100
print(f'Validation accuracy: {val_acc:.3f}%')

Test phase

In [None]:
import matplotlib.pyplot as plt

# TEST
# Score the held-out test set and visualise the errors as a confusion matrix.
model.eval()
test_predictions = []
test_targets = []
with torch.no_grad():  # no gradients are needed for inference
    for inputs, labels in test_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit in each row.
        _, predicted = torch.max(outputs, 1)
        test_predictions.extend(predicted.cpu().numpy())
        test_targets.extend(labels.cpu().numpy())

test_acc = accuracy_score(test_targets, test_predictions) * 100
print(f'Test accuracy: {test_acc:.3f}%')

# Label 0 = no fire, label 1 = fire (as produced by the dataset class).
class_names = ['no fire', 'fire']

# Compute confusion matrix for test set. Passing `labels` pins the matrix to
# 2x2 even if one class never appears in the targets or predictions.
cm = confusion_matrix(test_targets, test_predictions, labels=list(range(len(class_names))))

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.figure.colorbar(im, ax=ax)
# The number of tick labels must equal the number of ticks (one per class);
# a mismatched count raises a ValueError in Matplotlib.
ax.set(xticks=np.arange(cm.shape[1]),
       yticks=np.arange(cm.shape[0]),
       xticklabels=class_names,
       yticklabels=class_names,
       title='Confusion matrix',
       ylabel='True label',
       xlabel='Predicted label')
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")
# Write the count into every cell, switching text colour for readability on
# dark cells.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], 'd'),
                ha="center", va="center",
                color="white" if cm[i, j] > cm.max() / 2. else "black")
fig.tight_layout()
plt.show()