# Detection with CNNClassifier


In [1]:
from dotenv import load_dotenv

load_dotenv()

import sys
import os

sys.path.append(os.getenv('SRC_DIR'))

from utils.system import display_system_info

display_system_info(markdown=True)


**Last Updated**: 2025-04-27 19:20:11

**Python Version**: 3.11.12  
**OS**: Windows 10.0.26100  
**Architecture**: 64bit  
**Hostname**: ShenLaptop  
**Processor**: Intel64 Family 6 Model 186 Stepping 3, GenuineIntel  
**RAM Size**: 15.65 GB  
  
        

In [2]:
from torchvision import transforms
import torch
import cv2
from PIL import Image
import numpy as np

from classify import TorchClassifier

In [15]:
IDX_TO_CLASS = {
    0: 'swimming',
    1: 'treadwater',
    2: 'drowning'
}

def xywhn_to_xyxy(image, x_n, y_n, w_n, h_n):
    # cv2 images in HWC
    height, width, _ = image.shape

    x1 = int((x_n - w_n / 2) * width)
    y1 = int((y_n - h_n / 2) * height)
    x2 = int((x_n + w_n / 2) * width)
    y2 = int((y_n + h_n / 2) * height)

    return x1, y1, x2, y2

def cop_save_roi(images_folder, labels_folder, output_folder, idx_to_class):
    os.makedirs(output_folder, exist_ok=True)
    for class_id, class_name in idx_to_class.items():
        os.makedirs(os.path.join(output_folder, class_name), exist_ok=True)
        
    class_counters = {class_id: 0 for class_id in idx_to_class.keys()}

    for image_file in os.listdir(images_folder):
        if image_file.endswith('.jpg'):
            base_name = os.path.splitext(image_file)[0]
            
            label_file = os.path.join(labels_folder, f"{base_name}.txt")
            image_path = os.path.join(images_folder, image_file)
            
            image = cv2.imread(image_path)
            
            # Read the label file
            with open(label_file, 'r') as file:
                lines = file.readlines()
            
            # Loop through each line in the label file
            for line in lines:
                values = line.strip().split()
                class_id = int(values[0])
                x_n, y_n, w_n, h_n = map(float, values[1:])

                x1, y1, x2, y2 = xywhn_to_xyxy(image, x_n, y_n, w_n, h_n)

                roi = image[y1:y2, x1:x2]

                cv2.imwrite(os.path.join(output_folder, idx_to_class[class_id], f"{str(class_counters[class_id]).zfill(6)}.jpg"), roi)

                class_counters[class_id] += 1

In [None]:
for split in ['train', 'val']:
    images_folder = os.path.join(os.getenv('RAW_DATA_DIR'), 'images', split)
    labels_folder = os.path.join(os.getenv('RAW_DATA_DIR'), 'labels', split)
    output_folder = os.path.join(os.getenv('ROI_DATA_DIR'), split)

    cop_save_roi(images_folder, labels_folder, output_folder, IDX_TO_CLASS)

In [4]:
model_config = {
    'num_classes': 3,
    'num_blocks': 4,
    'first_out_channel': 32,
    'out_channel_multiplier': 2,
    'kernel_size': 3,
    'stride': 1,
    'padding': 1,
    'input_shape': (3, 128, 128)
}

model = TorchClassifier(model='CNNClassifier', config=model_config, device='auto', verbose=True)

New model created: CNNClassifier(
  (features): Sequential(
    (0): ConvBlock(
      (block): Sequential(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (2): ConvBlock(
      (block): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, trac

In [3]:
class RandomAffineWithInpainting:
    def __init__(self, degrees=10, translate=(0.1, 0.1), radius=3):
        self.affine = transforms.RandomAffine(degrees=degrees, translate=translate)
        self.inpaint_radius = radius

    def __call__(self, img):
        # Step 1: Apply affine transform (returns PIL image)
        img = self.affine(img)

        # Step 2: Convert to NumPy array
        img_np = np.array(img)

        return Image.fromarray(img_np)

        # Step 3: Create mask where pixels are black
        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
        mask = cv2.inRange(gray, 0, 1)

        # Step 4: Inpaint using OpenCV
        inpainted_np = cv2.inpaint(img_np, mask, self.inpaint_radius, cv2.INPAINT_TELEA)

        # Step 5: Convert back to PIL
        img_inpainted = Image.fromarray(inpainted_np)

        return img_inpainted
    
class RGBToHSV:
    def __call__(self, img):
        # Ensure the image is in PIL format before converting
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)  # Convert tensor to PIL image
        
        # Convert the image to HSV using PIL
        img_hsv = img.convert("HSV")
        
        return img_hsv
    
class VBlurring:
    def __call__(self, img):
        # Ensure the image is in PIL format before converting
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)

        # apply blurring to the V channel of the HSV image
        h, s, v = cv2.split(np.array(img))

        v_blurred = cv2.blur(v, (7, 7))  # Apply 3x3 average blur to the V channel

        img_hsv_blurred = cv2.merge([h, s, v_blurred])

        return Image.fromarray(img_hsv_blurred)

aug_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),
    RandomAffineWithInpainting(degrees=0, translate=(0.2, 0.2))
])

enhance_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    RGBToHSV(),
    VBlurring(),
    # May addd certain non-deterministic augmentations here to improve generalisability
    transforms.ToTensor()
])

val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    RGBToHSV(),
    transforms.ToTensor()
])

In [None]:
results = model.train(
    data_path=os.getenv('ROI_DATA_DIR'),
    imbalance=True,
    fraction=1,
    val_test_ratio=0.5,
    input_size=128,
    optimizer='Adam',
    lr=1e-4,
    aug_transform=aug_transform,
    enhance_transform=enhance_transform,
    val_transform=val_transform,
    batch_size=32,
    epochs=15
) 

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       1/15     0.3291      87.08     0.8707     0.8713      0.871     0.8707     0.8713      0.871: 100%|██████████| 552/552 [00:39<00:00, 13.86it/s]


                  Loss


                0.1286: 100%|██████████| 37/37 [00:01<00:00, 24.29it/s]


              precision    recall  f1-score   support

    drowning     0.9405    0.8681    0.9029       182
    swimming     0.9756    0.9841    0.9798       690
  treadwater     0.9113    0.9368    0.9239       285

    accuracy                         0.9542      1157
   macro avg     0.9424    0.9297    0.9355      1157
weighted avg     0.9542    0.9542    0.9539      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       2/15     0.1856      93.32     0.9338     0.9338     0.9338     0.9338     0.9338     0.9338: 100%|██████████| 552/552 [00:41<00:00, 13.35it/s]


                  Loss


                0.1044: 100%|██████████| 37/37 [00:01<00:00, 24.72it/s]


              precision    recall  f1-score   support

    drowning     0.9286    0.9286    0.9286       182
    swimming     0.9716    0.9928    0.9821       690
  treadwater     0.9556    0.9053    0.9297       285

    accuracy                         0.9611      1157
   macro avg     0.9519    0.9422    0.9468      1157
weighted avg     0.9609    0.9611    0.9608      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       3/15     0.1627      94.27     0.9433     0.9432     0.9432     0.9433     0.9432     0.9432: 100%|██████████| 552/552 [00:41<00:00, 13.31it/s]


                  Loss


                0.1423: 100%|██████████| 37/37 [00:01<00:00, 23.96it/s]


              precision    recall  f1-score   support

    drowning     0.9872    0.8462    0.9112       182
    swimming     0.9926    0.9667    0.9794       690
  treadwater     0.8632    0.9965    0.9251       285

    accuracy                         0.9551      1157
   macro avg     0.9477    0.9364    0.9386      1157
weighted avg     0.9599    0.9551    0.9553      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       4/15     0.1515      94.89     0.9494     0.9494     0.9494     0.9494     0.9494     0.9494: 100%|██████████| 552/552 [00:41<00:00, 13.29it/s]


                  Loss


               0.09236: 100%|██████████| 37/37 [00:01<00:00, 24.70it/s]


              precision    recall  f1-score   support

    drowning     0.9716    0.9396    0.9553       182
    swimming     0.9898    0.9812    0.9854       690
  treadwater     0.9327    0.9719    0.9519       285

    accuracy                         0.9723      1157
   macro avg     0.9647    0.9642    0.9642      1157
weighted avg     0.9728    0.9723    0.9724      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       5/15     0.1449       94.9     0.9495     0.9495     0.9495     0.9495     0.9495     0.9495: 100%|██████████| 552/552 [00:41<00:00, 13.34it/s]


                  Loss


               0.08165: 100%|██████████| 37/37 [00:01<00:00, 24.21it/s]


              precision    recall  f1-score   support

    drowning     0.9451    0.9451    0.9451       182
    swimming     0.9956    0.9783    0.9868       690
  treadwater     0.9327    0.9719    0.9519       285

    accuracy                         0.9715      1157
   macro avg     0.9578    0.9651    0.9613      1157
weighted avg     0.9721    0.9715    0.9717      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       6/15     0.1312      95.46     0.9552     0.9551     0.9552     0.9552     0.9551     0.9552: 100%|██████████| 552/552 [00:41<00:00, 13.31it/s]


                  Loss


               0.07796: 100%|██████████| 37/37 [00:01<00:00, 24.71it/s]


              precision    recall  f1-score   support

    drowning     0.9613    0.9560    0.9587       182
    swimming     0.9956    0.9783    0.9868       690
  treadwater     0.9362    0.9789    0.9571       285

    accuracy                         0.9749      1157
   macro avg     0.9644    0.9711    0.9675      1157
weighted avg     0.9756    0.9749    0.9751      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       7/15     0.1205      95.74     0.9579     0.9579     0.9579     0.9579     0.9579     0.9579: 100%|██████████| 552/552 [00:41<00:00, 13.40it/s]


                  Loss


               0.08934: 100%|██████████| 37/37 [00:01<00:00, 24.55it/s]


              precision    recall  f1-score   support

    drowning     0.9556    0.9451    0.9503       182
    swimming     0.9912    0.9812    0.9862       690
  treadwater     0.9422    0.9719    0.9568       285

    accuracy                         0.9732      1157
   macro avg     0.9630    0.9660    0.9644      1157
weighted avg     0.9735    0.9732    0.9733      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       8/15    0.09823      96.64      0.967      0.967      0.967      0.967      0.967      0.967: 100%|██████████| 552/552 [00:41<00:00, 13.39it/s]


                  Loss


               0.07537: 100%|██████████| 37/37 [00:01<00:00, 24.57it/s]


              precision    recall  f1-score   support

    drowning     0.9771    0.9396    0.9580       182
    swimming     0.9898    0.9870    0.9884       690
  treadwater     0.9490    0.9789    0.9637       285

    accuracy                         0.9775      1157
   macro avg     0.9720    0.9685    0.9700      1157
weighted avg     0.9778    0.9775    0.9775      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       9/15    0.08791      96.98     0.9704     0.9704     0.9704     0.9704     0.9704     0.9704: 100%|██████████| 552/552 [00:41<00:00, 13.35it/s]


                  Loss


               0.07872: 100%|██████████| 37/37 [00:01<00:00, 24.61it/s]


              precision    recall  f1-score   support

    drowning     0.9770    0.9341    0.9551       182
    swimming     0.9927    0.9855    0.9891       690
  treadwater     0.9396    0.9825    0.9605       285

    accuracy                         0.9767      1157
   macro avg     0.9698    0.9673    0.9682      1157
weighted avg     0.9772    0.9767    0.9767      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


      10/15    0.08549      97.09     0.9715     0.9715     0.9715     0.9715     0.9715     0.9715: 100%|██████████| 552/552 [00:41<00:00, 13.29it/s]


                  Loss


               0.07674: 100%|██████████| 37/37 [00:01<00:00, 24.20it/s]


              precision    recall  f1-score   support

    drowning     0.9771    0.9396    0.9580       182
    swimming     0.9884    0.9884    0.9884       690
  treadwater     0.9555    0.9789    0.9671       285

    accuracy                         0.9784      1157
   macro avg     0.9737    0.9690    0.9712      1157
weighted avg     0.9785    0.9784    0.9784      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


      11/15    0.08148      97.14      0.972      0.972      0.972      0.972      0.972      0.972: 100%|██████████| 552/552 [00:41<00:00, 13.27it/s]


                  Loss


               0.08303: 100%|██████████| 37/37 [00:01<00:00, 24.16it/s]


              precision    recall  f1-score   support

    drowning     0.9821    0.9066    0.9429       182
    swimming     0.9856    0.9913    0.9884       690
  treadwater     0.9458    0.9789    0.9621       285

    accuracy                         0.9749      1157
   macro avg     0.9712    0.9589    0.9645      1157
weighted avg     0.9752    0.9749    0.9748      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


      12/15    0.08353      97.04      0.971      0.971      0.971      0.971      0.971      0.971: 100%|██████████| 552/552 [00:41<00:00, 13.24it/s]


                  Loss


               0.08593: 100%|██████████| 37/37 [00:01<00:00, 24.04it/s]


              precision    recall  f1-score   support

    drowning     0.9714    0.9341    0.9524       182
    swimming     0.9927    0.9841    0.9884       690
  treadwater     0.9396    0.9825    0.9605       285

    accuracy                         0.9758      1157
   macro avg     0.9679    0.9669    0.9671      1157
weighted avg     0.9763    0.9758    0.9758      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


      13/15    0.08039      97.13     0.9719     0.9719     0.9719     0.9719     0.9719     0.9719: 100%|██████████| 552/552 [00:41<00:00, 13.24it/s]


                  Loss


               0.08375: 100%|██████████| 37/37 [00:01<00:00, 23.56it/s]


              precision    recall  f1-score   support

    drowning     0.9770    0.9341    0.9551       182
    swimming     0.9898    0.9855    0.9877       690
  treadwater     0.9426    0.9789    0.9604       285

    accuracy                         0.9758      1157
   macro avg     0.9698    0.9662    0.9677      1157
weighted avg     0.9762    0.9758    0.9758      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


      14/15     0.0783      97.22     0.9727     0.9727     0.9727     0.9727     0.9727     0.9727: 100%|██████████| 552/552 [00:41<00:00, 13.29it/s]


                  Loss


               0.08205: 100%|██████████| 37/37 [00:01<00:00, 24.32it/s]


              precision    recall  f1-score   support

    drowning     0.9770    0.9341    0.9551       182
    swimming     0.9913    0.9855    0.9884       690
  treadwater     0.9428    0.9825    0.9622       285

    accuracy                         0.9767      1157
   macro avg     0.9703    0.9673    0.9685      1157
weighted avg     0.9771    0.9767    0.9767      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


      15/15    0.07553      97.29     0.9734     0.9734     0.9734     0.9734     0.9734     0.9734: 100%|██████████| 552/552 [00:41<00:00, 13.32it/s]


                  Loss


                0.0798: 100%|██████████| 37/37 [00:01<00:00, 24.53it/s]

              precision    recall  f1-score   support

    drowning     0.9774    0.9505    0.9638       182
    swimming     0.9913    0.9870    0.9891       690
  treadwater     0.9556    0.9825    0.9689       285

    accuracy                         0.9801      1157
   macro avg     0.9748    0.9733    0.9739      1157
weighted avg     0.9803    0.9801    0.9801      1157






In [6]:
results

([{'loss': 0.3291142154754936,
   'accuracy': 87.08342779415099,
   'macro_f1': 0.870964555944839,
   'weighted_f1': 0.8709645559448391,
   'macro_precision': 0.8707308151546386,
   'weighted_precision': 0.8707308151546386,
   'macro_recall': 0.8713281161392764,
   'weighted_recall': 0.8713281161392764},
  {'loss': 0.18564937731075654,
   'accuracy': 93.32350940829744,
   'macro_f1': 0.9337688310940582,
   'weighted_f1': 0.9337688310940583,
   'macro_precision': 0.9337750740331048,
   'weighted_precision': 0.9337750740331048,
   'macro_recall': 0.9337643189293411,
   'weighted_recall': 0.9337643189293411},
  {'loss': 0.16269371990178563,
   'accuracy': 94.27000680117887,
   'macro_f1': 0.9432435492403695,
   'weighted_f1': 0.9432435492403695,
   'macro_precision': 0.9432749495171722,
   'weighted_precision': 0.9432749495171721,
   'macro_recall': 0.9432346603153,
   'weighted_recall': 0.9432346603153},
  {'loss': 0.1514506953569345,
   'accuracy': 94.88778054862843,
   'macro_f1': 0.94

In [8]:
model.save(
    os.path.join(os.getenv('CNN_MODEL_DIR'), 'test.pt')
)

Model saved to C:/Users/PC/Downloads/Jie Shen/Drowning-Detection/models/classification/CNN\test.pt


In [7]:
# Add model prediction here
loaded_model = TorchClassifier(model='CNNClassifier', model_path=os.path.join(os.getenv('CNN_MODEL_DIR'), 'test.pt'), device='auto', verbose=False)

img = Image.open(rf'C:\Users\hp\Downloads\Drowning-Detection\data\roi\val\drowning\{str(63).zfill(6)}.jpg')

class RGBToHSV:
    def __call__(self, img):
        # Ensure the image is in PIL format before converting
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)  # Convert tensor to PIL image
        
        # Convert the image to HSV using PIL
        img_hsv = img.convert("HSV")
        
        return img_hsv

val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    RGBToHSV(),
    transforms.ToTensor()
])

prediction = loaded_model(
    img=img,
    transform=val_transform,
    prob=True
).cpu().numpy()

IDX_TO_CLASS2 = {
    0: 'drowning',
    1: 'swimming',
    2: 'treadwater'
}

print(prediction)
print(IDX_TO_CLASS2[np.argmax(prediction)])

loaded_model_results = loaded_model.validate(
    data_path=os.getenv('ROI_DATA_DIR'),
    transform=val_transform,
    val_test_ratio=0.5
)

[0.6086047  0.12980805 0.26158726]
drowning


Validation: 100%|██████████| 37/37 [00:16<00:00,  2.27it/s]

              precision    recall  f1-score   support

    drowning     0.9730    0.9890    0.9809       182
    swimming     0.9913    0.9942    0.9928       690
  treadwater     0.9786    0.9614    0.9699       285

    accuracy                         0.9853      1157
   macro avg     0.9810    0.9815    0.9812      1157
weighted avg     0.9853    0.9853    0.9853      1157




