# Detection with CNNClassifier


In [1]:
from dotenv import load_dotenv

load_dotenv()

import sys
import os

sys.path.append(os.getenv('SRC_DIR'))

from utils.system import display_system_info

display_system_info(markdown=True)


**Last Updated**: 2025-05-01 20:06:51

**Python Version**: 3.11.11  
**OS**: Windows 10.0.26100  
**Architecture**: 64bit  
**Hostname**: DESKTOP-42J9AQP  
**Processor**: Intel64 Family 6 Model 183 Stepping 1, GenuineIntel  
**RAM Size**: 63.85 GB  
  
        

In [2]:
from torchvision import transforms
import torch
import cv2
from PIL import Image
import numpy as np

from classify import TorchClassifier

In [3]:
# Class id as read from the first field of each label-file line -> name of
# the sub-folder the corresponding crop is saved under (see cop_save_roi).
IDX_TO_CLASS = dict(enumerate(('swimming', 'treadwater', 'drowning')))

def xywhn_to_xyxy(image, x_n, y_n, w_n, h_n):
    """Convert a normalised centre-format box into pixel corner coordinates.

    Args:
        image: Object whose ``shape`` starts with ``(height, width)``, e.g. an
            array returned by ``cv2.imread`` (HWC) or a 2-D grayscale array.
        x_n, y_n: Box centre as fractions of the image width / height.
        w_n, h_n: Box width / height as fractions of the image width / height.

    Returns:
        ``(x1, y1, x2, y2)`` as integers, clamped to ``[0, width]`` and
        ``[0, height]`` so they can be used directly as slice bounds.
    """
    # Only the first two dimensions are needed, so 2-D images work as well.
    height, width = image.shape[:2]

    def _clamp(value, upper):
        # A box that pokes outside the image would otherwise yield negative
        # coordinates, which wrap around when used as array slice indices.
        return max(0, min(int(value), upper))

    x1 = _clamp((x_n - w_n / 2) * width, width)
    y1 = _clamp((y_n - h_n / 2) * height, height)
    x2 = _clamp((x_n + w_n / 2) * width, width)
    y2 = _clamp((y_n + h_n / 2) * height, height)

    return x1, y1, x2, y2

def cop_save_roi(images_folder, labels_folder, output_folder, idx_to_class):
    """Crop every labelled box out of each image and save it in a per-class folder.

    For each ``.jpg`` in ``images_folder`` the label file with the same base
    name (``.txt``) is read from ``labels_folder``. Every non-blank line must
    have five whitespace-separated fields: ``class_id x_n y_n w_n h_n``
    (normalised centre-format box). Each crop is written to
    ``output_folder/<class name>/<6-digit counter>.jpg`` with one running
    counter per class.

    Args:
        images_folder: Directory containing the ``.jpg`` images.
        labels_folder: Directory containing the matching ``.txt`` label files.
        output_folder: Root directory for the crops; created if missing.
        idx_to_class: Mapping from integer class id to class (folder) name.

    Raises:
        ValueError: If a non-blank label line does not have exactly five fields
            or a field cannot be parsed as a number.
        KeyError: If a label line uses a class id missing from ``idx_to_class``.
    """
    os.makedirs(output_folder, exist_ok=True)
    for class_name in idx_to_class.values():
        os.makedirs(os.path.join(output_folder, class_name), exist_ok=True)

    class_counters = {class_id: 0 for class_id in idx_to_class}

    # os.listdir order is arbitrary; sorting keeps the crop numbering
    # reproducible from one run to the next.
    for image_file in sorted(os.listdir(images_folder)):
        if not image_file.endswith('.jpg'):
            continue

        base_name = os.path.splitext(image_file)[0]
        label_file = os.path.join(labels_folder, f"{base_name}.txt")
        image_path = os.path.join(images_folder, image_file)

        if not os.path.isfile(label_file):
            print(f"Skipping {image_path}: no label file at {label_file}")
            continue

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            print(f"Skipping {image_path}: could not be read as an image")
            continue

        with open(label_file, 'r') as file:
            for line_no, line in enumerate(file, start=1):
                values = line.split()
                if not values:
                    # Blank (e.g. trailing) line: nothing to crop.
                    continue
                if len(values) != 5:
                    raise ValueError(
                        f"{label_file}:{line_no}: expected 5 fields "
                        f"(class x y w h), got {len(values)}"
                    )

                class_id = int(values[0])
                x_n, y_n, w_n, h_n = map(float, values[1:])

                x1, y1, x2, y2 = xywhn_to_xyxy(image, x_n, y_n, w_n, h_n)
                # Negative slice bounds would wrap around to the other side of
                # the array, so pin them at the image edge.
                x1, y1 = max(x1, 0), max(y1, 0)

                roi = image[y1:y2, x1:x2]
                if roi.size == 0:
                    # Degenerate box (zero area after truncation/clipping).
                    print(f"Skipping empty box at {label_file}:{line_no}")
                    continue

                roi_path = os.path.join(
                    output_folder,
                    idx_to_class[class_id],
                    f"{class_counters[class_id]:06d}.jpg",
                )
                if not cv2.imwrite(roi_path, roi):
                    print(f"Failed to write {roi_path}")
                    continue

                class_counters[class_id] += 1

In [4]:
# Crop the labelled boxes for each dataset split. Images and labels live in
# parallel <RAW_DATA_DIR>/images/<split> and <RAW_DATA_DIR>/labels/<split> trees;
# crops are written under <ROI_DATA_DIR>/<split>.
raw_data_dir = os.getenv('RAW_DATA_DIR')
roi_data_dir = os.getenv('ROI_DATA_DIR')

for split in ('train', 'val'):
    images_folder = os.path.join(raw_data_dir, 'images', split)
    labels_folder = os.path.join(raw_data_dir, 'labels', split)
    output_folder = os.path.join(roi_data_dir, split)

    cop_save_roi(
        images_folder=images_folder,
        labels_folder=labels_folder,
        output_folder=output_folder,
        idx_to_class=IDX_TO_CLASS,
    )

In [4]:
# Hyper-parameters passed to TorchClassifier as the configuration of a new
# 'CNNClassifier' (see the printed architecture below for the resulting layers).
model_config = dict(
    num_classes=3,
    num_blocks=4,
    first_out_channel=32,
    out_channel_multiplier=2,
    kernel_size=3,
    stride=1,
    padding=1,
    input_shape=(3, 128, 128),
)

model = TorchClassifier(
    model='CNNClassifier',
    config=model_config,
    device='auto',
    verbose=True,
)

New model created: CNNClassifier(
  (features): Sequential(
    (0): ConvBlock(
      (block): Sequential(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (2): ConvBlock(
      (block): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, trac

In [5]:
# class RandomAffineWithInpainting:
#     def __init__(self, degrees=10, translate=(0.1, 0.1), radius=3):
#         self.affine = transforms.RandomAffine(degrees=degrees, translate=translate)
#         self.inpaint_radius = radius

#     def __call__(self, img):
#         # Step 1: Apply affine transform (returns PIL image)
#         img = self.affine(img)

#         # Step 2: Convert to NumPy array
#         img_np = np.array(img)

#         return Image.fromarray(img_np)

#         # Step 3: Create mask where pixels are black
#         gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
#         mask = cv2.inRange(gray, 0, 1)

#         # Step 4: Inpaint using OpenCV
#         inpainted_np = cv2.inpaint(img_np, mask, self.inpaint_radius, cv2.INPAINT_TELEA)

#         # Step 5: Convert back to PIL
#         img_inpainted = Image.fromarray(inpainted_np)

#         return img_inpainted
    
class RGBToHSV:
    """Callable transform that returns its input converted to PIL mode "HSV"."""

    def __call__(self, img):
        # Tensors are turned into PIL images first; any other input is used
        # as-is and must provide PIL's ``convert`` method.
        pil_img = transforms.ToPILImage()(img) if isinstance(img, torch.Tensor) else img
        return pil_img.convert("HSV")
    
class VBlurring:
    """Average-blur the third channel of a 3-channel image; keep the other two.

    Meant to run on an HSV image (e.g. after ``RGBToHSV``), where the third
    channel is V.

    Args:
        kernel_size: ``(width, height)`` of the averaging kernel handed to
            ``cv2.blur``. Defaults to ``(7, 7)``, the value that used to be
            hard-coded.
    """

    def __init__(self, kernel_size=(7, 7)):
        self.kernel_size = tuple(kernel_size)

    def __call__(self, img):
        # Ensure the image is in PIL format before converting to an array
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)

        # Split into the three channels; only the last one (V) is blurred
        h, s, v = cv2.split(np.array(img))

        # Box (average) blur with the configured kernel size
        v_blurred = cv2.blur(v, self.kernel_size)

        img_hsv_blurred = cv2.merge([h, s, v_blurred])

        # NOTE(review): the result is rebuilt from a raw array, so it presumably
        # carries PIL's default mode for 3-channel uint8 data rather than "HSV";
        # confirm before relying on ``img.mode`` downstream.
        return Image.fromarray(img_hsv_blurred)

def _hsv_tensor_pipeline():
    """Build a fresh Resize(128x128) -> RGBToHSV -> ToTensor pipeline."""
    return transforms.Compose([
        transforms.Resize((128, 128)),
        RGBToHSV(),
        transforms.ToTensor(),
    ])


# Geometric augmentation: horizontal flip with p=1.0 (always applied),
# followed by a random rotation of up to 10 degrees and a random shift of up
# to 10% of the image size.
aug_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
])

# Currently the same steps as the validation pipeline.
# VBlurring() is a candidate extra step after RGBToHSV (disabled for now).
# May add certain non-deterministic augmentations here to improve generalisability.
enhance_transform = _hsv_tensor_pipeline()

val_transform = _hsv_tensor_pipeline()

In [6]:
# Keyword arguments for TorchClassifier.train; their exact semantics are
# defined by that class (not shown in this notebook).
train_kwargs = dict(
    data_path=os.getenv('ROI_DATA_DIR'),
    imbalance=True,
    fraction=1,
    val_test_ratio=0.5,
    input_size=128,
    optimizer='Adam',
    lr=1e-4,
    aug_transform=aug_transform,
    enhance_transform=enhance_transform,
    val_transform=val_transform,
    batch_size=32,
    epochs=10,
)

results = model.train(**train_kwargs)

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       1/10     0.3267      87.19     0.8719     0.8724     0.8721     0.8719     0.8724     0.8721: 100%|██████████| 552/552 [00:35<00:00, 15.46it/s]


                  Loss


                0.1123: 100%|██████████| 37/37 [00:01<00:00, 24.34it/s]


              precision    recall  f1-score   support

    drowning     0.9180    0.9231    0.9205       182
    swimming     0.9898    0.9812    0.9854       690
  treadwater     0.9345    0.9509    0.9426       285

    accuracy                         0.9646      1157
   macro avg     0.9474    0.9517    0.9495      1157
weighted avg     0.9649    0.9646    0.9647      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       2/10     0.1904      93.31     0.9336     0.9336     0.9336     0.9336     0.9336     0.9336: 100%|██████████| 552/552 [00:36<00:00, 15.24it/s]


                  Loss


                0.1007: 100%|██████████| 37/37 [00:01<00:00, 24.48it/s]


              precision    recall  f1-score   support

    drowning     0.8838    0.9615    0.9211       182
    swimming     0.9883    0.9783    0.9832       690
  treadwater     0.9529    0.9228    0.9376       285

    accuracy                         0.9620      1157
   macro avg     0.9417    0.9542    0.9473      1157
weighted avg     0.9631    0.9620    0.9622      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       3/10     0.1623      94.36     0.9441     0.9441     0.9441     0.9441     0.9441     0.9441: 100%|██████████| 552/552 [00:36<00:00, 15.22it/s]


                  Loss


               0.09829: 100%|██████████| 37/37 [00:01<00:00, 24.64it/s]


              precision    recall  f1-score   support

    drowning     0.9767    0.9231    0.9492       182
    swimming     0.9717    0.9957    0.9835       690
  treadwater     0.9676    0.9439    0.9556       285

    accuracy                         0.9715      1157
   macro avg     0.9720    0.9542    0.9628      1157
weighted avg     0.9715    0.9715    0.9712      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       4/10     0.1438         95     0.9506     0.9505     0.9506     0.9506     0.9505     0.9506: 100%|██████████| 552/552 [00:36<00:00, 15.24it/s]


                  Loss


               0.08578: 100%|██████████| 37/37 [00:01<00:00, 24.13it/s]


              precision    recall  f1-score   support

    drowning     0.9348    0.9451    0.9399       182
    swimming     0.9856    0.9913    0.9884       690
  treadwater     0.9642    0.9439    0.9539       285

    accuracy                         0.9723      1157
   macro avg     0.9615    0.9601    0.9607      1157
weighted avg     0.9723    0.9723    0.9723      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       5/10     0.1365      95.08     0.9513     0.9513     0.9513     0.9513     0.9513     0.9513: 100%|██████████| 552/552 [00:35<00:00, 15.38it/s]


                  Loss


               0.08818: 100%|██████████| 37/37 [00:01<00:00, 24.16it/s]


              precision    recall  f1-score   support

    drowning     0.9067    0.9615    0.9333       182
    swimming     0.9912    0.9826    0.9869       690
  treadwater     0.9643    0.9474    0.9558       285

    accuracy                         0.9706      1157
   macro avg     0.9541    0.9638    0.9587      1157
weighted avg     0.9713    0.9706    0.9708      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       6/10     0.1065      96.29     0.9634     0.9634     0.9634     0.9634     0.9634     0.9634: 100%|██████████| 552/552 [00:36<00:00, 15.30it/s]


                  Loss


               0.07074: 100%|██████████| 37/37 [00:01<00:00, 24.63it/s]


              precision    recall  f1-score   support

    drowning     0.9451    0.9451    0.9451       182
    swimming     0.9913    0.9870    0.9891       690
  treadwater     0.9583    0.9684    0.9634       285

    accuracy                         0.9758      1157
   macro avg     0.9649    0.9668    0.9658      1157
weighted avg     0.9759    0.9758    0.9758      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       7/10    0.09553      96.74      0.968     0.9679     0.9679      0.968     0.9679     0.9679: 100%|██████████| 552/552 [00:35<00:00, 15.47it/s]


                  Loss


               0.06993: 100%|██████████| 37/37 [00:01<00:00, 24.38it/s]


              precision    recall  f1-score   support

    drowning     0.9560    0.9560    0.9560       182
    swimming     0.9927    0.9870    0.9898       690
  treadwater     0.9619    0.9754    0.9686       285

    accuracy                         0.9793      1157
   macro avg     0.9702    0.9728    0.9715      1157
weighted avg     0.9794    0.9793    0.9793      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       8/10    0.09536      96.73     0.9679     0.9678     0.9679     0.9679     0.9678     0.9679: 100%|██████████| 552/552 [00:36<00:00, 15.22it/s]


                  Loss


                0.0684: 100%|██████████| 37/37 [00:01<00:00, 24.46it/s]


              precision    recall  f1-score   support

    drowning     0.9457    0.9560    0.9508       182
    swimming     0.9956    0.9870    0.9913       690
  treadwater     0.9619    0.9754    0.9686       285

    accuracy                         0.9793      1157
   macro avg     0.9677    0.9728    0.9702      1157
weighted avg     0.9795    0.9793    0.9793      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


       9/10    0.08913      96.91     0.9697     0.9697     0.9697     0.9697     0.9697     0.9697: 100%|██████████| 552/552 [00:36<00:00, 15.29it/s]


                  Loss


               0.06831: 100%|██████████| 37/37 [00:01<00:00, 24.14it/s]


              precision    recall  f1-score   support

    drowning     0.9560    0.9560    0.9560       182
    swimming     0.9927    0.9899    0.9913       690
  treadwater     0.9686    0.9754    0.9720       285

    accuracy                         0.9810      1157
   macro avg     0.9725    0.9738    0.9731      1157
weighted avg     0.9810    0.9810    0.9810      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


      10/10    0.08761      96.96     0.9702     0.9701     0.9701     0.9702     0.9701     0.9701: 100%|██████████| 552/552 [00:36<00:00, 15.26it/s]


                  Loss


               0.06596: 100%|██████████| 37/37 [00:01<00:00, 24.50it/s]

              precision    recall  f1-score   support

    drowning     0.9511    0.9615    0.9563       182
    swimming     0.9927    0.9855    0.9891       690
  treadwater     0.9653    0.9754    0.9703       285

    accuracy                         0.9793      1157
   macro avg     0.9697    0.9742    0.9719      1157
weighted avg     0.9794    0.9793    0.9793      1157






In [7]:
# Display the raw value returned by model.train (metric dicts with loss,
# accuracy and macro/weighted precision, recall and F1).
results

([{'loss': 0.3265977770063108,
   'accuracy': 87.19111312627523,
   'macro_f1': 0.8720872035869585,
   'weighted_f1': 0.8720872035869585,
   'macro_precision': 0.8718794615384224,
   'weighted_precision': 0.8718794615384224,
   'macro_recall': 0.8724055801292957,
   'weighted_recall': 0.8724055801292957},
  {'loss': 0.1903235629821817,
   'accuracy': 93.3121741101791,
   'macro_f1': 0.9336494056892463,
   'weighted_f1': 0.9336494056892461,
   'macro_precision': 0.9336485547356398,
   'weighted_precision': 0.9336485547356397,
   'macro_recall': 0.9336509016672337,
   'weighted_recall': 0.9336509016672337},
  {'loss': 0.1621121565652741,
   'accuracy': 94.3606891861256,
   'macro_f1': 0.9441387033726253,
   'weighted_f1': 0.9441387033726253,
   'macro_precision': 0.9441552185534495,
   'weighted_precision': 0.9441552185534494,
   'macro_recall': 0.9441419984121584,
   'weighted_recall': 0.9441419984121583},
  {'loss': 0.143613475143734,
   'accuracy': 95.00113352981184,
   'macro_f1': 0.

In [8]:
# Persist the trained classifier under CNN_MODEL_DIR.
checkpoint_path = os.path.join(os.getenv('CNN_MODEL_DIR'), 'test_new.pt')
model.save(checkpoint_path)

Model saved to C:\Users\PC\Downloads\JieShen\Drowning-Detection/models/classification/CNN\test_new.pt


In [14]:
# Reload the saved checkpoint, classify one validation crop, then re-run
# validation with the reloaded model.
loaded_model = TorchClassifier(
    model='CNNClassifier',
    model_path=os.path.join(os.getenv('CNN_MODEL_DIR'), 'test_new.pt'),
    device='auto',
    verbose=False,
)

# Sample crop, located via ROI_DATA_DIR rather than a machine-specific
# absolute path. The layout <ROI_DATA_DIR>/<split>/<class>/<6-digit id>.jpg
# is the one produced by cop_save_roi.
SAMPLE_INDEX = 63
sample_path = os.path.join(
    os.getenv('ROI_DATA_DIR'), 'val', 'drowning', f"{str(SAMPLE_INDEX).zfill(6)}.jpg"
)

# RGBToHSV and val_transform are reused from the transform cell earlier in the
# notebook instead of being redefined here (a second definition would silently
# shadow the first).
with Image.open(sample_path) as img:
    prediction = loaded_model(
        img=img,
        transform=val_transform,
        prob=True
    ).cpu().numpy()

# Index order of the classifier outputs. This is NOT the label-file order in
# IDX_TO_CLASS; it matches the alphabetical class order shown in the
# classification reports. Presumably that is how the dataset loader orders the
# class folders; verify against TorchClassifier.
IDX_TO_CLASS2 = {
    0: 'drowning',
    1: 'swimming',
    2: 'treadwater'
}

print(prediction)
print(IDX_TO_CLASS2[int(np.argmax(prediction))])

loaded_model_results = loaded_model.validate(
    data_path=os.getenv('ROI_DATA_DIR'),
    transform=val_transform,
    val_test_ratio=0.5
)

[0.8826206  0.00696201 0.11041742]
drowning


Validation: 100%|██████████| 37/37 [00:01<00:00, 24.69it/s]


              precision    recall  f1-score   support

    drowning     0.9677    0.9890    0.9783       182
    swimming     0.9928    0.9928    0.9928       690
  treadwater     0.9751    0.9614    0.9682       285

    accuracy                         0.9844      1157
   macro avg     0.9785    0.9811    0.9797      1157
weighted avg     0.9845    0.9844    0.9844      1157

