# Detection with CNNClassifier


In [1]:
from dotenv import load_dotenv

load_dotenv()

import sys
import os

sys.path.append(os.getenv('SRC_DIR'))

from utils.system import display_system_info

display_system_info(markdown=True)


**Last Updated**: 2025-04-19 04:04:08

**Python Version**: 3.11.11  
**OS**: Windows 10.0.26100  
**Architecture**: 64bit  
**Hostname**: ShenLaptop  
**Processor**: Intel64 Family 6 Model 186 Stepping 3, GenuineIntel  
**RAM Size**: 15.65 GB  
  
        

In [9]:
import cv2

IDX_TO_CLASS = {
    0: 'swimming',
    1: 'treadwater',
    2: 'drowning'
}

def xywhn_to_xyxy(image, x_n, y_n, w_n, h_n):
    # cv2 images in HWC
    height, width, _ = image.shape

    x1 = int((x_n - w_n / 2) * width)
    y1 = int((y_n - h_n / 2) * height)
    x2 = int((x_n + w_n / 2) * width)
    y2 = int((y_n + h_n / 2) * height)

    return x1, y1, x2, y2

def cop_save_roi(images_folder, labels_folder, output_folder, idx_to_class):
    os.makedirs(output_folder, exist_ok=True)
    for class_id, class_name in idx_to_class.items():
        os.makedirs(os.path.join(output_folder, class_name), exist_ok=True)
        
    class_counters = {class_id: 0 for class_id in idx_to_class.keys()}

    for image_file in os.listdir(images_folder):
        if image_file.endswith('.jpg'):
            base_name = os.path.splitext(image_file)[0]
            
            label_file = os.path.join(labels_folder, f"{base_name}.txt")
            image_path = os.path.join(images_folder, image_file)
            
            image = cv2.imread(image_path)
            
            # Read the label file
            with open(label_file, 'r') as file:
                lines = file.readlines()
            
            # Loop through each line in the label file
            for line in lines:
                values = line.strip().split()
                class_id = int(values[0])
                x_n, y_n, w_n, h_n = map(float, values[1:])

                x1, y1, x2, y2 = xywhn_to_xyxy(image, x_n, y_n, w_n, h_n)

                roi = image[y1:y2, x1:x2]

                cv2.imwrite(os.path.join(output_folder, idx_to_class[class_id], f"{str(class_counters[class_id]).zfill(6)}.jpg"), roi)

                class_counters[class_id] += 1

In [10]:
for split in ['train', 'val']:
    images_folder = os.path.join(os.getenv('RAW_DATA_DIR'), 'images', split)
    labels_folder = os.path.join(os.getenv('RAW_DATA_DIR'), 'labels', split)
    output_folder = os.path.join(os.getenv('ROI_DATA_DIR'), split)

    cop_save_roi(images_folder, labels_folder, output_folder, IDX_TO_CLASS)

In [2]:
from classify import TorchClassifier

model_config = {
    'num_classes': 3,
    'num_blocks': 4,
    'first_out_channel': 32,
    'out_channel_multiplier': 2,
    'kernel_size': 3,
    'stride': 1,
    'padding': 1,
    'input_shape': (3, 128, 128)
}

model = TorchClassifier(model='CNNClassifier', config=model_config, verbose=True)

New model created: CNNClassifier(
  (features): Sequential(
    (0): ConvBlock(
      (block): Sequential(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (2): ConvBlock(
      (block): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, trac

In [None]:
from torchvision import transforms
import torch
import cv2
from PIL import Image
import numpy as np

class RandomAffineWithInpainting:
    def __init__(self, degrees=0, translate=(0.2, 0.2), radius=3):
        self.affine = transforms.RandomAffine(degrees=degrees, translate=translate)
        self.inpaint_radius = radius

    def __call__(self, img):
        # Step 1: Apply affine transform (returns PIL image)
        img = self.affine(img)

        # Step 2: Convert to NumPy array
        img_np = np.array(img)

        # return Image.fromarray(img_np)

        # Step 3: Create mask where pixels are black
        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
        mask = cv2.inRange(gray, 0, 1)

        # Step 4: Inpaint using OpenCV
        inpainted_np = cv2.inpaint(img_np, mask, self.inpaint_radius, cv2.INPAINT_TELEA)

        # Step 5: Convert back to PIL
        img_inpainted = Image.fromarray(inpainted_np)

        return img_inpainted

class CustomTransformation(object):
    def __call__(self, img):
        # Ensure the image is in PIL format before converting
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)  # Convert tensor to PIL image
        
        # Convert the image to HSV using PIL
        img_hsv = np.array(img.convert("RGB"))  # Convert to numpy array (RGB)
        img_hsv = cv2.cvtColor(img_hsv, cv2.COLOR_RGB2HSV)  # Convert to HSV
        
        # Split the HSV image into H, S, and V channels
        h, s, v = cv2.split(img_hsv)
        
        # Apply average blurring to the V channel
        v_blurred = cv2.blur(v, (7, 7))  # Apply 3x3 average blur to the V channel
        
        # Merge the H, S, and blurred V channels back together
        img_hsv_blurred = cv2.merge([h, s, v_blurred])
        
        # Convert the image back to PIL format
        return Image.fromarray(img_hsv_blurred)

aug_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),
    RandomAffineWithInpainting(degrees=0, translate=(0.2, 0.2))
])

enhance_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    CustomTransformation(),
    transforms.ToTensor()
])

val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

results = model.train(
    data_path=os.getenv('ROI_DATA_DIR'),
    imbalance=True,
    fraction=0.01,
    val_test_ratio=0.5,
    input_size=128,
    optimizer='Adam',
    lr=1e-4,
    aug_transform=aug_transform,
    enhance_transform=enhance_transform,
    val_transform=val_transform,
    batch_size=32,
    epochs=5
)

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        1/5       1.26      35.33     0.3644     0.3736     0.3638     0.3644     0.3736     0.3638: 100%|██████████| 6/6 [00:07<00:00,  1.17s/it]


                  Loss


                 1.093: 100%|██████████| 37/37 [00:14<00:00,  2.50it/s]


              precision    recall  f1-score   support

    drowning     0.1573    1.0000    0.2718       182
    swimming     0.0000    0.0000    0.0000       690
  treadwater     0.0000    0.0000    0.0000       285

    accuracy                         0.1573      1157
   macro avg     0.0524    0.3333    0.0906      1157
weighted avg     0.0247    0.1573    0.0428      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        2/5     0.8506      55.43     0.6149     0.5862     0.5841     0.6149     0.5862     0.5841: 100%|██████████| 6/6 [00:06<00:00,  1.13s/it]


                  Loss


                 1.203: 100%|██████████| 37/37 [00:15<00:00,  2.45it/s]


              precision    recall  f1-score   support

    drowning     0.1573    1.0000    0.2718       182
    swimming     0.0000    0.0000    0.0000       690
  treadwater     0.0000    0.0000    0.0000       285

    accuracy                         0.1573      1157
   macro avg     0.0524    0.3333    0.0906      1157
weighted avg     0.0247    0.1573    0.0428      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        3/5     0.7105      65.22     0.6946     0.6897     0.6836     0.6946     0.6897     0.6836: 100%|██████████| 6/6 [00:06<00:00,  1.12s/it]


                  Loss


                 1.139: 100%|██████████| 37/37 [00:14<00:00,  2.49it/s]


              precision    recall  f1-score   support

    drowning     0.1573    1.0000    0.2718       182
    swimming     0.0000    0.0000    0.0000       690
  treadwater     0.0000    0.0000    0.0000       285

    accuracy                         0.1573      1157
   macro avg     0.0524    0.3333    0.0906      1157
weighted avg     0.0247    0.1573    0.0428      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        4/5     0.4959      76.63     0.8149     0.8103     0.8108     0.8149     0.8103     0.8108: 100%|██████████| 6/6 [00:06<00:00,  1.10s/it]


                  Loss


                 1.238: 100%|██████████| 37/37 [00:14<00:00,  2.49it/s]


              precision    recall  f1-score   support

    drowning     0.1573    1.0000    0.2718       182
    swimming     0.0000    0.0000    0.0000       690
  treadwater     0.0000    0.0000    0.0000       285

    accuracy                         0.1573      1157
   macro avg     0.0524    0.3333    0.0906      1157
weighted avg     0.0247    0.1573    0.0428      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        5/5      0.489      73.91     0.7819     0.7816     0.7816     0.7819     0.7816     0.7816: 100%|██████████| 6/6 [00:06<00:00,  1.10s/it]


                  Loss


                 1.218: 100%|██████████| 37/37 [00:15<00:00,  2.40it/s]

              precision    recall  f1-score   support

    drowning     0.1576    1.0000    0.2723       182
    swimming     1.0000    0.0029    0.0058       690
  treadwater     0.0000    0.0000    0.0000       285

    accuracy                         0.1590      1157
   macro avg     0.3859    0.3343    0.0927      1157
weighted avg     0.6212    0.1590    0.0463      1157






In [4]:
model.save(
    os.path.join(os.getenv('CNN_MODEL_DIR'), 'test.pt')
)

Model saved to C:\Users\hp\Downloads\Drowning-Detection\models\CNN\test.pt


In [None]:
# Add model prediction here