# Detection with CNNClassifier


In [1]:
from dotenv import load_dotenv

load_dotenv()

import sys
import os

sys.path.append(os.getenv('SRC_DIR'))

from utils.system import display_system_info

display_system_info(markdown=True)


**Last Updated**: 2025-04-19 13:28:00

**Python Version**: 3.11.11  
**OS**: Windows 10.0.26100  
**Architecture**: 64bit  
**Hostname**: ShenLaptop  
**Processor**: Intel64 Family 6 Model 186 Stepping 3, GenuineIntel  
**RAM Size**: 15.65 GB  
  
        

In [9]:
import cv2

IDX_TO_CLASS = {
    0: 'swimming',
    1: 'treadwater',
    2: 'drowning'
}

def xywhn_to_xyxy(image, x_n, y_n, w_n, h_n):
    # cv2 images in HWC
    height, width, _ = image.shape

    x1 = int((x_n - w_n / 2) * width)
    y1 = int((y_n - h_n / 2) * height)
    x2 = int((x_n + w_n / 2) * width)
    y2 = int((y_n + h_n / 2) * height)

    return x1, y1, x2, y2

def cop_save_roi(images_folder, labels_folder, output_folder, idx_to_class):
    os.makedirs(output_folder, exist_ok=True)
    for class_id, class_name in idx_to_class.items():
        os.makedirs(os.path.join(output_folder, class_name), exist_ok=True)
        
    class_counters = {class_id: 0 for class_id in idx_to_class.keys()}

    for image_file in os.listdir(images_folder):
        if image_file.endswith('.jpg'):
            base_name = os.path.splitext(image_file)[0]
            
            label_file = os.path.join(labels_folder, f"{base_name}.txt")
            image_path = os.path.join(images_folder, image_file)
            
            image = cv2.imread(image_path)
            
            # Read the label file
            with open(label_file, 'r') as file:
                lines = file.readlines()
            
            # Loop through each line in the label file
            for line in lines:
                values = line.strip().split()
                class_id = int(values[0])
                x_n, y_n, w_n, h_n = map(float, values[1:])

                x1, y1, x2, y2 = xywhn_to_xyxy(image, x_n, y_n, w_n, h_n)

                roi = image[y1:y2, x1:x2]

                cv2.imwrite(os.path.join(output_folder, idx_to_class[class_id], f"{str(class_counters[class_id]).zfill(6)}.jpg"), roi)

                class_counters[class_id] += 1

In [10]:
for split in ['train', 'val']:
    images_folder = os.path.join(os.getenv('RAW_DATA_DIR'), 'images', split)
    labels_folder = os.path.join(os.getenv('RAW_DATA_DIR'), 'labels', split)
    output_folder = os.path.join(os.getenv('ROI_DATA_DIR'), split)

    cop_save_roi(images_folder, labels_folder, output_folder, IDX_TO_CLASS)

In [None]:
from classify import TorchClassifier

model_config = {
    'num_classes': 3,
    'num_blocks': 4,
    'first_out_channel': 32,
    'out_channel_multiplier': 2,
    'kernel_size': 3,
    'stride': 1,
    'padding': 1,
    'input_shape': (3, 128, 128)
}

model = TorchClassifier(model='CNNClassifier', config=model_config, device='auto', verbose=True)

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
from torchvision import transforms
import torch
import cv2
from PIL import Image
import numpy as np

class RandomAffineWithInpainting:
    def __init__(self, degrees=10, translate=(0.1, 0.1), radius=3):
        self.affine = transforms.RandomAffine(degrees=degrees, translate=translate)
        self.inpaint_radius = radius

    def __call__(self, img):
        # Step 1: Apply affine transform (returns PIL image)
        img = self.affine(img)

        # Step 2: Convert to NumPy array
        img_np = np.array(img)

        return Image.fromarray(img_np)

        # Step 3: Create mask where pixels are black
        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
        mask = cv2.inRange(gray, 0, 1)

        # Step 4: Inpaint using OpenCV
        inpainted_np = cv2.inpaint(img_np, mask, self.inpaint_radius, cv2.INPAINT_TELEA)

        # Step 5: Convert back to PIL
        img_inpainted = Image.fromarray(inpainted_np)

        return img_inpainted
    
class RGBToHSV(object):
    def __call__(self, img):
        # Ensure the image is in PIL format before converting
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)  # Convert tensor to PIL image
        
        # Convert the image to HSV using PIL
        img_hsv = img.convert("HSV")
        
        return img_hsv
    
class VBlurring(object):
    def __call__(self, img):
        # Ensure the image is in PIL format before converting
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)

        # apply blurring to the V channel of the HSV image
        h, s, v = cv2.split(np.array(img))

        v_blurred = cv2.blur(v, (7, 7))  # Apply 3x3 average blur to the V channel

        img_hsv_blurred = cv2.merge([h, s, v_blurred])

        return Image.fromarray(img_hsv_blurred)

aug_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),
    RandomAffineWithInpainting(degrees=0, translate=(0.2, 0.2))
])

enhance_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    RGBToHSV(),
    VBlurring(),
    # May addd certain non-deterministic augmentations here to improve generalisability
    transforms.ToTensor()
])

val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    RGBToHSV(),
    transforms.ToTensor()
])

results = model.train(
    data_path=os.getenv('ROI_DATA_DIR'),
    imbalance=True,
    fraction=1,
    val_test_ratio=0.5,
    input_size=128,
    optimizer='Adam',
    lr=1e-4,
    aug_transform=aug_transform,
    enhance_transform=enhance_transform,
    val_transform=val_transform,
    batch_size=32,
    epochs=5
)

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        1/5      0.677       69.9     0.7012      0.703     0.7016     0.7012      0.703     0.7016: 100%|██████████| 56/56 [01:13<00:00,  1.31s/it]


                  Loss


                0.4093: 100%|██████████| 37/37 [00:21<00:00,  1.74it/s]


              precision    recall  f1-score   support

    drowning     0.8344    0.6923    0.7568       182
    swimming     0.8635    0.9623    0.9102       690
  treadwater     0.8903    0.7404    0.8084       285

    accuracy                         0.8652      1157
   macro avg     0.8627    0.7983    0.8251      1157
weighted avg     0.8655    0.8652    0.8610      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        2/5     0.3847       84.3     0.8469     0.8478     0.8471     0.8469     0.8478     0.8471: 100%|██████████| 56/56 [01:42<00:00,  1.84s/it]


                  Loss


                0.2797: 100%|██████████| 37/37 [00:20<00:00,  1.80it/s]


              precision    recall  f1-score   support

    drowning     0.9412    0.6154    0.7442       182
    swimming     0.9307    0.9536    0.9420       690
  treadwater     0.7946    0.9228    0.8539       285

    accuracy                         0.8928      1157
   macro avg     0.8888    0.8306    0.8467      1157
weighted avg     0.8988    0.8928    0.8892      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        3/5     0.2971      87.13     0.8759     0.8762      0.876     0.8759     0.8762      0.876: 100%|██████████| 56/56 [01:11<00:00,  1.27s/it]


                  Loss


                0.3111: 100%|██████████| 37/37 [00:20<00:00,  1.77it/s]


              precision    recall  f1-score   support

    drowning     0.8671    0.8242    0.8451       182
    swimming     0.9774    0.8768    0.9244       690
  treadwater     0.7452    0.9544    0.8369       285

    accuracy                         0.8876      1157
   macro avg     0.8632    0.8851    0.8688      1157
weighted avg     0.9028    0.8876    0.8904      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        4/5       0.29      88.42      0.889     0.8893     0.8891      0.889     0.8893     0.8891: 100%|██████████| 56/56 [01:12<00:00,  1.29s/it]


                  Loss


                0.1763: 100%|██████████| 37/37 [00:20<00:00,  1.77it/s]


              precision    recall  f1-score   support

    drowning     0.9222    0.8462    0.8825       182
    swimming     0.9667    0.9667    0.9667       690
  treadwater     0.8933    0.9404    0.9162       285

    accuracy                         0.9412      1157
   macro avg     0.9274    0.9177    0.9218      1157
weighted avg     0.9416    0.9412    0.9410      1157

      Epoch       Loss   Accuracy    Macro(P          R        F1) Weighted(P          R        F1)


        5/5     0.2905      88.65     0.8913     0.8915     0.8914     0.8913     0.8915     0.8914: 100%|██████████| 56/56 [01:11<00:00,  1.27s/it]


                  Loss


                0.2219: 100%|██████████| 37/37 [00:20<00:00,  1.85it/s]

              precision    recall  f1-score   support

    drowning     0.9341    0.8571    0.8940       182
    swimming     0.9162    0.9826    0.9483       690
  treadwater     0.9320    0.8175    0.8710       285

    accuracy                         0.9222      1157
   macro avg     0.9274    0.8858    0.9044      1157
weighted avg     0.9229    0.9222    0.9207      1157






In [5]:
results

([{'loss': 0.677042370329478,
   'accuracy': 69.90400903444382,
   'macro_f1': 0.7016033907871405,
   'weighted_f1': 0.7016033907871405,
   'macro_precision': 0.7012251214055705,
   'weighted_precision': 0.7012251214055704,
   'macro_recall': 0.7030096536059057,
   'weighted_recall': 0.7030096536059057},
  {'loss': 0.38468805767063585,
   'accuracy': 84.30265386787126,
   'macro_f1': 0.8470559795932763,
   'weighted_f1': 0.8470559795932763,
   'macro_precision': 0.8468855401125621,
   'weighted_precision': 0.8468855401125621,
   'macro_recall': 0.8478137421919364,
   'weighted_recall': 0.8478137421919364},
  {'loss': 0.29706055098878487,
   'accuracy': 87.12591756070017,
   'macro_f1': 0.8760057325695411,
   'weighted_f1': 0.8760057325695411,
   'macro_precision': 0.8758951375618041,
   'weighted_precision': 0.8758951375618043,
   'macro_recall': 0.8762067007382169,
   'weighted_recall': 0.8762067007382169},
  {'loss': 0.29004918318241835,
   'accuracy': 88.42461885940146,
   'macro_f1

In [6]:
model.save(
    os.path.join(os.getenv('CNN_MODEL_DIR'), 'test.pt')
)

Model saved to C:\Users\hp\Downloads\Drowning-Detection\models\CNN\test.pt


In [None]:
# Add model prediction here