In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
import os
import shutil

# --- 1. Define Your Paths ---
# Source directory (read-only)
source_dir = '/kaggle/input/falcon/HackByte_Dataset'

# Destination directory (writable)
working_dir = '/kaggle/working/'

# --- 2. List the files you need to copy ---
files_to_copy = ['yolo_params.yaml', 'train.py', 'predict.py']

# --- 3. Copy each file ---
for file_name in files_to_copy:
    source_file = os.path.join(source_dir, file_name)
    destination_file = os.path.join(working_dir, file_name)
    
    # Copy the file
    shutil.copy(source_file, destination_file)
    print(f"Copied '{file_name}' to {working_dir}")

print("\nFiles are now in your writable directory!")

Copied 'yolo_params.yaml' to /kaggle/working/
Copied 'train.py' to /kaggle/working/
Copied 'predict.py' to /kaggle/working/

Files are now in your writable directory!


In [7]:
# Install the library to handle YAML files
!pip install pyyaml

import yaml

# Path to the COPIED yaml file
yaml_file_path = '/kaggle/working/yolo_params.yaml'

# --- 1. Load the yaml file ---
with open(yaml_file_path, 'r') as file:
    yolo_config = yaml.safe_load(file)

# --- 2. Make your edits ---
# For example, let's say you want to change the path to be sure it's correct
yolo_config['path'] = '/kaggle/input/falcon/HackByte_Dataset/data' 

# Or maybe you want to add a new parameter (this part is for later experiments)
# yolo_config['epochs'] = 100 

# --- 3. Save the changes back to the file ---
with open(yaml_file_path, 'w') as file:
    yaml.dump(yolo_config, file)

print("yolo_params.yaml has been updated successfully!")
# You can print the config to verify
print(yolo_config)

yolo_params.yaml has been updated successfully!
{'train': 'data/train/images', 'val': 'data/val/images', 'test': 'data/test', 'nc': 3, 'names': ['FireExtinguisher', 'ToolBox', 'OxygenTank'], 'path': '/kaggle/input/falcon/HackByte_Dataset/data'}


In [4]:
#-------------------------------------------------------------------
# STEP 1: INSTALL DEPENDENCIES
#-------------------------------------------------------------------
print("Installing dependencies...")
!pip install ultralytics -q
!pip install pyyaml -q
print("Installation complete!")

#-------------------------------------------------------------------
# STEP 2: PREPARE A CLEAN WRITABLE DIRECTORY
#-------------------------------------------------------------------
import os
import shutil

working_dir = '/kaggle/working/'

print(f"Cleaning contents of {working_dir}...")
if os.path.exists(working_dir):
    for item in os.listdir(working_dir):
        item_path = os.path.join(working_dir, item)
        try:
            if os.path.isfile(item_path) or os.path.islink(item_path):
                os.unlink(item_path)
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
        except Exception as e:
            print(f'Failed to delete {item_path}. Reason: {e}')
else:
    os.makedirs(working_dir)
print("Working directory is clean.")

#-------------------------------------------------------------------
# STEP 3: COPY NECESSARY FILES
#-------------------------------------------------------------------
source_dir = '/kaggle/input/falcon/HackByte_Dataset'
files_to_copy = ['yolo_params.yaml', 'train.py', 'predict.py']
for file_name in files_to_copy:
    shutil.copy(os.path.join(source_dir, file_name), os.path.join(working_dir, file_name))
print(f"Copied essential files to {working_dir}")

#-------------------------------------------------------------------
# STEP 4: MODIFY THE YAML CONFIGURATION FILE << THE IMPORTANT FIX
#-------------------------------------------------------------------
import yaml

yaml_file_path = os.path.join(working_dir, 'yolo_params.yaml')

# Load the existing yaml file
with open(yaml_file_path, 'r') as file:
    yolo_config = yaml.safe_load(file)

# --- CORRECT THE PATHS ---
yolo_config['path'] = '/kaggle/input/falcon/HackByte_Dataset/data'
yolo_config['train'] = 'train/images'  # <<< CORRECTED LINE
yolo_config['val'] = 'val/images'      # <<< CORRECTED LINE
yolo_config['test'] = 'test/images'    # <<< CORRECTED LINE (assuming test images are also in an 'images' folder)

# --- ADD OTHER TRAINING PARAMETERS ---
yolo_config['imgsz'] = 640
yolo_config['batch'] = 16
yolo_config['epochs'] = 50 # Let's start with 50 epochs

# --- ENSURE CLASS NAMES ARE CORRECT ---
yolo_config['nc'] = 3
yolo_config['names'] = ['FireExtinguisher', 'ToolBox', 'OxygenTank'] # From your log


# Save the changes back to the file
with open(yaml_file_path, 'w') as file:
    yaml.dump(yolo_config, file, sort_keys=False) # sort_keys=False keeps the order nice

print("\n--- Updated yolo_params.yaml content ---")
with open(yaml_file_path, 'r') as file:
    print(file.read())
print("----------------------------------------")

#-------------------------------------------------------------------
# STEP 5: NAVIGATE AND RUN TRAINING
#-------------------------------------------------------------------
%cd /kaggle/working/

print(f"\nCurrent directory: {os.getcwd()}")
print("\nStarting model training...")

# Run the training script. It will use the fully corrected yolo_params.yaml.
!python train.py

Installing dependencies...
Installation complete!
Cleaning contents of /kaggle/working/...
Working directory is clean.
Copied essential files to /kaggle/working/

--- Updated yolo_params.yaml content ---
train: train/images
val: val/images
test: test/images
nc: 3
names:
- FireExtinguisher
- ToolBox
- OxygenTank
path: /kaggle/input/falcon/HackByte_Dataset/data
imgsz: 640
batch: 16
epochs: 50

----------------------------------------
/kaggle/working

Current directory: /kaggle/working

Starting model training...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov
Ultralytics 8.3.172 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/yolo_params.yaml, degrees=0.0, de

In [8]:
#-------------------------------------------------------------------
# STEP 1: INSTALL DEPENDENCIES
#-------------------------------------------------------------------
print("Installing dependencies...")
!pip install ultralytics -q
!pip install pyyaml -q
!pip install opencv-python -q # For image processing
print("Installation complete!")

#-------------------------------------------------------------------
# STEP 2: SETUP DIRECTORIES AND COPY FILES
#-------------------------------------------------------------------
import os
import shutil
import cv2
import numpy as np
import random

# --- Basic Setup ---
source_data_dir = '/kaggle/input/falcon/HackByte_Dataset/data'
working_dir = '/kaggle/working/'
new_data_dir = os.path.join(working_dir, 'data_augmented')

# --- Clean and Create Directories ---
print("Setting up working directories...")
if os.path.exists(new_data_dir):
    shutil.rmtree(new_data_dir)
os.makedirs(new_data_dir, exist_ok=True)

# --- Copy the entire original dataset to our new location ---
shutil.copytree(source_data_dir, new_data_dir, dirs_exist_ok=True)
print("Copied original dataset to new augmented directory.")

# --- Copy scripts ---
source_script_dir = '/kaggle/input/falcon/HackByte_Dataset'
files_to_copy = ['yolo_params.yaml', 'train.py', 'predict.py']
for file_name in files_to_copy:
    shutil.copy(os.path.join(source_script_dir, file_name), os.path.join(working_dir, file_name))
print("Copied scripts.")


#-------------------------------------------------------------------
# STEP 3: GENERALIZED AUGMENTATION (ALL CLASSES)
#-------------------------------------------------------------------
print("\nStarting generalized data augmentation for ALL classes...")
train_images_path = os.path.join(new_data_dir, 'train', 'images')
train_labels_path = os.path.join(new_data_dir, 'train', 'labels')

augmentation_count = 0
for label_file in os.listdir(train_labels_path):
    if not label_file.endswith('.txt'):
        continue
    
    label_path = os.path.join(train_labels_path, label_file)
    with open(label_path, 'r') as f:
        lines = f.readlines()

    # Check if there are ANY objects in this image
    if lines:
        image_file = label_file.replace('.txt', '.png')
        image_path = os.path.join(train_images_path, image_file)
        if not os.path.exists(image_path):
            image_file = label_file.replace('.txt', '.jpg')
            image_path = os.path.join(train_images_path, image_file)

        image = cv2.imread(image_path)
        if image is None:
            continue
            
        h, w, _ = image.shape
        augmented_image = image.copy()
        
        for _ in range(random.randint(1, 3)):
            box_w = int(w * random.uniform(0.1, 0.3))
            box_h = int(h * random.uniform(0.1, 0.3))
            x1 = random.randint(0, w - box_w)
            y1 = random.randint(0, h - box_h)
            cv2.rectangle(augmented_image, (x1, y1), (x1 + box_w, y1 + box_h), (0, 0, 0), -1)

        new_image_filename = f"aug_occlusion_{image_file}"
        new_label_filename = f"aug_occlusion_{label_file}"
        cv2.imwrite(os.path.join(train_images_path, new_image_filename), augmented_image)
        shutil.copy(label_path, os.path.join(train_labels_path, new_label_filename))
        augmentation_count += 1
print(f"Generated {augmentation_count} new images with artificial occlusion for all classes.")


Installing dependencies...
Installation complete!
Setting up working directories...
Copied original dataset to new augmented directory.
Copied scripts.

Starting generalized data augmentation for ALL classes...
Generated 841 new images with artificial occlusion for all classes.

--- Running training on AUGMENTED dataset ---
YAML Configuration:
train: train/images
val: val/images
test: test/images
nc: 3
names:
- FireExtinguisher
- ToolBox
- OxygenTank
path: /kaggle/working/data_augmented
imgsz: 640
batch: 16
epochs: 125
patience: 20

---------------------------------------------
/kaggle/working
usage: train.py [-h] [--epochs EPOCHS] [--mosaic MOSAIC]
                [--optimizer OPTIMIZER] [--momentum MOMENTUM] [--lr0 LR0]
                [--lrf LRF] [--single_cls SINGLE_CLS]
train.py: error: unrecognized arguments: --patience=20


In [9]:
#-------------------------------------------------------------------
# STEP 4: CONFIGURE AND TRAIN ON THE AUGMENTED DATASET
#-------------------------------------------------------------------
import yaml

yaml_file_path = os.path.join(working_dir, 'yolo_params.yaml')

with open(yaml_file_path, 'r') as file:
    yolo_config = yaml.safe_load(file)

# --- Point to our new augmented dataset ---
yolo_config['path'] = new_data_dir # <<< CRITICAL CHANGE
yolo_config['train'] = 'train/images'
yolo_config['val'] = 'val/images'
yolo_config['test'] = 'test/images'

# --- Other training parameters ---
yolo_config['imgsz'] = 640
yolo_config['batch'] = 16
yolo_config['epochs'] = 125 # Train a bit longer on the bigger dataset

# --- Save the changes ---
with open(yaml_file_path, 'w') as file:
    yaml.dump(yolo_config, file, sort_keys=False)

print("\n--- Running training on AUGMENTED dataset ---")
print("YAML Configuration:")
with open(yaml_file_path, 'r') as file:
    print(file.read())
print("---------------------------------------------")

# --- Navigate and Run ---
%cd /kaggle/working/
!python train.py --epochs 125 --mosaic 1.0 --lr0 0.0008 


--- Running training on AUGMENTED dataset ---
YAML Configuration:
train: train/images
val: val/images
test: test/images
nc: 3
names:
- FireExtinguisher
- ToolBox
- OxygenTank
path: /kaggle/working/data_augmented
imgsz: 640
batch: 16
epochs: 125
patience: 20

---------------------------------------------
/kaggle/working
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov
Ultralytics 8.3.172 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/yolo_params.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=125, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=

In [15]:
# ===================================================================
# STEP 1: INSTALL DEPENDENCIES
# ===================================================================
print("Installing dependencies...")
!pip install ultralytics -q
!pip install pyyaml -q
!pip install opencv-python -q
print("Installation complete!")

# ===================================================================
# STEP 2: SETUP DIRECTORIES
# ===================================================================
import os
import shutil
import cv2
import numpy as np
import random
import yaml
import torch
import torch.nn as nn
from ultralytics import YOLO
from ultralytics.nn.modules import Conv, C2f, SPPF
from ultralytics.nn.tasks import Detect
import math
from ultralytics.utils.loss import BboxLoss

print("\nSetting up working directories...")
working_dir = '/kaggle/working/'
source_data_dir = '/kaggle/input/falcon/HackByte_Dataset/data'
new_data_dir = os.path.join(working_dir, 'data_augmented')

if os.path.exists(new_data_dir):
    shutil.rmtree(new_data_dir)
shutil.copytree(source_data_dir, new_data_dir)
print("Copied original dataset to new augmented directory.")

shutil.copytree('/kaggle/input/falcon/HackByte_Dataset', working_dir, dirs_exist_ok=True)
print("Copied essential files to working directory.")

# ===================================================================
# STEP 3: DYNAMICALLY CREATE CUSTOM MODULES FILE AND IMPORT
# ===================================================================
print("\nDynamically creating custom_modules.py...")
custom_modules_code = """
import torch
import torch.nn as nn
import math
from ultralytics.nn.modules import Conv, C2f, SPPF
from ultralytics.nn.tasks import Detect

class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False),
                                nn.ReLU(),
                                nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False))
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super().__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)

class CBAM(nn.Module):
    def __init__(self, c1, c2):
        super().__init__()
        self.channel_attention = ChannelAttention(c1)
        self.spatial_attention = SpatialAttention()
    def forward(self, x):
        x = x * self.channel_attention(x)
        x = x * self.spatial_attention(x)
        return x

class PConv(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        self.conv = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.act = nn.ReLU(inplace=True)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out_channel = x.shape[1] // 2
        x[:, :out_channel, :, :] = self.conv(x[:, :out_channel, :, :])
        return self.act(x)

def bbox_iou_siou(box1, box2, eps=1e-7):
    b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
    b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    union = w1 * h1 + w2 * h2 - inter + eps
    iou = inter / union
    c_x, c_y = (b1_x1 + b1_x2) / 2, (b1_y1 + b1_y2) / 2
    c_x_gt, c_y_gt = (b2_x1 + b2_x2) / 2, (b2_y1 + b2_y2) / 2
    cw, ch = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1), torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)
    sigma = torch.pow(c_x - c_x_gt, 2) + torch.pow(c_y - c_y_gt, 2)
    sin_alpha_1 = torch.abs(c_y - c_y_gt) / torch.sqrt(sigma + eps)
    sin_alpha_2 = torch.abs(c_x - c_x_gt) / torch.sqrt(sigma + eps)
    threshold = math.pi / 4
    angle_cost = torch.sin(2 * torch.arcsin(torch.sin(torch.abs(torch.asin(sin_alpha_1) - threshold))))
    rho_x = (c_x_gt - c_x) / cw
    rho_y = (c_y_gt - c_y) / ch
    gamma = 2 - angle_cost
    distance_cost = 1 - torch.exp(-gamma * (torch.pow(rho_x, 2) + torch.pow(rho_y, 2)))
    omega_w = torch.abs(w1 - w2) / torch.max(w1, w2)
    omega_h = torch.abs(h1 - h2) / torch.max(h1, h2)
    shape_cost = torch.pow(1 - torch.exp(-1 * omega_w), 4) + torch.pow(1 - torch.exp(-1 * omega_h), 4)
    siou_loss = 1 - iou + distance_cost + shape_cost
    return siou_loss
"""
with open(os.path.join(working_dir, 'custom_modules.py'), 'w') as f:
    f.write(custom_modules_code)

print("Custom modules created.")


# ===================================================================
# STEP 4: CREATE CUSTOM MODEL YAML FILE
# ===================================================================
yolov8s_custom_yaml = """
# YOLOv8s-custom model with CBAM, PConv, and Dropout
# This YAML now references modules directly by their name.
nc: 3
depth_multiple: 0.33
width_multiple: 0.50

backbone:
  [[-1, 1, Conv, [32, 3, 2]],
   [-1, 1, Conv, [64, 3, 2]],
   [-1, 1, C2f, [64, 64, 1, True]],
   [-1, 1, Conv, [128, 3, 2]],
   [-1, 2, C2f, [128, 128, 2, True]],
   
   # Custom blocks
   [-1, 1, CBAM, [128, 128]],
   [-1, 1, PConv, [128]],

   [-1, 1, Conv, [256, 3, 2]],
   [-1, 2, C2f, [256, 256, 2, True]],
   [-1, 1, Conv, [512, 3, 2]],
   [-1, 1, C2f, [512, 512, 1, True]],
   [-1, 1, SPPF, [512, 512, 5]]
  ]

head:
  [[-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, torch.cat, [1]],
   [-1, 1, C2f, [768, 256, 1]],
   
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, torch.cat, [1]],
   [-1, 1, C2f, [384, 128, 1]],
   
   [-1, 1, nn.Dropout2d, [0.1]],
   
   [-1, 1, Conv, [128, 128, 3, 2]],
   [[-1, 12], 1, torch.cat, [1]],
   [-1, 1, C2f, [384, 256, 1]],
   
   [-1, 1, Conv, [256, 256, 3, 2]],
   [[-1, 9], 1, torch.cat, [1]],
   [-1, 1, C2f, [768, 512, 1]],
   
   [[15, 18, 21], 1, Detect, [3, [128, 256, 512]]]
  ]
"""
with open(os.path.join(working_dir, 'yolov8s-custom.yaml'), 'w') as f:
    f.write(yolov8s_custom_yaml)
print("Custom model YAML file created.")

# ===================================================================
# STEP 5: GENERALIZED DATA AUGMENTATION
# ===================================================================
print("\nStarting generalized data augmentation for ALL classes...")
train_images_path = os.path.join(new_data_dir, 'train', 'images')
train_labels_path = os.path.join(new_data_dir, 'train', 'labels')

augmentation_count = 0
for label_file in os.listdir(train_labels_path):
    if not label_file.endswith('.txt'):
        continue
    
    label_path = os.path.join(train_labels_path, label_file)
    with open(label_path, 'r') as f:
        lines = f.readlines()

    if lines:
        image_file = label_file.replace('.txt', '.png')
        image_path = os.path.join(train_images_path, image_file)
        if not os.path.exists(image_path):
            image_file = label_file.replace('.txt', '.jpg')
            image_path = os.path.join(train_images_path, image_file)

        image = cv2.imread(image_path)
        if image is None:
            continue
            
        h, w, _ = image.shape
        augmented_image = image.copy()
        
        for _ in range(random.randint(1, 3)):
            box_w = int(w * random.uniform(0.1, 0.3))
            box_h = int(h * random.uniform(0.1, 0.3))
            x1 = random.randint(0, w - box_w)
            y1 = random.randint(0, h - box_h)
            cv2.rectangle(augmented_image, (x1, y1), (x1 + box_w, y1 + box_h), (0, 0, 0), -1)

        new_image_filename = f"aug_occlusion_{image_file}"
        new_label_filename = f"aug_occlusion_{label_file}"
        cv2.imwrite(os.path.join(train_images_path, new_image_filename), augmented_image)
        shutil.copy(label_path, os.path.join(train_labels_path, new_label_filename))
        augmentation_count += 1
print(f"Generated {augmentation_count} new images with artificial occlusion for all classes.")

# ===================================================================
# STEP 6: CONFIGURE AND TRAIN THE NEW MODEL (NATIVE PYTHON)
# This is the key to a successful run.
# ===================================================================
# We import our custom modules here to ensure they are in the current namespace.
import custom_modules
from custom_modules import CBAM, PConv, bbox_iou_siou
print("\nCustom modules imported successfully into the current session.")

print("\nDynamically replacing BboxLoss with SIoU loss.")
BboxLoss.iou = bbox_iou_siou

print("\nStarting model training with custom architecture and SIoU loss...")

# Change the current directory so YOLO can find our custom_modules.py file
os.chdir(working_dir)

# Initialize YOLO model with our custom YAML file
model = YOLO(os.path.join(working_dir, 'yolov8s-custom.yaml'))

# Train the model using the native Python API, which works in the current session.
model.train(
    data=os.path.join(new_data_dir, 'yolo_params.yaml'),
    epochs=100,
    imgsz=640,
    project='runs/detect',
    name='custom_model_v3'
)

Installing dependencies...
Installation complete!

Setting up working directories...
Copied original dataset to new augmented directory.
Copied essential files to working directory.

Dynamically creating custom_modules.py...
Custom modules created.
Custom model YAML file created.

Starting generalized data augmentation for ALL classes...
Generated 841 new images with artificial occlusion for all classes.

Custom modules imported successfully into the current session.

Dynamically replacing BboxLoss with SIoU loss.

Starting model training with custom architecture and SIoU loss...


KeyError: 'CBAM'

In [17]:
# Import custom modules
import custom_modules
from custom_modules import CBAM, PConv, bbox_iou_siou

# Register for YOLO parsing inside ultralytics internals
import ultralytics.nn.tasks as tasks
tasks.__dict__["CBAM"] = CBAM
tasks.__dict__["PConv"] = PConv

print("\nCustom modules imported successfully into the current session.")

# Replace default bbox loss with custom SIoU
print("\nDynamically replacing BboxLoss with SIoU loss.")
BboxLoss.iou = bbox_iou_siou

print("\nStarting model training with custom architecture and SIoU loss...")

# Change working directory
os.chdir(working_dir)

# Create YOLO model from custom YAML
model = YOLO(os.path.join(working_dir, 'yolov8s-custom.yaml'))

# Train
model.train(
    data=os.path.join(new_data_dir, 'yolo_params.yaml'),
    epochs=100,
    imgsz=640,
    project='runs/detect',
    name='custom_model_v3'
)



Custom modules imported successfully into the current session.

Dynamically replacing BboxLoss with SIoU loss.

Starting model training with custom architecture and SIoU loss...


KeyError: 'CBAM'

In [None]:
# ===================================================================
# STEP 1: INSTALL DEPENDENCIES
# ===================================================================
print("Installing dependencies...")
!pip install ultralytics -q
!pip install pyyaml -q
!pip install opencv-python -q
!pip install timm -q 
print("Installation complete!")

# ===================================================================
# STEP 2: SETUP DIRECTORIES AND CUSTOM MODULES
# ===================================================================
import os
import shutil
import cv2
import numpy as np
import random
import yaml
import torch
import torch.nn as nn
from ultralytics import YOLO
from ultralytics.nn.modules import Conv, C2f, SPPF
from ultralytics.nn.tasks import Detect
import math
from ultralytics.utils.loss import BboxLoss
import timm



In [39]:
# ===================================================================
# STEP 1: INSTALL DEPENDENCIES
# ===================================================================
print("Installing dependencies...")
!pip install ultralytics -q
!pip install pyyaml -q
!pip install opencv-python -q
print("Installation complete!")

# ===================================================================
# STEP 2: SETUP, COPY, AND AUGMENT THE DATA (Our Winning Strategy)
# ===================================================================
import os
import shutil
import cv2
import numpy as np
import random
import yaml
import torch
import torch.nn as nn
from ultralytics import YOLO
from ultralytics.nn.modules import Conv, C2f, SPPF, Concat
from ultralytics.nn.tasks import Detect
from ultralytics.utils.loss import BboxLoss
from ultralytics.utils.ops import xywh2xyxy
from ultralytics.utils.metrics import bbox_iou
import math

# --- Basic Setup ---
source_data_dir = '/kaggle/input/falcon/HackByte_Dataset/data'
working_dir = '/kaggle/working/'
new_data_dir = os.path.join(working_dir, 'data_augmented')

# --- Clean and Create Directories ---
print("\nSetting up working directories...")
if os.path.exists(new_data_dir):
    shutil.rmtree(new_data_dir)
shutil.copytree(source_data_dir, new_data_dir, dirs_exist_ok=True)

# --- Perform Artificial Occlusion ---
print("\nStarting advanced augmentation...")
train_images_path = os.path.join(new_data_dir, 'train', 'images')
train_labels_path = os.path.join(new_data_dir, 'train', 'labels')
augmentation_count = 0
if not any(f.startswith('aug_occlusion_') for f in os.listdir(train_images_path)):
    for label_file in os.listdir(train_labels_path):
        if not label_file.endswith('.txt'): continue
        with open(os.path.join(train_labels_path, label_file), 'r') as f:
            lines = f.readlines()
        if lines:
            image_file = label_file.replace('.txt', '.png')
            image_path = os.path.join(train_images_path, image_file)
            if not os.path.exists(image_path):
                image_file = label_file.replace('.txt', '.jpg')
                image_path = os.path.join(train_images_path, image_file)
            image = cv2.imread(image_path)
            if image is None: continue
            h, w, _ = image.shape
            augmented_image = image.copy()
            for _ in range(random.randint(1, 3)):
                box_w, box_h = int(w*random.uniform(0.1,0.3)), int(h*random.uniform(0.1,0.3))
                x1, y1 = random.randint(0, w-box_w), random.randint(0, h-box_h)
                cv2.rectangle(augmented_image, (x1, y1), (x1+box_w, y1+box_h), (0,0,0), -1)
            new_image_filename = f"aug_occlusion_{image_file}"
            cv2.imwrite(os.path.join(train_images_path, new_image_filename), augmented_image)
            shutil.copy(os.path.join(train_labels_path, label_file), os.path.join(train_labels_path, new_image_filename.replace('.png', '.txt').replace('.jpg', '.txt')))
            augmentation_count += 1
    print(f"Generated {augmentation_count} new images with artificial occlusion.")
else:
    print("Augmented images already exist. Skipping generation.")


# ===================================================================
# STEP 3: DEFINE AND MANUALLY INJECT CUSTOM MODULES
# ===================================================================
print("\nDefining custom modules and loss function...")

# --- CBAM Module Definition ---
class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False), nn.ReLU(), nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False))
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        return self.sigmoid(avg_out + max_out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        padding = 3 if kernel_size == 7 else 1
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        return self.sigmoid(self.conv1(x))

class CBAM(nn.Module):
    def __init__(self, c1, c2):
        super().__init__()
        self.channel_attention = ChannelAttention(c1)
        self.spatial_attention = SpatialAttention()
    def forward(self, x):
        x = x * self.channel_attention(x)
        return x * self.spatial_attention(x)

# --- FasterNet-like PConv Block ---
class PConv(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        self.conv = nn.Conv2d(dim // 2, dim // 2, 3, padding=1, groups=dim // 2)
        self.act = nn.ReLU(inplace=True)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out_channel = x.shape[1] // 2
        x[:, :out_channel, :, :] = self.conv(x[:, :out_channel, :, :])
        return self.act(x)

# --- SIoU Loss Function ---
def siou_loss(pred, target, eps=1e-7):
    # This is a simplified SIoU function to replace the default in BboxLoss
    b1_x1, b1_y1, b1_x2, b1_y2 = pred.chunk(4, -1)
    b2_x1, b2_y1, b2_x2, b2_y2 = target.chunk(4, -1)
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    union = w1 * h1 + w2 * h2 - inter + eps
    iou = inter / union
    c_x, c_y = (b1_x1 + b1_x2) / 2, (b1_y1 + b1_y2) / 2
    c_x_gt, c_y_gt = (b2_x1 + b2_x2) / 2, (b2_y1 + b2_y2) / 2
    cw, ch = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1), torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)
    sigma = torch.pow(c_x - c_x_gt, 2) + torch.pow(c_y - c_y_gt, 2)
    sin_alpha_1 = torch.abs(c_y - c_y_gt) / torch.sqrt(sigma + eps)
    sin_alpha_2 = torch.abs(c_x - c_x_gt) / torch.sqrt(sigma + eps)
    threshold = math.pi / 4
    angle_cost = torch.sin(2 * torch.arcsin(torch.sin(torch.abs(torch.asin(sin_alpha_1) - threshold))))
    rho_x = (c_x_gt - c_x) / cw
    rho_y = (c_y_gt - c_y) / ch
    gamma = 2 - angle_cost
    distance_cost = 1 - torch.exp(-gamma * (torch.pow(rho_x, 2) + torch.pow(rho_y, 2)))
    omega_w = torch.abs(w1 - w2) / torch.max(w1, w2)
    omega_h = torch.abs(h1 - h2) / torch.max(h1, h2)
    shape_cost = torch.pow(1 - torch.exp(-1 * omega_w), 4) + torch.pow(1 - torch.exp(-1 * omega_h), 4)
    siou_loss = 1 - iou + distance_cost + shape_cost
    return siou_loss

# --- Manual Model Injection ---
print("Manually injecting custom layers into the YOLOv8s backbone...")
model = YOLO('yolov8s.pt')
backbone = model.model.model[:-1]
head = model.model.model[-1]

# Inject CBAM and PConv after the C2f block at index 6
backbone.insert(7, CBAM(256, 256))
backbone.insert(8, PConv(256))

# Rebuild the full model
model.model.model = torch.nn.Sequential(*backbone, head)
model.model.model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
print("Model architecture modified successfully.")


# ===================================================================
# STEP 4: CONFIGURE AND RUN TRAINING PROGRAMMATICALLY
# ===================================================================
# --- Create the data configuration file ---
data_yaml_path = os.path.join(working_dir, 'data_config.yaml')
with open(data_yaml_path, 'w') as f:
    yaml.dump({
        'path': new_data_dir,
        'train': 'train/images',
        'val': 'val/images',
        'test': 'test/images',
        'nc': 3,
        'names': ['FireExtinguisher', 'ToolBox', 'OxygenTank']
    }, f)
print("\nCreated data configuration file.")

# --- Patch the loss function in the same session ---
BboxLoss.iou = siou_loss
print("BboxLoss patched with SIoU loss function.")

# --- Run the training directly from Python ---
print("\n--- Running FINAL POLISH training programmatically ---")
os.chdir(working_dir)
model.train(
    data=data_yaml_path,
    epochs=100,
    imgsz=640,
    batch=16,
    mosaic=1.0,
    lr0=0.0008,
    cos_lr=True,
    project='runs/detect',
    name='Final_Model_Custom',
    patience=25
)

# After training, run validation on the test set
print("\n--- Running final validation on the test set ---")
model.val(split='test')

Installing dependencies...
Installation complete!

Setting up working directories...

Starting advanced augmentation...
Generated 841 new images with artificial occlusion.

Defining custom modules and loss function...
Manually injecting custom layers into the YOLOv8s backbone...
Model architecture modified successfully.

Created data configuration file.
BboxLoss patched with SIoU loss function.

--- Running FINAL POLISH training programmatically ---
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/kaggle/working/data_config.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015,

[34m[1mtrain: [0mScanning /kaggle/working/data_augmented/train/labels... 1687 images, 5 backgrounds, 0 corrupt: 100%|██████████| 1687/1687 [00:14<00:00, 116.19it/s]


[34m[1mtrain: [0mNew cache created: /kaggle/working/data_augmented/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1912.0±1341.6 MB/s, size: 3015.9 KB)


[34m[1mval: [0mScanning /kaggle/working/data_augmented/val/labels... 154 images, 0 backgrounds, 0 corrupt: 100%|██████████| 154/154 [00:00<00:00, 341.97it/s]

[34m[1mval: [0mNew cache created: /kaggle/working/data_augmented/val/labels.cache





Plotting labels to runs/detect/Final_Model_Custom/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.0008' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/Final_Model_Custom[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      7.31G      3.217      3.796      3.941         48        640:  95%|█████████▌| 101/106 [00:50<00:02,  1.98it/s]


KeyboardInterrupt: 

In [1]:
import ultralytics.nn.modules as ultralytics_modules
import ultralytics.nn.tasks as ultralytics_tasks

print("\nDefining and patching custom modules...")

class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False), nn.ReLU(), nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False))
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        return self.sigmoid(avg_out + max_out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        padding = 3 if kernel_size == 7 else 1
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        return self.sigmoid(self.conv1(x))

class CBAM(nn.Module):
    def __init__(self, c1, c2):
        super().__init__()
        self.channel_attention = ChannelAttention(c1)
        self.spatial_attention = SpatialAttention()
    def forward(self, x):
        x = x * self.channel_attention(x)
        return x * self.spatial_attention(x)

class PConv(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        self.conv = nn.Conv2d(dim // 2, dim // 2, 3, padding=1, groups=dim // 2)
        self.act = nn.ReLU(inplace=True)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out_channel = x.shape[1] // 2
        x[:, :out_channel, :, :] = self.conv(x[:, :out_channel, :, :])
        return self.act(x)

ultralytics_modules.__dict__['CBAM'] = CBAM
ultralytics_modules.__dict__['PConv'] = PConv
ultralytics_tasks.__dict__['CBAM'] = CBAM
ultralytics_tasks.__dict__['PConv'] = PConv

def siou_loss(pred, target, eps=1e-7):
    b1_x1, b1_y1, b1_x2, b1_y2 = pred.chunk(4, -1)
    b2_x1, b2_y1, b2_x2, b2_y2 = target.chunk(4, -1)
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    union = w1 * h1 + w2 * h2 - inter + eps
    iou = inter / union
    c_x, c_y = (b1_x1 + b1_x2) / 2, (b1_y1 + b1_y2) / 2
    c_x_gt, c_y_gt = (b2_x1 + b2_x2) / 2, (b2_y1 + b2_y2) / 2
    cw, ch = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1), torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)
    sigma = torch.pow(c_x - c_x_gt, 2) + torch.pow(c_y - c_y_gt, 2)
    sin_alpha_1 = torch.abs(c_y - c_y_gt) / torch.sqrt(sigma + eps)
    sin_alpha_2 = torch.abs(c_x - c_x_gt) / torch.sqrt(sigma + eps)
    threshold = math.pi / 4
    angle_cost = torch.sin(2 * torch.arcsin(torch.sin(torch.abs(torch.asin(sin_alpha_1) - threshold))))
    rho_x = (c_x_gt - c_x) / cw
    rho_y = (c_y_gt - c_y) / ch
    gamma = 2 - angle_cost
    distance_cost = 1 - torch.exp(-gamma * (torch.pow(rho_x, 2) + torch.pow(rho_y, 2)))
    omega_w = torch.abs(w1 - w2) / torch.max(w1, w2)
    omega_h = torch.abs(h1 - h2) / torch.max(h1, h2)
    shape_cost = torch.pow(1 - torch.exp(-1 * omega_w), 4) + torch.pow(1 - torch.exp(-1 * omega_h), 4)
    siou_loss = 1 - iou + distance_cost + shape_cost
    return siou_loss
BboxLoss.iou = siou_loss
print("Custom modules and SIoU loss patched.")


# ===================================================================
# STEP 4: CREATE CUSTOM MODEL YAML FILE
# ===================================================================
yolov8s_custom_yaml = """
nc: 3
depth_multiple: 0.33
width_multiple: 0.50

backbone:
  [[-1, 1, Conv, [32, 3, 2]],
   [-1, 1, Conv, [64, 3, 2]],
   [-1, 1, C2f, [64, 64, 1, True]],
   [-1, 1, Conv, [128, 3, 2]],
   [-1, 2, C2f, [128, 128, 2, True]],
   
   [-1, 1, CBAM, [128, 128]],
   [-1, 1, PConv, [128]],

   [-1, 1, Conv, [256, 3, 2]],
   [-1, 2, C2f, [256, 256, 2, True]],
   [-1, 1, Conv, [512, 3, 2]],
   [-1, 1, C2f, [512, 512, 1, True]],
   
   [-1, 1, SPPF, [512, 512]]
  ]

head:
  [[-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],
   [-1, 1, C2f, [768, 256, 1]],
   
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],
   [-1, 1, C2f, [384, 128, 1]],
   
   [-1, 1, nn.Dropout2d, [0.1]],
   
   [-1, 1, Conv, [128, 128, 3, 2]],
   [[-1, 12], 1, Concat, [1]],
   [-1, 1, C2f, [384, 256, 1]],
   
   [-1, 1, Conv, [256, 256, 3, 2]],
   [[-1, 9], 1, Concat, [1]],
   [-1, 1, C2f, [768, 512, 1]],
   
   # CRITICAL FIX: The Detect module arguments are now correctly formatted
   [[15, 18, 21], 1, Detect, [3, [128, 256, 512]]]
  ]
"""
with open(yaml_path, 'w') as f:
    f.write(yolov8s_custom_yaml)
print("Custom model YAML file created.")

# ===================================================================
# STEP 5: CONFIGURE AND RUN TRAINING PROGRAMMATICALLY
# ===================================================================
with open(data_yaml_path, 'w') as f:
    yaml.dump({
        'path': new_data_dir,
        'train': 'train/images',
        'val': 'val/images',
        'test': 'test/images',
        'nc': 3,
        'names': ['FireExtinguisher', 'ToolBox', 'OxygenTank']
    }, f)
print("\nCreated data configuration file.")

print("\n--- Running FINAL POLISH training programmatically ---")
os.chdir(working_dir)

model = YOLO(yaml_path)
model.train(
    data=data_yaml_path,
    epochs=100,
    imgsz=640,
    batch=16,
    mosaic=1.0,
    lr0=0.0008,
    cos_lr=True,
    project='runs/detect',
    name='Final_Model_Custom',
    patience=25
)


Defining and patching custom modules...


NameError: name 'nn' is not defined

In [2]:
# ===================================================================
# STEP 1: INSTALL DEPENDENCIES
# ===================================================================
print("Installing dependencies...")
!pip install ultralytics -q
!pip install pyyaml -q
!pip install opencv-python -q
!pip install timm -q
print("Installation complete!")

# ===================================================================
# STEP 2: DEFINE AND PATCH CUSTOM MODULES AND LOSS FUNCTION
# ===================================================================
import os
import shutil
import yaml
import cv2
import numpy as np
import torch
import torch.nn as nn
from ultralytics import YOLO
import timm
from ultralytics.nn import tasks
from ultralytics.nn.modules import Conv, C2f, Concat
from ultralytics.nn.modules.head import Detect
from ultralytics.utils.metrics import bbox_iou
from ultralytics.utils.ops import xywh2xyxy
from ultralytics.utils.loss import BboxLoss

print("\nDefining and patching custom modules...")

# 1. Register Detect in globals (fix KeyError)
globals()["Detect"] = Detect

# 2. Custom Swin Transformer Backbone
class TimmBackbone(torch.nn.Module):
    def __init__(self, model_name='swin_small_patch4_window7_224', pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            model_name,
            features_only=True,
            pretrained=pretrained,
            out_indices=(1, 2, 3)
        )

        # Disable strict input size check (important!)
        if hasattr(self.model, 'patch_embed') and hasattr(self.model.patch_embed, 'strict_img_size'):
            self.model.patch_embed.strict_img_size = False

        self.out_channels = self.model.feature_info.channels()
        print(f"Swin Backbone initialized. Output channels: {self.out_channels}")

    def forward(self, x):
        return self.model(x)

# Register the custom backbone
from ultralytics.nn import tasks
tasks.TimmBackbone = TimmBackbone
globals()["TimmBackbone"] = TimmBackbone

# 4. Define SIoU Loss (simple placeholder for patching demo)
def siou_loss(pred, target, eps=1e-7):
    iou = bbox_iou(xywh2xyxy(pred), xywh2xyxy(target), xywh=False, CIoU=True)
    return iou  # Placeholder: use your full SIoU logic

BboxLoss.iou_function = siou_loss
print("Custom module 'TimmBackbone' and SIoU loss patched.")

# ===================================================================
# STEP 3: CREATE CUSTOM MODEL YAML FILE
# ===================================================================
print("\nCreating custom model YAML...")

yolov8s_swin_yaml = """
nc: 3
backbone:
  - [-1, 1, TimmBackbone, ['swin_small_patch4_window7_224']]

head:
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 1], 1, Concat, [1]]
  - [-1, 1, C2f, [896, 256, 1, False]]

  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 0], 1, Concat, [1]]
  - [-1, 1, C2f, [448, 128, 1, False]]

  - [-1, 1, Conv, [128, 3, 2]]
  - [[-1, 6], 1, Concat, [1]]
  - [-1, 1, C2f, [384, 256, 1, False]]

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 3], 1, Concat, [1]]
  - [-1, 1, C2f, [768, 512, 1, False]]

  - [[8, 11, 14], 1, Detect, [nc]]
"""

working_dir = '/kaggle/working/'
yaml_path = os.path.join(working_dir, 'yolov8s-swin.yaml')
with open(yaml_path, 'w') as f:
    f.write(yolov8s_swin_yaml)
print("Custom Swin Transformer model YAML file created.")

# ===================================================================
# STEP 4: DATA PREPARATION (AUGMENTATION)
# ===================================================================
source_data_dir = '/kaggle/input/falcon/HackByte_Dataset/data'
new_data_dir = os.path.join(working_dir, 'data_augmented')

if not any(f.startswith('aug_occlusion_') for f in os.listdir(os.path.join(new_data_dir, 'train', 'images')) if os.path.exists(new_data_dir)):
    print("Performing data augmentation...")
    if os.path.exists(new_data_dir): shutil.rmtree(new_data_dir)
    shutil.copytree(source_data_dir, new_data_dir, dirs_exist_ok=True)
    # Example augmentation logic can be added here
else:
    print("Augmented data found. Skipping augmentation.")

# Create data config YAML
data_yaml_path = os.path.join(working_dir, 'data_config.yaml')
with open(data_yaml_path, 'w') as f:
    yaml.dump({
        'path': new_data_dir,
        'train': 'train/images',
        'val': 'val/images',
        'test': 'test/images',
        'nc': 3,
        'names': ['FireExtinguisher', 'ToolBox', 'OxygenTank']
    }, f)

# ===================================================================
# STEP 5: TRAIN THE MODEL
# ===================================================================
print("\nStarting model training...")
os.chdir(working_dir)

# Load and train
model.train(
    data=data_yaml_path,
    epochs=100,
    imgsz=640,
    batch=8,
    project='runs/detect',
    name='swin_yolov8s_siou',
    patience=25
)


Installing dependencies...
Installation complete!

Defining and patching custom modules...
Custom module 'TimmBackbone' and SIoU loss patched.

Creating custom model YAML...
Custom Swin Transformer model YAML file created.
Augmented data found. Skipping augmentation.

Starting model training...


NameError: name 'model' is not defined

In [32]:
# ===================================================================
# STEP 1: DEFINE AND PATCH CUSTOM MODULES AND LOSS FUNCTION
# ===================================================================
import os
import shutil
import cv2
import numpy as np
import random
import yaml
import torch
import torch.nn as nn
from ultralytics import YOLO
import timm
from ultralytics.nn import tasks
from ultralytics.nn.modules import Conv, C2f, Concat
from ultralytics.utils.loss import BboxLoss
from ultralytics.utils.ops import xywh2xyxy
from ultralytics.nn.tasks import Detect

print("\nDefining and patching custom modules for resuming...")

# 1. Custom Backbone Definition
class TimmBackbone(torch.nn.Module):
    def __init__(self, model_name='swin_small_patch4_window7_224', pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_name, features_only=True, pretrained=pretrained, out_indices=(1, 2, 3))
        self.out_channels = self.model.feature_info.channels()
        print(f"Swin Backbone initialized. Output channels: {self.out_channels}")

    def forward(self, x):
        features = self.model(x)
        return features[1], features[2], features[3]

# 2. Patch the custom module into the tasks dictionary
tasks.TimmBackbone = TimmBackbone

# 3. Define and Patch the SIoU Loss
from ultralytics.utils.metrics import bbox_iou
def siou_loss(pred, target, eps=1e-7):
    iou = bbox_iou(xywh2xyxy(pred), xywh2xyxy(target), xywh=False, CIoU=True)
    return iou
BboxLoss.iou_function = siou_loss
print("Custom module 'TimmBackbone' and SIoU loss patched.")


# ===================================================================
# STEP 2: RESUME TRAINING
# ===================================================================
import os

print("\nResuming model training...")
working_dir = '/kaggle/working/'
new_data_dir = os.path.join(working_dir, 'data_augmented')
data_yaml_path = os.path.join(working_dir, 'data_config.yaml')
checkpoint_path = os.path.join(working_dir, 'runs/detect/swin_yolov8s_siou/weights/last.pt')

if not os.path.exists(checkpoint_path):
    print(f"Error: Checkpoint file not found at {checkpoint_path}")
    print("Please ensure your training run saved a 'last.pt' file in the correct directory.")
else:
    # Change the current directory to where the training happened
    os.chdir(working_dir)

    # Load the last checkpoint
    model = YOLO(checkpoint_path)

    # Resume training from the loaded checkpoint with the same parameters
    model.train(
        data=data_yaml_path,
        epochs=100,
        imgsz=640,
        batch=8,
        project='runs/detect',
        name='swin_yolov8s_siou',
        patience=25,
        resume=True # <<< This is the key argument
    )


Defining and patching custom modules for resuming...
Custom module 'TimmBackbone' and SIoU loss patched.

Resuming model training...
Ultralytics 8.3.172 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/data_config.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/kaggle/working/runs/detect/swin_yolov8s_siou/weights/last.pt, mom

[34m[1mtrain: [0mScanning /kaggle/working/data_augmented/train/labels.cache... 846 images, 5 backgrounds, 0 corrupt: 100%|██████████| 846/846 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2081.1±1419.0 MB/s, size: 3015.9 KB)


[34m[1mval: [0mScanning /kaggle/working/data_augmented/val/labels.cache... 154 images, 0 backgrounds, 0 corrupt: 100%|██████████| 154/154 [00:00<?, ?it/s]


Plotting labels to runs/detect/swin_yolov8s_siou/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Resuming training /kaggle/working/runs/detect/swin_yolov8s_siou/weights/last.pt from epoch 84 to 100 total epochs
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/swin_yolov8s_siou[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     84/100      7.71G     0.3905     0.3068     0.8682         11        640: 100%|██████████| 106/106 [00:28<00:00,  3.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:02<00:00,  3.47it/s]

                   all        154        206      0.994      0.912      0.952      0.888






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     85/100      2.27G     0.3764     0.2825     0.8638         23        640: 100%|██████████| 106/106 [00:28<00:00,  3.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.20it/s]

                   all        154        206      0.991      0.918       0.96      0.897






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     86/100      2.27G     0.3589     0.2707     0.8541         19        640: 100%|██████████| 106/106 [00:28<00:00,  3.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.20it/s]

                   all        154        206      0.991      0.918      0.958      0.894






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     87/100      2.27G     0.3625     0.2791     0.8543         14        640: 100%|██████████| 106/106 [00:28<00:00,  3.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.15it/s]

                   all        154        206      0.989      0.932      0.956       0.89






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     88/100      2.27G     0.3397     0.2583     0.8494         13        640: 100%|██████████| 106/106 [00:28<00:00,  3.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.17it/s]

                   all        154        206      0.993      0.914      0.955      0.892






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     89/100      2.27G     0.3482     0.2554     0.8488         21        640: 100%|██████████| 106/106 [00:28<00:00,  3.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.04it/s]

                   all        154        206      0.993      0.916      0.955      0.889






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     90/100      2.27G      0.344     0.2653     0.8574         13        640: 100%|██████████| 106/106 [00:28<00:00,  3.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.23it/s]

                   all        154        206      0.984      0.922      0.953       0.89





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     91/100      2.27G     0.2621     0.2029     0.7833          8        640: 100%|██████████| 106/106 [00:29<00:00,  3.59it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.15it/s]

                   all        154        206      0.993      0.903      0.947      0.886






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     92/100      2.31G     0.2612     0.1957     0.7775          8        640: 100%|██████████| 106/106 [00:27<00:00,  3.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.16it/s]

                   all        154        206       0.99      0.903      0.944       0.89






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     93/100      2.31G     0.2638     0.1972     0.7885          6        640: 100%|██████████| 106/106 [00:28<00:00,  3.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.15it/s]

                   all        154        206      0.986      0.909      0.948      0.891






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     94/100      2.31G     0.2608      0.193     0.7816         10        640: 100%|██████████| 106/106 [00:28<00:00,  3.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.16it/s]

                   all        154        206      0.986      0.921      0.953      0.892






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     95/100      2.31G     0.2607     0.1926     0.7818          8        640: 100%|██████████| 106/106 [00:28<00:00,  3.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.07it/s]

                   all        154        206      0.986      0.918      0.954      0.892






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     96/100      2.31G     0.2459     0.1849     0.7747          7        640: 100%|██████████| 106/106 [00:28<00:00,  3.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.29it/s]

                   all        154        206      0.984      0.913      0.949       0.89






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     97/100      2.34G     0.2569     0.1915     0.7864          7        640: 100%|██████████| 106/106 [00:28<00:00,  3.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.18it/s]

                   all        154        206      0.986      0.918      0.952      0.894






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     98/100      2.34G     0.2531     0.1902     0.7832          8        640: 100%|██████████| 106/106 [00:28<00:00,  3.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.12it/s]

                   all        154        206      0.985      0.919      0.948      0.889






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     99/100      2.34G     0.2507     0.1834     0.7824          8        640: 100%|██████████| 106/106 [00:28<00:00,  3.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.21it/s]

                   all        154        206      0.985      0.919      0.948      0.893






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    100/100       2.4G     0.2509     0.1883     0.7879          7        640: 100%|██████████| 106/106 [00:27<00:00,  3.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  3.12it/s]

                   all        154        206      0.985      0.919      0.949      0.892






17 epochs completed in 0.152 hours.
Optimizer stripped from runs/detect/swin_yolov8s_siou/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/swin_yolov8s_siou/weights/best.pt, 22.5MB

Validating runs/detect/swin_yolov8s_siou/weights/best.pt...
Ultralytics 8.3.172 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,126,745 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  2.96it/s]
  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        154        206      0.991      0.918       0.96      0.897
      FireExtinguisher         67         67      0.999      0.955      0.978      0.923
               ToolBox         60         60      0.991        0.9      0.947      0.918
            OxygenTank         79         79      0.984      0.899      0.953      0.849
Speed: 0.2ms preprocess, 3.3ms inference, 0.0ms loss, 1.7ms postprocess per image
Results saved to [1mruns/detect/swin_yolov8s_siou[0m


In [36]:
# ===================================================================
# STEP 1: IMPORT DEPENDENCIES AND SET UP
# ===================================================================
import os
from ultralytics import YOLO
import yaml

print("Setting up paths for validation...")
working_dir = '/kaggle/working/'
results_dir = os.path.join(working_dir, 'runs/detect/train2')
best_model_path = os.path.join(results_dir, 'weights/best.pt')
yolo_params_path = os.path.join(working_dir, 'yolo_params.yaml')

# --- Check if the model and data config exist before proceeding ---
if not os.path.exists(best_model_path):
    print(f"Error: Best model not found at {best_model_path}")
    print("Please ensure your training run completed successfully and the path is correct.")
elif not os.path.exists(yolo_params_path):
    print(f"Error: yolo_params.yaml not found at {yolo_params_path}")
    print("Please ensure the file exists in your working directory.")
else:
    print(f"Found trained model at: {best_model_path}")
    print(f"Found data config at: {yolo_params_path}")
    
    # --- Update yolo_params.yaml to point to the test set ---
    print("\nUpdating yolo_params.yaml to point to the test dataset for validation.")
    with open(yolo_params_path, 'r') as f:
        yolo_config = yaml.safe_load(f)

    # CRITICAL FIX: The val path must be relative to the 'path' key
    # which is already set to '/kaggle/working/data_augmented'.
    yolo_config['val'] = 'test/images'
    
    with open(yolo_params_path, 'w') as f:
        yaml.dump(yolo_config, f, sort_keys=False)

    # ===================================================================
    # STEP 2: RUN THE MODEL ON THE TEST SET
    # ===================================================================
    print("\nStarting validation on the test set...")

    # Change the current directory to where the model was trained
    os.chdir(working_dir)

    # Load the best model from the training run
    model = YOLO(best_model_path)

    # Validate the model. The results will be printed and saved to the results directory.
    results = model.val(data=yolo_params_path)

   
print("\n--- Final Performance Metrics on Test Set ---")
# CORRECTED ATTRIBUTES: maps50 -> map50, maps -> map
print(f"Overall mAP50: {results.box.map50}")
print(f"Overall mAP50-95: {results.box.map}")
print(f"Class-specific P, R, mAP50, mAP50-95: {results.box.mp}, {results.box.mr}, {results.box.map50}, {results.box.map}")
print("------------------------------------------")


Setting up paths for validation...
Found trained model at: /kaggle/working/runs/detect/train2/weights/best.pt
Found data config at: /kaggle/working/yolo_params.yaml

Updating yolo_params.yaml to point to the test dataset for validation.

Starting validation on the test set...
Ultralytics 8.3.172 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,126,745 parameters, 0 gradients, 28.4 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2978.1±1225.1 MB/s, size: 1813.1 KB)


[34m[1mval: [0mScanning /kaggle/working/data_augmented/test/labels.cache... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 25/25 [00:11<00:00,  2.18it/s]
  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        400        560      0.955      0.809      0.889      0.808
      FireExtinguisher        183        183      0.962      0.847      0.897      0.789
               ToolBox        193        193      0.953      0.813      0.903      0.835
            OxygenTank        184        184       0.95      0.766      0.868      0.802
Speed: 0.4ms preprocess, 4.9ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to [1mruns/detect/val5[0m

--- Final Performance Metrics on Test Set ---
Overall mAP50: 0.8889286585375306
Overall mAP50-95: 0.8084343011137606
Class-specific P, R, mAP50, mAP50-95: 0.9549166153102506, 0.8089234619786287, 0.8889286585375306, 0.8084343011137606
------------------------------------------
