# YOLOv8 model for Underwater Object Detection

This is a replication of the model/architecture proposed in **A lightweight YOLOv8 integrating FasterNet for real-time underwater object detection**. The paper can be found at [this link](https://www.researchgate.net/publication/378873018_A_lightweight_YOLOv8_integrating_FasterNet_for_real-time_underwater_object_detection).

In [1]:
# Install the pinned Ultralytics release used by this notebook (-q keeps pip quiet).
!pip install ultralytics==8.2.103 -q

# Clear the output this cell has produced so far (i.e. the install step).
from IPython import display
display.clear_output()

# prevent ultralytics from tracking your activity
!yolo settings sync=False

# Print the Ultralytics environment summary (version, Python/torch, hardware).
import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.2.103 🚀 Python-3.12.4 torch-2.8.0 CPU (Apple M3)
Setup complete ✅ (8 CPUs, 8.0 GB RAM, 277.6/460.4 GB disk)


In [2]:
from ultralytics import YOLO

from IPython.display import display, Image

In [3]:
# uwyolov8_fasternet.py
# Implements: PConv, FasterBlock, FasterNetBackbone (features @ 8/16/32),
# GSConv, LC2f, SPPF, and FBiFPN (concat-based, no P2), matching the paper.

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

# ---------- Small helpers ----------
def autopad(k, p=None):  # kernel, padding
    """Return the padding to use for a convolution with kernel size ``k``.

    Args:
        k: Kernel size, either a single int or a per-dimension tuple/list of ints
           (both forms are accepted by ``nn.Conv2d``).
        p: Explicit padding. When it is not ``None`` it is returned unchanged.

    Returns:
        ``p`` if it was given; otherwise half the kernel size (``k // 2``), computed
        per dimension and returned as a tuple when ``k`` is a tuple/list.
    """
    if p is not None:
        return p
    if isinstance(k, (tuple, list)):
        # Rectangular kernels need one padding value per spatial dimension.
        return tuple(ki // 2 for ki in k)
    return k // 2

class ConvBNAct(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and an activation.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size; the padding is derived from it with ``autopad``.
        s: Stride.
        act: ``"silu"`` (in-place SiLU), ``"gelu"`` or ``None`` (identity).
        groups: Number of convolution groups.

    Raises:
        ValueError: If ``act`` is none of the supported values.
    """
    def __init__(self, c1, c2, k=1, s=1, act="silu", groups=1):
        super().__init__()
        padding = autopad(k)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self._build_activation(act)

    @staticmethod
    def _build_activation(act):
        """Map the ``act`` selector to a freshly constructed activation module."""
        if act is None:
            return nn.Identity()
        if act == "silu":
            return nn.SiLU(inplace=True)
        if act == "gelu":
            return nn.GELU()
        raise ValueError("Unknown act")

    def forward(self, x):
        normed = self.bn(self.conv(x))
        return self.act(normed)

# ---------- GSConv (Slim-neck by GSConv) ----------
class GSConv(nn.Module):
    """
    GSConv blends standard conv + depthwise conv + shuffle.

    Data flow as implemented here:
      * ``y``  = 1x1 conv of the input (``self.sc.conv``), computed once;
      * ``a``  = BN + SiLU of ``y`` -> depthwise ``k``x``k`` conv (stride ``s``) -> BN -> SiLU;
      * ``b``  = ``y`` (before BN/activation) -> 1x1 ConvBNAct with stride ``s``;
      * output = channel shuffle (2 groups) of ``cat([a, b])``, with ``c2`` channels.

    Args:
        c1: Input channels.
        c2: Output channels; must be even and at least 2 so that the two branches can
            be interleaved by the 2-group channel shuffle.
        k: Kernel size of the depthwise convolution.
        s: Stride. It is applied in both branches so their outputs stay concatenable
           (the spatial sizes agree for odd ``k``).

    Raises:
        ValueError: If ``c2`` is odd or smaller than 2.
    """
    def __init__(self, c1, c2, k=3, s=1):
        super().__init__()
        if c2 < 2 or c2 % 2 != 0:
            # An odd width cannot be shuffled in two groups and used to fail only
            # later, inside forward(); report it where the layer is built.
            raise ValueError(f"GSConv requires an even c2 >= 2, got c2={c2}")
        c_mid = c2 // 2
        self.sc = ConvBNAct(c1, c_mid, k=1, s=1)          # standard 1x1
        self.dw = nn.Conv2d(c_mid, c_mid, k, s, autopad(k), groups=c_mid, bias=False)  # depthwise
        self.dwb = nn.BatchNorm2d(c_mid)
        self.act = nn.SiLU(inplace=True)
        # Carries the same stride as the depthwise branch; with s=1 this is unchanged.
        self.pw = ConvBNAct(c_mid, c2 - c_mid, k=1, s=s)  # pointwise for the other branch

    def channel_shuffle(self, x, groups=2):
        """Interleave the channels of ``x`` across ``groups`` equally sized groups.

        Raises:
            ValueError: If the channel count is not divisible by ``groups``.
        """
        b, c, h, w = x.size()
        if c % groups != 0:
            raise ValueError(f"channels ({c}) must be divisible by groups ({groups})")
        x = x.view(b, groups, c // groups, h, w)
        x = torch.transpose(x, 1, 2).contiguous()
        return x.view(b, c, h, w)

    def forward(self, x):
        # Run the shared 1x1 convolution once and feed both branches from it.
        # bn() allocates a new tensor, so the in-place SiLU does not touch ``y``.
        y = self.sc.conv(x)
        a = self.sc.act(self.sc.bn(y))
        a = self.act(self.dwb(self.dw(a)))
        b = self.pw(y)
        out = torch.cat([a, b], 1)
        return self.channel_shuffle(out)

# ---------- FasterNet primitives ----------
class PConv(nn.Module):
    """
    Partial convolution: convolve a ratio r of channels, pass-through the rest.

    The first ``cp`` channels go through a ``k``x``k`` ConvBNAct, the remaining channels
    are left untouched, and the re-concatenated tensor is projected to ``c2`` channels
    by a 1x1 ConvBNAct.

    Args:
        c1: Input channels.
        c2: Output channels of the final 1x1 projection.
        k: Kernel size of the partial convolution.
        s: Stride; only ``1`` is supported because the pass-through channels are not
           resampled and must keep the same spatial size as the convolved ones.
        r: Fraction of ``c1`` that is convolved. The resulting channel count is
           clamped to the range ``[1, c1]``.
        act: Activation selector forwarded to both ConvBNAct layers.

    Raises:
        ValueError: If ``s`` is not 1.
    """
    def __init__(self, c1, c2, k=3, s=1, r=0.25, act="gelu"):
        super().__init__()
        if s != 1:
            # A strided partial conv would no longer match the untouched channels
            # spatially, which previously surfaced as a concat error in forward().
            raise ValueError(f"PConv only supports s=1, got s={s}")
        # Clamp so that r > 1 cannot request more channels than the input has.
        cp = min(c1, max(1, int(round(c1 * r))))
        self.cp = cp
        self.conv = ConvBNAct(cp, cp, k, s, act=act)
        self.proj = ConvBNAct(c1, c2, k=1, s=1, act=act)
        self.split = cp

    def forward(self, x):
        x_conv, x_id = x[:, :self.split], x[:, self.split:]
        x_conv = self.conv(x_conv)
        # Convolved channels first, untouched channels after, as in the input order.
        x = torch.cat([x_conv, x_id], 1)
        return self.proj(x)

class FasterBlock(nn.Module):
    """Residual block: adds the output of one 3x3, stride-1, GELU PConv to its input.

    Args:
        c: Channel count (input and output are the same width).
        r: Fraction of channels convolved inside the PConv.
    """
    def __init__(self, c, r=0.25):
        super().__init__()
        self.pconv = PConv(c1=c, c2=c, k=3, s=1, r=r, act="gelu")

    def forward(self, x):
        branch = self.pconv(x)
        return x + branch

class MergeDown(nn.Module):
    """Strided downsampling layer: bias-free Conv2d -> BatchNorm2d -> GELU.

    The convolution uses a 2x2 kernel when ``s == 2`` and a 4x4 kernel otherwise, and
    it is built with **zero padding**. With kernel == stride this gives an output of
    exactly ``H // s`` x ``W // s`` (e.g. 640 -> 160 for ``s=4``). Padding by half the
    kernel instead would add one extra row/column per stage (640 -> 161 -> 81 -> ...),
    so neighbouring pyramid levels would no longer differ by an exact factor of 2.

    The sub-layers are registered as ``conv.conv``, ``conv.bn`` and ``conv.act``.

    Args:
        c1: Input channels.
        c2: Output channels.
        s: Stride of the convolution.
    """
    def __init__(self, c1, c2, s=2):
        super().__init__()
        k = 2 if s == 2 else 4
        stem = nn.Sequential()
        stem.add_module("conv", nn.Conv2d(c1, c2, k, s, padding=0, bias=False))
        stem.add_module("bn", nn.BatchNorm2d(c2))
        stem.add_module("act", nn.GELU())
        self.conv = stem

    def forward(self, x):
        return self.conv(x)

class FasterNetStage(nn.Module):
    """A stack of ``n`` FasterBlocks of width ``c`` applied one after another.

    Args:
        c: Channel count shared by every block.
        n: Number of blocks in the stage.
        r: Partial-convolution ratio passed to each FasterBlock.
    """
    def __init__(self, c, n, r=0.25):
        super().__init__()
        layers = []
        for _ in range(n):
            layers.append(FasterBlock(c, r=r))
        self.blocks = nn.Sequential(*layers)

    def forward(self, x):
        out = self.blocks(x)
        return out

class FasterNetBackbone(nn.Module):
    """
    Four-stage FasterNet feature extractor that returns its last three stage outputs.

    Every stage is a MergeDown (strides 4, 2, 2, 2) followed by a FasterNetStage
    (1, 2, 8 and 2 blocks) at widths 64/128/256/512. The first stage (nominal stride 4,
    "P2") is only used internally; forward() returns ``[P3, P4, P5]`` with 128/256/512
    channels at nominal strides 8/16/32, intended to be 80x80 / 40x40 / 20x20 for a
    640x640 input.
    """
    def __init__(self):
        super().__init__()
        w1, w2, w3, w4 = 64, 128, 256, 512

        # Stage 1: 3 -> 64 channels, stride 4 (not exported)
        self.m1 = MergeDown(3, w1, s=4)
        self.s1 = FasterNetStage(w1, n=1)

        # Stage 2: 64 -> 128 channels, stride 2 -> P3
        self.m2 = MergeDown(w1, w2, s=2)
        self.s2 = FasterNetStage(w2, n=2)

        # Stage 3: 128 -> 256 channels, stride 2 -> P4
        self.m3 = MergeDown(w2, w3, s=2)
        self.s3 = FasterNetStage(w3, n=8)

        # Stage 4: 256 -> 512 channels, stride 2 -> P5
        self.m4 = MergeDown(w3, w4, s=2)
        self.s4 = FasterNetStage(w4, n=2)

        # Channel counts of the returned maps, in [P3, P4, P5] order.
        self.out_channels = (w2, w3, w4)

    def forward(self, x):
        p2 = self.m1(x)
        p2 = self.s1(p2)

        p3 = self.m2(p2)
        p3 = self.s2(p3)

        p4 = self.m3(p3)
        p4 = self.s3(p4)

        p5 = self.m4(p4)
        p5 = self.s4(p5)
        return [p3, p4, p5]

    @property
    def channels(self):
        """Channel counts of the maps returned by forward()."""
        return self.out_channels

# ---------- SPPF (as in YOLOv5/8) ----------
class SPPF(nn.Module):
    """Spatial pyramid pooling (fast): 1x1 reduce, three chained max-pools, 1x1 fuse.

    Args:
        c1: Input channels; the hidden width is ``c1 // 2``.
        c2: Output channels.
        k: Max-pool kernel size (stride 1, padding ``k // 2``).
    """
    def __init__(self, c1, c2, k=5):
        super().__init__()
        hidden = c1 // 2
        self.cv1 = ConvBNAct(c1, hidden, 1, 1)
        self.cv2 = ConvBNAct(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        # Reduced features followed by three successively pooled versions of them.
        pyramid = [self.cv1(x)]
        for _ in range(3):
            pyramid.append(self.m(pyramid[-1]))
        return self.cv2(torch.cat(pyramid, 1))

# ---------- LC2f (Lightweight-C2f) ----------
class LC2f(nn.Module):
    """
    Lightweight C2f-style block built from FasterBlocks and a GSConv fusion layer.

    A 1x1 ConvBNAct reduces the input to ``c2 // 2`` channels, ``n`` FasterBlocks
    produce further feature maps, and all ``n + 1`` maps are concatenated and fused
    to ``c2`` channels by a 3x3 GSConv.

    Args:
        c1: Input channels.
        c2: Output channels.
        n: Number of FasterBlocks.
        shortcut: If true, each block consumes the previous block's output (chained);
            otherwise every block consumes the reduced input directly.
    """
    def __init__(self, c1, c2, n=2, shortcut=True):
        super().__init__()
        hidden = int(c2 // 2)
        self.cv1 = ConvBNAct(c1, hidden, 1, 1)
        self.m = nn.ModuleList([FasterBlock(hidden, r=0.25) for _ in range(n)])
        self.shortcut = shortcut
        self.cv2 = GSConv(hidden * (n + 1), c2, k=3, s=1)

    def forward(self, x):
        feats = [self.cv1(x)]
        for block in self.m:
            if self.shortcut:
                source = feats[-1]
            else:
                source = feats[0]
            feats.append(block(source))
        return self.cv2(torch.cat(feats, 1))

# ---------- FBiFPN (concat-based; P3,P4,P5 only; SPPF on P5 first) ----------
class FBiFPN(nn.Module):
    """
    Fast BiFPN variant:
      - Inputs: P3(80), P4(40), P5(20)
      - Unify every level to ``c`` channels with 1x1 convs
      - Apply SPPF on the unified P5
      - Top-down and bottom-up paths with CONCAT fusion, each fused by an LC2f block

    Before every concatenation the resampled feature map is resized (nearest
    neighbour) to the spatial size of the map it is fused with. When adjacent levels
    differ by exactly a factor of 2 this is the same as the plain 2x up/down-sampling;
    otherwise it prevents a size mismatch in ``torch.cat``.

    Args:
        c3: Channels of the incoming P3 map.
        c4: Channels of the incoming P4 map.
        c5: Channels of the incoming P5 map.
        c: Unified channel width of every internal and output map.
        repeats: Accepted for configuration compatibility but currently unused; a
            single top-down + bottom-up pass is always built.

    Returns (forward):
        ``[p3_td, p4_bu, p5_bu]`` - three maps with ``c`` channels each.
    """
    def __init__(self, c3=128, c4=256, c5=512, c=256, repeats=1):
        super().__init__()
        self.c3_in = ConvBNAct(c3, c, 1, 1)
        self.c4_in = ConvBNAct(c4, c, 1, 1)
        self.c5_in = ConvBNAct(c5, c, 1, 1)

        self.sppf = SPPF(c, c)

        # Top-down
        self.p5_td = LC2f(c, c, n=1)
        self.p4_td = LC2f(c*2, c, n=1)  # concat(P4_in, up(P5_td))
        self.p3_td = LC2f(c*2, c, n=1)  # concat(P3_in, up(P4_td))

        # Bottom-up
        self.p4_bu = LC2f(c*2, c, n=1)  # concat(P4_in, down(P3_td))
        self.p5_bu = LC2f(c*2, c, n=1)  # concat(P5_td, down(P4_bu))

        self.d3 = ConvBNAct(c, c, k=3, s=2)  # downsample
        self.d4 = ConvBNAct(c, c, k=3, s=2)

    @staticmethod
    def _resize_like(x, ref):
        """Return ``x`` resized (nearest) to the height/width of ``ref``; no-op if equal."""
        size = ref.shape[-2:]
        if x.shape[-2:] == size:
            return x
        return F.interpolate(x, size=size, mode="nearest")

    def forward(self, p3, p4, p5):
        p3 = self.c3_in(p3)
        p4 = self.c4_in(p4)
        p5 = self.c5_in(p5)

        p5s = self.sppf(p5)

        # top-down: coarse features are brought up to the finer level's size
        p5_td = self.p5_td(p5s)
        p4_td = self.p4_td(torch.cat([p4, self._resize_like(p5_td, p4)], 1))
        p3_td = self.p3_td(torch.cat([p3, self._resize_like(p4_td, p3)], 1))

        # bottom-up: strided conv, then align to the coarser level if it is off by one
        p4_bu = self.p4_bu(torch.cat([p4, self._resize_like(self.d3(p3_td), p4)], 1))
        p5_bu = self.p5_bu(torch.cat([p5_td, self._resize_like(self.d4(p4_bu), p5_td)], 1))

        return [p3_td, p4_bu, p5_bu]


In [None]:
# yolov8_uw_fasternet.yaml
# UW-YOLOv8 (FasterNet backbone + FBiFPN (concat) + LC2f) with Detect head
# Strides: [8, 16, 32]; Inputs: P3(80), P4(40), P5(20) for imgsz=640

nc: 80  # number of classes; set this to match your dataset
depth_multiple: 1.0
width_multiple: 1.0

# Import note: ensure `uwyolov8_fasternet.py` is importable when loading this.
# NOTE(review): the stock Ultralytics model parser appears to read only the `backbone`
# and `head` sections and to resolve module names from its own namespace, so the
# `neck` section and the custom modules below presumably need a custom
# loader/registration step - confirm before training with this file.
backbone:
  # each entry is [from, repeats, module, args]
  - [-1, 1, FasterNetBackbone, []]         # returns [P3,P4,P5] with chans (128,256,512)

neck:
  # args: c3, c4, c5 (input channels), c (unified width), repeats
  - [[0], 1, FBiFPN, [128, 256, 512, 256, 1]]  # unify to 256, concat fusion, SPPF on P5

head:
  # YOLOv8 Detect head expects feature list (P3, P4, P5)
  - [[1], 1, Detect, [nc]]
