# LEAF-YOLO: Lightweight Aerial Small Object Detection

This notebook implements the complete LEAF-YOLO architecture for efficient small object detection in aerial imagery, specifically designed for the VisDrone dataset. LEAF-YOLO introduces novel lightweight mechanisms while maintaining high accuracy for edge deployment on UAV platforms.

## Model Specifications:
- **LEAF-YOLO-N**: 1.2M parameters, 5.6G FLOPs, 56 FPS on Jetson AGX Xavier
- **LEAF-YOLO**: 4.28M parameters, 20.9G FLOPs, 32 FPS on Jetson AGX Xavier
- **Target Dataset**: VisDrone2019-DET with 10 classes
- **Performance**: 28.2% mAP@50:95, 48.3% mAP@50 on VisDrone validation set

## 1. Import Required Libraries

Import all necessary libraries for building and training the LEAF-YOLO model.

In [None]:
# Core PyTorch libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# Numerical and scientific computing
import numpy as np
import math
import copy
from pathlib import Path
import yaml
import json

# Visualization and plotting
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2

# Utilities
import os
import time
import random
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Seed every random number generator used in this notebook so runs repeat
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Report the runtime environment
print("Libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only query the device name when a CUDA device actually exists
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Define Core Modules (GhostConv, PConv, CoordConvATT)

Implement the fundamental building blocks of LEAF-YOLO architecture.

In [None]:
class Conv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation.

    Args:
        c1: input channels.
        c2: output channels.
        k: kernel size.
        s: stride.
        p: padding; ``None`` defers to ``autopad(k)``.
        g: number of convolution groups.
        act: ``True`` selects SiLU, an ``nn.Module`` instance is used as
            given, any other value disables the activation.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super().__init__()
        padding = autopad(k, p)
        # bias=False: the convolution is always followed by BatchNorm
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        normed = self.bn(self.conv(x))
        return self.act(normed)

def autopad(k, p=None):
    """Return the padding to use for kernel size ``k``.

    An explicit ``p`` is returned unchanged. Otherwise the padding is half
    the kernel size (floor division): an int for an int ``k``, or a list
    with one entry per element when ``k`` is a sequence.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [side // 2 for side in k]

class GhostConv(nn.Module):
    """Ghost convolution.

    A regular ``Conv`` produces ``c2 // 2`` channels; a 5x5 depthwise
    ``Conv`` applied to that result produces another ``c2 // 2``. Both
    halves are concatenated, so the output has ``2 * (c2 // 2)`` channels.

    Args:
        c1: input channels.
        c2: requested output channels.
        k: kernel size of the primary convolution.
        s: stride of the primary convolution.
        g: groups of the primary convolution.
        act: activation selector forwarded to both ``Conv`` layers.
    """

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        super().__init__()
        half = c2 // 2
        self.cv1 = Conv(c1, half, k, s, None, g, act)
        # groups == channels makes this second convolution depthwise
        self.cv2 = Conv(half, half, 5, 1, None, half, act)

    def forward(self, x):
        primary = self.cv1(x)
        cheap = self.cv2(primary)
        return torch.cat((primary, cheap), 1)

class PConv(nn.Module):
    """Partial convolution: convolve the first quarter of the channels only.

    The first ``c1 // 4`` channels go through Conv2d -> BatchNorm2d -> SiLU;
    the remaining channels are passed through untouched and re-attached.

    When ``c2`` differs from ``c1`` a 1x1 Conv2d -> BatchNorm2d -> SiLU
    projection is appended so the module really emits ``c2`` channels.
    Previously ``c2`` was accepted but silently ignored, so callers that
    asked for a different width received ``c1`` channels.

    Args:
        c1: input channels.
        c2: output channels; ``None`` (or a value equal to ``c1``) keeps the
            input width and adds no projection.
        k: kernel size of the partial convolution.
        s: stride of the partial convolution. NOTE(review): any value other
            than 1 shrinks only the convolved part, so the concatenation
            with the untouched channels would fail.
        p: padding of the partial convolution.
        g: groups of the partial convolution.
    """

    def __init__(self, c1, c2=None, k=3, s=1, p=1, g=1):
        super().__init__()
        c2 = c2 or c1
        self.dim = c1 // 4  # number of channels that are actually convolved
        self.conv = nn.Conv2d(self.dim, self.dim, k, s, p, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(self.dim)
        self.act = nn.SiLU()
        # Only created when a width change is requested, so the parameter
        # set for the default (c2 == c1) case is unchanged.
        if c2 != c1:
            self.proj = nn.Sequential(
                nn.Conv2d(c1, c2, 1, bias=False),
                nn.BatchNorm2d(c2),
                nn.SiLU(),
            )
        else:
            self.proj = None

    def forward(self, x):
        x1, x2 = torch.split(x, [self.dim, x.shape[1] - self.dim], dim=1)
        x1 = self.act(self.bn(self.conv(x1)))
        out = torch.cat([x1, x2], dim=1)
        return out if self.proj is None else self.proj(out)

class ECA(nn.Module):
    """Efficient Channel Attention.

    Each channel is globally average-pooled, a 1-D convolution is run across
    the channel axis, and the sigmoid of the result rescales the input.

    Args:
        channel: number of input channels; determines the 1-D kernel size.
        b: offset added to ``log2(channel)`` when deriving the kernel size.
        gamma: divisor used when deriving the kernel size.
    """

    def __init__(self, channel, b=1, gamma=2):
        super().__init__()
        raw = int(abs((math.log(channel, 2) + b) / gamma))
        # Bump even sizes to the next odd value
        kernel = raw + 1 if raw % 2 == 0 else raw
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=kernel, padding=kernel // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        pooled = self.avg_pool(x)
        # Drop the trailing singleton and move channels to the last axis so
        # the 1-D convolution slides across channels
        channel_seq = pooled.squeeze(-1).transpose(-1, -2)
        mixed = self.conv(channel_seq)
        # Undo the reshaping so the weights broadcast over the input
        weights = self.sigmoid(mixed.transpose(-1, -2).unsqueeze(-1))
        return x * weights

class CoordAtt(nn.Module):
    """Coordinate attention.

    The input is average-pooled along width and along height separately.
    Both strips share a 1x1 conv -> BatchNorm -> SiLU bottleneck and are then
    turned into two sigmoid gates (one per spatial axis) that multiply the
    input.

    Args:
        inp: input channels.
        oup: channels of the two gates; the final product with the input
            requires this to be broadcast-compatible with ``inp``.
        reduction: bottleneck ratio; the bottleneck width is
            ``max(8, inp // reduction)``.
    """

    def __init__(self, inp, oup, reduction=32):
        super().__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))  # collapse width
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))  # collapse height

        bottleneck = max(8, inp // reduction)

        self.conv1 = nn.Conv2d(inp, bottleneck, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(bottleneck)
        self.act = nn.SiLU()

        self.conv_h = nn.Conv2d(bottleneck, oup, kernel_size=1, stride=1, padding=0)
        self.conv_w = nn.Conv2d(bottleneck, oup, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        _, _, height, width = x.size()

        strip_h = self.pool_h(x)
        # Rotate the width strip so both strips can be stacked along dim 2
        strip_w = self.pool_w(x).permute(0, 1, 3, 2)

        joint = torch.cat([strip_h, strip_w], dim=2)
        joint = self.act(self.bn1(self.conv1(joint)))

        # Separate the two directions again and restore the width layout
        feat_h, feat_w = torch.split(joint, [height, width], dim=2)
        feat_w = feat_w.permute(0, 1, 3, 2)

        gate_h = self.conv_h(feat_h).sigmoid()
        gate_w = self.conv_w(feat_w).sigmoid()

        return x * gate_w * gate_h

class CoordConvATT(nn.Module):
    """A ``Conv`` layer followed by coordinate attention on its output.

    Args:
        c1: input channels.
        c2: output channels (also the width of the attention block).
        k, s, p, g, act: forwarded unchanged to ``Conv``.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super().__init__()
        self.conv = Conv(c1, c2, k, s, p, g, act)
        # Attention keeps the channel count: c2 in, c2 out
        self.coord_att = CoordAtt(c2, c2)

    def forward(self, x):
        return self.coord_att(self.conv(x))

print("Core modules defined successfully!")

## 3. Implement LEAF Blocks and CSP Components

Create the LEAF mechanism and Cross-Stage Partial (CSP) blocks with Bottle2neck modules.

In [None]:
class Bottle2neck(nn.Module):
    """Res2Net-style bottleneck.

    A 1x1 ``Conv`` maps to ``int(c2 * e)`` hidden channels, which are split
    into ``scale`` equal groups. The first group is passed through as is;
    each later group goes through its own 3x3 ``Conv``, and from the third
    group on the previous group's output is added before that convolution.
    The groups are concatenated and projected to ``c2`` by a 1x1 ``Conv``
    without activation.

    Args:
        c1: input channels.
        c2: output channels.
        shortcut: add the input to the output when ``c1 == c2``.
        g: accepted for signature compatibility; unused here.
        e: hidden-channel expansion ratio.
        scale: number of channel groups.
        stype: accepted for signature compatibility; unused here.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, scale=4, stype='normal'):
        super().__init__()
        hidden = int(c2 * e)
        group_width = hidden // scale
        self.scale = scale
        self.nums = scale - 1  # groups that receive a 3x3 convolution

        self.conv1 = Conv(c1, hidden, 1, 1)

        # One 3x3 convolution per processed group
        self.convs = nn.ModuleList(
            Conv(group_width, group_width, 3, 1) for _ in range(self.nums)
        )
        self.bns = nn.ModuleList()  # kept empty, as in the original layout

        self.conv2 = Conv(hidden, c2, 1, 1, act=False)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.conv1(x)

        groups = torch.split(out, out.size(1) // self.scale, 1)
        group_outs = [groups[0]]

        for idx, conv in enumerate(self.convs):
            group_in = groups[idx + 1]
            if idx >= 1:
                # Feed the previous group's result into this one
                group_in = group_in + group_outs[idx]
            group_outs.append(conv(group_in))

        out = self.conv2(torch.cat(group_outs, 1))

        if self.add:
            return out + x
        return out

class C3_Res2Block(nn.Module):
    """CSP block whose deep branch is a stack of ``Bottle2neck`` modules.

    Two 1x1 ``Conv`` layers each map the input to ``int(c2 * e)`` channels.
    One branch continues through ``n`` ``Bottle2neck`` blocks, the other is
    left as is; both are concatenated and fused to ``c2`` by a 1x1 ``Conv``.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of ``Bottle2neck`` blocks in the deep branch.
        shortcut: forwarded to each ``Bottle2neck``.
        g: forwarded to each ``Bottle2neck``.
        e: hidden-channel ratio of this block.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)

        # e=1.0 keeps each bottleneck at the full hidden width
        blocks = [Bottle2neck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))

class LEAFBlock(nn.Module):
    """Lightweight-Efficient Aggregating Fusion (LEAF) block.

    The input is split channel-wise into two halves. Each half goes through
    its own 1x1 ``Conv``; the second half is then refined by a chain of
    ``n_pconv`` ``PConv`` layers. The two branch outputs and every
    intermediate ``PConv`` output are concatenated and fused to ``c2``
    channels by a 1x1 ``Conv``.

    The concatenation therefore carries ``c1 + (c1 // 2) * n_pconv``
    channels, which is exactly what ``cv_out`` is built for. Earlier the raw
    first half was concatenated as well, giving ``c1 // 2`` channels more
    than ``cv_out`` accepted, so every forward pass failed with a channel
    mismatch.

    Args:
        c1: input channels; must be even so both halves have the same width.
        c2: output channels.
        n_pconv: number of chained ``PConv`` layers on the second half.

    Raises:
        ValueError: if ``c1`` is odd.
    """

    def __init__(self, c1, c2, n_pconv=4):
        super().__init__()
        if c1 % 2:
            raise ValueError(f"LEAFBlock needs an even number of input channels, got {c1}")
        self.c_split = c1 // 2

        # One 1x1 Conv per half
        self.cv1 = Conv(self.c_split, self.c_split, 1, 1)
        self.cv2 = Conv(self.c_split, self.c_split, 1, 1)

        # Chain of PConv layers whose intermediate outputs are all kept
        self.pconvs = nn.ModuleList([PConv(self.c_split) for _ in range(n_pconv)])

        # Fuses: two branch outputs (c1 in total) + n_pconv PConv outputs
        self.cv_out = Conv(c1 + self.c_split * n_pconv, c2, 1, 1)

    def forward(self, x):
        x1, x2 = torch.split(x, [self.c_split, x.shape[1] - self.c_split], dim=1)

        y1 = self.cv1(x1)
        y2 = self.cv2(x2)

        # Collect the branch outputs plus every PConv stage
        collected = [y1, y2]
        for pconv in self.pconvs:
            y2 = pconv(y2)
            collected.append(y2)

        return self.cv_out(torch.cat(collected, dim=1))

class MP(nn.Module):
    """Max-pooling layer whose stride equals its kernel size.

    Args:
        k: kernel size and stride of the pooling window.
    """

    def __init__(self, k=2):
        super().__init__()
        self.m = nn.MaxPool2d(k, k)

    def forward(self, x):
        pooled = self.m(x)
        return pooled

class Concat(nn.Module):
    """Join a sequence of tensors along one fixed dimension.

    Args:
        dimension: axis passed to ``torch.cat``.
    """

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        axis = self.d
        return torch.cat(x, dim=axis)

print("LEAF blocks and CSP components implemented successfully!")

## 4. Build Backbone Architecture

Construct the hierarchical backbone with progressive feature extraction through 5 stages.

In [None]:
class LEAFBackbone(nn.Module):
    """Feature extractor that returns four pyramid levels (P2 to P5).

    A stride-2 stem is followed by stage 1 (another stride-2 conv plus
    GhostConv, LEAFBlock and C3_Res2Block). Stages 2-4 each downsample along
    two parallel paths - max-pooling and a stride-2 GhostConv - concatenate
    both results, and refine them with a LEAFBlock and a C3_Res2Block.

    Args:
        width_multiple: factor applied to the base channel table
            ``[32, 64, 128, 256, 256]``; each result is rounded up to a
            multiple of 8.
        depth_multiple: stored on the instance; not used when building the
            layers.
    """

    def __init__(self, width_multiple=0.5, depth_multiple=1.0):
        super().__init__()
        self.width_multiple = width_multiple
        self.depth_multiple = depth_multiple

        def round_up(value, divisor=8):
            # Smallest multiple of `divisor` that is >= value
            return math.ceil(value / divisor) * divisor

        c0, c1, c2, c3, c4 = (
            round_up(base * width_multiple) for base in (32, 64, 128, 256, 256)
        )

        # Stem (P1/2)
        self.stem = Conv(3, c0, 3, 2)

        # Stage 1 (P2/4)
        self.stage1 = nn.Sequential(
            Conv(c0, c1, 3, 2),
            GhostConv(c1, c1, 3, 1),
            LEAFBlock(c1, c1),
            C3_Res2Block(c1, c1),
        )

        # Stage 2 (P3/8): pooled path + strided-ghost path, then fuse
        self.stage2_down = nn.Sequential(MP(2), Conv(c1, c1, 1, 1))
        self.stage2_ghost = nn.Sequential(Conv(c1, c1, 1, 1), GhostConv(c1, c1, 3, 2))
        self.stage2_leaf = LEAFBlock(c1 * 2, c1)
        self.stage2_c3 = C3_Res2Block(c1, c2)

        # Stage 3 (P4/16)
        self.stage3_down = nn.Sequential(MP(2), Conv(c2, c2, 1, 1))
        self.stage3_ghost = nn.Sequential(Conv(c2, c2, 1, 1), GhostConv(c2, c2, 3, 2))
        self.stage3_leaf = LEAFBlock(c2 * 2, c2)
        self.stage3_c3 = C3_Res2Block(c2, c3)

        # Stage 4 (P5/32)
        self.stage4_down = nn.Sequential(MP(2), Conv(c3, c3, 1, 1))
        self.stage4_ghost = nn.Sequential(Conv(c3, c3, 1, 1), GhostConv(c3, c3, 3, 2))
        self.stage4_leaf = LEAFBlock(c3 * 2, c3)
        self.stage4_c3 = C3_Res2Block(c3, c4)

    @staticmethod
    def _dual_path_stage(x, down, ghost, leaf, c3):
        """Run both downsampling paths on ``x``, concatenate and refine."""
        merged = torch.cat([down(x), ghost(x)], dim=1)
        return c3(leaf(merged))

    def forward(self, x):
        """Return ``[P2, P3, P4, P5]`` for an image batch ``x``."""
        p1 = self.stem(x)
        p2 = self.stage1(p1)
        p3 = self._dual_path_stage(
            p2, self.stage2_down, self.stage2_ghost, self.stage2_leaf, self.stage2_c3
        )
        p4 = self._dual_path_stage(
            p3, self.stage3_down, self.stage3_ghost, self.stage3_leaf, self.stage3_c3
        )
        p5 = self._dual_path_stage(
            p4, self.stage4_down, self.stage4_ghost, self.stage4_leaf, self.stage4_c3
        )
        return [p2, p3, p4, p5]

# Smoke-test the backbone on one random 640x640 RGB image
print("Testing LEAF Backbone...")
backbone = LEAFBackbone(width_multiple=0.5)
test_input = torch.randn(1, 3, 640, 640)
features = backbone(test_input)

# The backbone returns levels P2..P5, hence numbering starts at 2
print("Backbone output shapes:")
for level, feat in enumerate(features, start=2):
    print(f"P{level}: {feat.shape}")

print("Backbone implemented successfully!")

## 5. Create Neck (Feature Pyramid Network)

Implement the PANet-style neck with SPPRFEM and bidirectional feature fusion.

In [None]:
class SPPRFEM(nn.Module):
    """Spatial pyramid pooling block.

    A 1x1 ``Conv`` halves the channels, the result is max-pooled in parallel
    with each kernel size in ``k`` (stride 1, padding ``size // 2``), and the
    un-pooled and pooled maps are concatenated and fused by a 1x1 ``Conv``.

    Args:
        c1: input channels.
        c2: output channels.
        k: kernel sizes of the parallel max-pool layers.
    """

    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        # One un-pooled map plus one map per pooling kernel
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList(
            nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k
        )

    def forward(self, x):
        x = self.cv1(x)
        pooled = [pool(x) for pool in self.m]
        return self.cv2(torch.cat([x, *pooled], 1))

class LEAFNeck(nn.Module):
    """PANet-style neck: a top-down pass followed by a bottom-up pass.

    Args:
        channels: channel counts of the incoming ``[P2, P3, P4, P5]`` feature
            maps. Only the first four entries are read. The default is a
            tuple rather than a list so it cannot be mutated across
            instances.

    With ``channels = (c0, c1, c2, c3)`` the four returned maps have
    ``c0, c1, c2, c2`` channels respectively.
    """

    def __init__(self, channels=(64, 128, 256, 256)):
        super().__init__()
        c0, c1, c2, c3 = channels[:4]

        # Pyramid pooling on P5 (c3 -> c2 channels)
        self.sppf = SPPRFEM(c3, c2)

        # Top-down path (P5 -> P4 -> P3 -> P2): 1x1 conv + attention before
        # each upsampling step
        self.up_conv1 = CoordConvATT(c2, c1, 1, 1)
        self.up_conv2 = CoordConvATT(c1, c0, 1, 1)
        self.up_conv3 = CoordConvATT(c0, c0, 1, 1)

        # Shared 2x nearest-neighbour upsampler
        self.upsample = nn.Upsample(None, 2, 'nearest')

        # Fusion after each top-down concatenation
        self.fusion1 = C3_Res2Block(c2 + c1, c1)  # upsampled P5 (c1) + P4 (c2)
        self.fusion2 = C3_Res2Block(c1 + c0, c0)  # upsampled P4 (c0) + P3 (c1)
        self.fusion3 = C3_Res2Block(c0 * 2, c0)   # upsampled P3 (c0) + P2 (c0)

        # Bottom-up path (P2 -> P3 -> P4 -> P5): stride-2 GhostConv
        self.down_conv1 = GhostConv(c0, c1, 3, 2)
        self.down_conv2 = GhostConv(c1, c2, 3, 2)
        self.down_conv3 = GhostConv(c2, c1, 3, 2)

        # Fusion after each bottom-up concatenation
        self.final_fusion1 = C3_Res2Block(c0 + c1, c1)  # P3 output
        self.final_fusion2 = C3_Res2Block(c1 + c2, c2)  # P4 output
        self.final_fusion3 = C3_Res2Block(c2 + c1, c2)  # P5 output

    def forward(self, features):
        """
        Args:
            features: [P2, P3, P4, P5] from backbone
        Returns:
            [P2_out, P3_out, P4_out, P5_out] for detection heads
        """
        p2, p3, p4, p5 = features

        p5_spp = self.sppf(p5)

        # Top-down: reduce, upsample, concatenate with the finer level, fuse
        p5_up = self.upsample(self.up_conv1(p5_spp))
        p4_fused = self.fusion1(torch.cat([p5_up, p4], dim=1))

        p4_up = self.upsample(self.up_conv2(p4_fused))
        p3_fused = self.fusion2(torch.cat([p4_up, p3], dim=1))

        p3_up = self.upsample(self.up_conv3(p3_fused))
        p2_out = self.fusion3(torch.cat([p3_up, p2], dim=1))

        # Bottom-up: downsample, concatenate with the top-down result, fuse
        p2_down = self.down_conv1(p2_out)
        p3_out = self.final_fusion1(torch.cat([p2_down, p3_fused], dim=1))

        p3_down = self.down_conv2(p3_out)
        p4_out = self.final_fusion2(torch.cat([p3_down, p4_fused], dim=1))

        p4_down = self.down_conv3(p4_out)
        p5_out = self.final_fusion3(torch.cat([p4_down, p5_spp], dim=1))

        return [p2_out, p3_out, p4_out, p5_out]

# Test neck
print("Testing LEAF Neck...")
# Size the neck from the feature maps it will actually receive. A fixed
# [64, 128, 256, 256] does not match the width-0.5 backbone created above,
# whose outputs are narrower, and made the forward pass fail on a channel
# mismatch.
neck = LEAFNeck([feat.shape[1] for feat in features])
neck_outputs = neck(features)

print("Neck output shapes:")
for i, feat in enumerate(neck_outputs):
    print(f"P{i+2}_out: {feat.shape}")

print("Neck implemented successfully!")

## 6. Implement Detection Head (IDetect)

Build the improved detection head with implicit layers for multi-scale detection.

In [None]:
class IDetect(nn.Module):
    """Anchor-based detection head with learnable implicit add/multiply terms.

    For every detection level the input has a learnable per-channel offset
    (``ia``) added, is projected by a 1x1 convolution to ``na * no``
    channels, and is scaled by a learnable per-channel factor (``im``).

    Fixes relative to the previous version:
      * ``ia`` / ``im`` are held in ``nn.ParameterList``; ``nn.ModuleList``
        rejects ``nn.Parameter`` entries and raised at construction time.
      * ``ia`` is shaped by the *input* channel count of its level, so it
        broadcasts against the feature map it is added to.
      * ``stride`` is initialised to ``None`` and checked before decoding.
      * the caller's list of feature maps is no longer modified in place.

    Args:
        nc: number of classes.
        anchors: one flat ``[w0, h0, w1, h1, ...]`` sequence per detection
            level. ``_make_grid`` multiplies them by ``stride``, so they are
            expected in stride units by the time inference runs.
        ch: input channel count of every detection level.
        inplace: decode boxes by slice assignment (``True``) or by building
            a new tensor with ``torch.cat`` (``False``).

    Attributes:
        stride: per-level stride tensor; must be assigned by the owner of
            this module before running in eval mode.

    Raises:
        ValueError: if ``ch`` and ``anchors`` describe a different number of
            levels.
    """

    def __init__(self, nc=10, anchors=(), ch=(), inplace=True):
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: 4 box values, objectness, classes
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # anchors per layer
        if len(ch) != self.nl:
            raise ValueError(
                f"IDetect got {len(ch)} input widths for {self.nl} anchor levels"
            )
        self.grid = [torch.zeros(1)] * self.nl  # lazily built cell offsets
        self.anchor_grid = [torch.zeros(1)] * self.nl  # lazily built anchor sizes
        self.stride = None  # assigned externally once the strides are known

        # Anchors travel with the module (device moves, state_dict)
        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))

        # One 1x1 prediction convolution per level
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)

        # Implicit terms: additive on the input, multiplicative on the output
        self.ia = nn.ParameterList(
            [nn.Parameter(torch.zeros(1, x, 1, 1)) for x in ch]
        )
        self.im = nn.ParameterList(
            [nn.Parameter(torch.ones(1, self.no * self.na, 1, 1)) for _ in ch]
        )

        self.inplace = inplace

    def forward(self, x):
        """Predict on a sequence of per-level feature maps.

        Returns:
            In training mode, a list with one tensor per level, each reshaped
            to ``(bs, na, ny, nx, no)``. In eval mode, a tuple of the decoded
            predictions concatenated over all levels, shaped
            ``(bs, -1, no)``, and that same per-level list.

        Raises:
            RuntimeError: in eval mode when ``stride`` has not been assigned.
        """
        x = list(x)  # work on a copy so the caller's container is untouched
        z = []  # inference output

        for i in range(self.nl):
            x[i] = self.m[i](x[i] + self.ia[i]) * self.im[i]

            bs, _, ny, nx = x[i].shape
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.stride is None:
                    raise RuntimeError("IDetect.stride must be set before inference")
                # Rebuild the cached grids when the map size or device changes
                if (self.grid[i].shape[2:4] != x[i].shape[2:4]
                        or self.grid[i].device != x[i].device):
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    def _make_grid(self, nx=20, ny=20, i=0):
        """Build the cell-offset grid and the stride-scaled anchor grid for level ``i``.

        Both returned tensors have shape ``(1, na, ny, nx, 2)`` and live on
        the device of the ``anchors`` buffer.
        """
        d = self.anchors[i].device
        yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)], indexing='ij')
        grid = torch.stack((xv, yv), 2).expand((1, self.na, ny, nx, 2)).float()
        anchor_grid = (self.anchors[i].clone() * self.stride[i]).view((1, self.na, 1, 1, 2)).expand((1, self.na, ny, nx, 2)).float()
        return grid, anchor_grid

class LEAFDetectionHead(nn.Module):
    """Detection head: one ``PConv`` per pyramid level followed by ``IDetect``.

    Args:
        nc: number of classes.
        anchors: per-level anchor sizes in pixels, one flat
            ``[w0, h0, w1, h1, ...]`` sequence per level.
        ch: channel counts of the incoming feature maps, one per level.
        head_channels: channel counts the feature maps are expected to have
            when they reach ``IDetect``; each ``PConv`` is constructed as
            ``PConv(ch[i], head_channels[i])``. The default is a tuple so it
            cannot be mutated across instances.

    Raises:
        ValueError: if ``anchors``, ``ch`` and ``head_channels`` do not all
            describe the same number of levels as the stride table.
    """

    def __init__(self, nc=10, anchors=(), ch=(), head_channels=(256, 256, 512, 512)):
        super().__init__()

        # Strides of the P2, P3, P4 and P5 levels
        self.stride = torch.tensor([4., 8., 16., 32.])

        levels = len(self.stride)
        if not (len(anchors) == len(ch) == len(head_channels) == levels):
            raise ValueError(
                f"expected {levels} levels, got {len(anchors)} anchor sets, "
                f"{len(ch)} input widths and {len(head_channels)} head widths"
            )

        # Preprocess features with PConv before detection
        self.preprocess = nn.ModuleList([
            PConv(c_in, c_head) for c_in, c_head in zip(ch, head_channels)
        ])

        # Main detection head
        self.detect = IDetect(nc, anchors, head_channels)
        self.detect.stride = self.stride

        # IDetect._make_grid multiplies the anchors by the stride again, so
        # they have to be stored in stride units; without this division the
        # decoded box sizes come out scaled by the stride.
        self.detect.anchors /= self.stride.view(-1, 1, 1)

    def forward(self, x):
        """Preprocess each feature map and hand the list to ``IDetect``."""
        processed = [self.preprocess[i](feat) for i, feat in enumerate(x)]
        return self.detect(processed)

# VisDrone anchors (optimized for small objects)
# One row per detection level; each row is three (width, height) pairs laid
# out flat, which is how IDetect reads them (len(row) // 2 anchors per level).
# NOTE(review): values are presumably pixels at the training resolution - confirm.
# NOTE(review): only the P2 row looks dataset-specific; verify whether the
# P3-P5 rows were actually fitted to VisDrone.
visdrone_anchors = [
    [2.9434, 4.0435, 3.8626, 8.5592, 6.8534, 5.9391],  # P2/4
    [10, 13, 16, 30, 33, 23],                           # P3/8
    [30, 61, 62, 45, 59, 119],                          # P4/16
    [116, 90, 156, 198, 373, 326]                       # P5/32
]

# Test detection head
print("Testing LEAF Detection Head...")
# Read the input widths from the neck outputs instead of hard-coding them,
# so the head always matches the feature maps it is about to receive.
head = LEAFDetectionHead(
    nc=10,
    anchors=visdrone_anchors,
    ch=[feat.shape[1] for feat in neck_outputs],
)
head.eval()

with torch.no_grad():
    detection_outputs = head(neck_outputs)

# In eval mode the head returns (decoded predictions, per-level raw outputs)
if isinstance(detection_outputs, tuple):
    inference_output, training_output = detection_outputs
    print(f"Inference output shape: {inference_output.shape}")
    print(f"Training outputs: {[t.shape for t in training_output]}")
else:
    print(f"Training output shapes: {[t.shape for t in detection_outputs]}")

print("Detection head implemented successfully!")

## 7. Assemble Complete LEAF-YOLO Model

Combine all components into the final LEAF-YOLO architecture.

In [None]:
class LEAFYOLO(nn.Module):
    """Complete LEAF-YOLO model: backbone -> neck -> detection head.

    Fixes relative to the previous version:
      * the neck and head are sized from the backbone's four *output* widths
        (P2..P5). Previously the five-entry channel table, which starts with
        the stem width, was passed on unchanged, so every level was off by
        one and the head indexed past the end of ``head_channels``.
      * one channel table is used for every ``width_multiple``, matching
        ``LEAFBackbone``, which has no separate "standard" table.
      * the FLOP hook no longer divides by ``groups`` twice.
      * ``get_model_info`` restores the train/eval mode it found and builds
        its probe input on the model's device.

    Args:
        nc: number of classes.
        width_multiple: channel scaling factor forwarded to the backbone.
        depth_multiple: forwarded to the backbone.
    """

    def __init__(self, nc=10, width_multiple=0.5, depth_multiple=1.0):
        super().__init__()
        self.nc = nc
        self.width_multiple = width_multiple
        self.depth_multiple = depth_multiple

        def make_divisible(x, divisor=8):
            return math.ceil(x / divisor) * divisor

        # Must stay in sync with the channel table inside LEAFBackbone:
        # entry 0 is the stem, entries 1..4 are the P2..P5 output widths.
        base_channels = [32, 64, 128, 256, 256]
        scaled = [make_divisible(c * width_multiple) for c in base_channels]
        feature_channels = scaled[1:]

        # LEAFNeck returns widths (c0, c1, c2, c2) for inputs (c0, c1, c2, c3)
        neck_out_channels = feature_channels[:3] + [feature_channels[2]]

        # Model components
        self.backbone = LEAFBackbone(width_multiple, depth_multiple)
        self.neck = LEAFNeck(feature_channels)
        self.head = LEAFDetectionHead(
            nc=nc,
            anchors=visdrone_anchors,
            ch=neck_out_channels,
            head_channels=[256, 256, 512, 512] if width_multiple == 0.5 else [512, 512, 1024, 1024]
        )

        # Initialize weights
        self._initialize_weights()

    def forward(self, x):
        """Run backbone, neck and head on an image batch and return the head's output."""
        backbone_features = self.backbone(x)
        neck_features = self.neck(backbone_features)
        return self.head(neck_features)

    def _initialize_weights(self):
        """Initialize Conv2d, BatchNorm2d and Linear layers in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def get_model_info(self):
        """Return parameter and approximate FLOP counts.

        Only ``nn.Conv2d`` layers contribute to the FLOP figure and one
        multiply-accumulate is counted as one operation.

        Returns:
            dict with keys ``parameters``, ``parameters_M``, ``FLOPs``,
            ``FLOPs_G``, ``width_multiple`` and ``model_type``.
        """
        def count_parameters(model):
            return sum(p.numel() for p in model.parameters() if p.requires_grad)

        def calculate_flops(model, input_size=(1, 3, 640, 640)):
            """Approximate FLOPs calculation"""
            was_training = model.training
            model.eval()
            total_flops = 0

            def flop_count_hook(module, input, output):
                nonlocal total_flops
                batch_size = input[0].shape[0]
                output_positions = batch_size * int(np.prod(output.shape[2:]))
                # Each output value sums over k_h * k_w * (in_channels / groups)
                # inputs, and there are out_channels values per position.
                macs_per_value = int(np.prod(module.kernel_size)) * (module.in_channels // module.groups)
                total_flops += macs_per_value * module.out_channels * output_positions

            hooks = [
                module.register_forward_hook(flop_count_hook)
                for module in model.modules()
                if isinstance(module, nn.Conv2d)
            ]

            try:
                first_param = next(model.parameters(), None)
                device = first_param.device if first_param is not None else torch.device('cpu')
                with torch.no_grad():
                    _ = model(torch.randn(*input_size, device=device))
            finally:
                # Always detach the hooks and put the model back as found
                for hook in hooks:
                    hook.remove()
                model.train(was_training)

            return total_flops

        params = count_parameters(self)
        flops = calculate_flops(self)

        return {
            'parameters': params,
            'parameters_M': params / 1e6,
            'FLOPs': flops,
            'FLOPs_G': flops / 1e9,
            'width_multiple': self.width_multiple,
            'model_type': 'LEAF-YOLO-N' if self.width_multiple == 0.5 else 'LEAF-YOLO'
        }

# Create model variants
print("Creating LEAF-YOLO models...")

# LEAF-YOLO-N (Nano)
model_nano = LEAFYOLO(nc=10, width_multiple=0.5, depth_multiple=1.0)
print("LEAF-YOLO-N created!")

# LEAF-YOLO (Standard)
model_standard = LEAFYOLO(nc=10, width_multiple=1.0, depth_multiple=1.0)
print("LEAF-YOLO Standard created!")

# Test forward pass
# The leading "\n" is a real newline; the doubled backslash used before
# printed the two characters "\n" literally.
print("\nTesting forward pass...")
test_input = torch.randn(2, 3, 640, 640)  # Batch of 2 images

# Test nano model
model_nano.eval()
with torch.no_grad():
    nano_output = model_nano(test_input)

# In eval mode the model returns (decoded predictions, per-level raw outputs)
print(f"Nano model output type: {type(nano_output)}")
if isinstance(nano_output, tuple):
    print(f"Inference output shape: {nano_output[0].shape}")
    print(f"Training outputs: {len(nano_output[1])} layers")
else:
    print(f"Training output: {len(nano_output)} layers")

# Get model information
nano_info = model_nano.get_model_info()
standard_info = model_standard.get_model_info()

print("\nModel Information:")
print(f"LEAF-YOLO-N: {nano_info['parameters_M']:.2f}M params, {nano_info['FLOPs_G']:.1f}G FLOPs")
print(f"LEAF-YOLO: {standard_info['parameters_M']:.2f}M params, {standard_info['FLOPs_G']:.1f}G FLOPs")

print("\nLEAF-YOLO models assembled successfully!")

## 8. Model Configuration and Training Setup

Configure training parameters and loss functions for VisDrone dataset training.

In [None]:
# Training Configuration
class TrainingConfig:
    """Hyperparameters for training LEAF-YOLO on VisDrone.

    Every setting is a plain instance attribute assigned in ``__init__``.
    """

    def __init__(self):
        # ---- Dataset ----
        self.dataset = 'VisDrone2019-DET'
        self.nc = 10  # class count
        self.class_names = [
            'pedestrian',
            'people',
            'bicycle',
            'car',
            'van',
            'truck',
            'tricycle',
            'awning-tricycle',
            'bus',
            'motor',
        ]

        # ---- Schedule and optimizer ----
        self.epochs = 1000
        self.batch_size = 16
        self.img_size = 640
        self.lr0 = 0.01  # starting learning rate
        self.lrf = 0.01  # final learning rate as a fraction of lr0
        self.momentum = 0.937
        self.weight_decay = 0.0005
        self.warmup_epochs = 3

        # ---- Augmentation ----
        # HSV jitter: hue, saturation, value
        self.hsv_h, self.hsv_s, self.hsv_v = 0.015, 0.7, 0.4
        # Geometric: rotation degrees, translation, scale, shear, perspective
        self.degrees = 0.0
        self.translate = 0.1
        self.scale = 0.5
        self.shear = 0.0
        self.perspective = 0.0
        # Flip probabilities: vertical, horizontal
        self.flipud, self.fliplr = 0.0, 0.5
        # Multi-image mixing probabilities
        self.mosaic, self.mixup = 1.0, 0.0

        # ---- Loss weighting ----
        self.box_loss_gain = 0.05
        self.cls_loss_gain = 0.5
        self.obj_loss_gain = 1.0
        self.label_smoothing = 0.0

        # ---- Anchor matching ----
        self.anchor_t = 4.0  # anchor-multiple threshold

config = TrainingConfig()

# Loss Functions
class ComputeLoss:
    """Compute the combined box, objectness and classification loss.

    The detection module is read from ``model.head.detect``; its ``na``,
    ``nc``, ``nl`` and ``anchors`` attributes are copied onto this object.
    Loss gains, label smoothing and the anchor-matching threshold come from
    the module-level ``config`` object, which is also stored as ``self.hyp``.
    """

    def __init__(self, model, autobalance=False):
        """Set up loss criteria and per-layer objectness weights.

        Args:
            model: network exposing ``model.head.detect`` with ``na``, ``nc``,
                ``nl`` and ``anchors`` (and ``stride`` when ``autobalance``
                is enabled).
            autobalance: when True, the per-layer objectness weights are
                updated on every call and normalised by the stride-16 layer.
        """
        self.sort_obj_iou = False
        device = next(model.parameters()).device
        det = model.head.detect  # Detect() module
        hyp = config

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.0], device=device))
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.0], device=device))

        # Class label smoothing: positive / negative BCE targets
        self.cp, self.cn = smooth_BCE(eps=hyp.label_smoothing)

        # Focal loss
        # NOTE(review): focal loss is gated on the classification gain rather
        # than on a dedicated gamma setting, so it is effectively always on.
        g = 1.5  # focal loss gamma
        if hyp.cls_loss_gain > 0:
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

        # Per-layer objectness weights. The list must have at least ``nl``
        # entries because ``__call__`` indexes it by layer; a fixed 3-entry
        # list raises IndexError for heads with more than three scales.
        balance = [4.0, 1.0, 0.4] if det.nl == 3 else [4.0, 1.0, 0.25, 0.06, 0.02]
        if det.nl > len(balance):
            # Heads deeper than the table reuse the last weight.
            balance = balance + [balance[-1]] * (det.nl - len(balance))
        self.balance = balance
        self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
        self.BCEcls, self.BCEobj = BCEcls, BCEobj
        self.gr = 1.0  # blend between constant 1.0 and IoU for objectness targets
        self.hyp = hyp
        self.autobalance = autobalance

        for k in 'na', 'nc', 'nl', 'anchors':
            setattr(self, k, getattr(det, k))

    def __call__(self, p, targets):
        """Return ``(total_loss * batch_size, detached (lbox, lobj, lcls))``.

        Args:
            p: sequence of per-layer prediction tensors, each indexed as
                ``[image, anchor, grid_y, grid_x, channel]`` with channels
                0:4 = box, 4 = objectness, 5: = class logits.
            targets: tensor whose rows are ``(image, class, x, y, w, h)``;
                xywh are multiplied by the grid size in ``build_targets``,
                so they are presumably normalised to [0, 1] - TODO confirm.
        """
        device = targets.device
        lcls = torch.zeros(1, device=device)
        lbox = torch.zeros(1, device=device)
        lobj = torch.zeros(1, device=device)
        tcls, tbox, indices, anchors = self.build_targets(p, targets)

        # Losses
        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj

            n = b.shape[0]  # number of targets
            if n:
                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

                # Regression
                pxy = ps[:, :2].sigmoid() * 2 - 0.5
                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box, shape (n, 4)
                # bbox_iou indexes BOTH boxes along dim 0, so both operands
                # must be laid out as (4, n); the targets are transposed too.
                iou = bbox_iou(pbox.T, tbox[i].T, x1y1x2y2=False, CIoU=True)
                lbox += (1.0 - iou).mean()  # iou loss

                # Objectness
                iou = iou.detach().clamp(0).type(tobj.dtype)
                if self.sort_obj_iou:
                    sort_id = torch.argsort(iou)
                    b, a, gj, gi, iou = b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], iou[sort_id]
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou  # iou ratio

                # Classification
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
                    t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE

            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss
            if self.autobalance:
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        lbox *= self.hyp.box_loss_gain
        lobj *= self.hyp.obj_loss_gain
        lcls *= self.hyp.cls_loss_gain
        bs = tobj.shape[0]  # batch size

        return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()

    def build_targets(self, p, targets):
        """Match targets to anchors and grid cells for every detection layer.

        Returns four lists with one entry per layer: target classes, target
        boxes ``(dx, dy, w, h)`` in grid units, index tuples
        ``(image, anchor, grid_y, grid_x)`` and the matched anchors.
        """
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch = [], [], [], []
        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

        g = 0.5  # bias
        off = torch.tensor([[0, 0],
                            [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                            ], device=targets.device).float() * g  # offsets

        for i in range(self.nl):
            anchors = self.anchors[i]
            shape = p[i].shape
            gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xyxy gain

            # Match targets to anchors
            t = targets * gain
            if nt:
                # Keep targets whose wh ratio to the anchor is below the threshold
                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1 / r).max(2)[0] < self.hyp.anchor_t  # compare
                t = t[j]  # filter

                # Also assign each target to the neighbouring cells it is closest to
                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1 < g) & (gxy > 1)).T
                l, m = ((gxi % 1 < g) & (gxi > 1)).T
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            # Define
            b, c = t[:, :2].long().T  # image, class
            gxy = t[:, 2:4]  # grid xy
            gwh = t[:, 4:6]  # grid wh
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid xy indices

            # Append
            a = t[:, 6].long()  # anchor indices
            # Clamp with plain Python ints: float-tensor bounds on a long
            # tensor raise a dtype error on recent PyTorch releases.
            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])  # anchors
            tcls.append(c)  # class

        return tcls, tbox, indices, anch

def smooth_BCE(eps=0.1):
    """Return the ``(positive, negative)`` BCE targets for label smoothing ``eps``."""
    negative_target = 0.5 * eps
    positive_target = 1.0 - negative_target
    return positive_target, negative_target

class FocalLoss(nn.Module):
    """Wrap a BCE-with-logits criterion so each element is focal-weighted.

    The wrapped criterion's own ``reduction`` is remembered and then switched
    to ``'none'`` so the weighting can be applied element-wise; the remembered
    reduction is applied to the weighted result in ``forward``.
    """

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # expected to be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        # Remember the caller's reduction before overriding it.
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'

    def forward(self, pred, true):
        """Return the focal-weighted loss of logits ``pred`` against ``true``."""
        elementwise = self.loss_fcn(pred, true)
        prob = torch.sigmoid(pred)  # logits -> probabilities
        negatives = 1 - true

        # Probability assigned to the correct label, and the class-balance weight.
        p_t = true * prob + negatives * (1 - prob)
        alpha_weight = true * self.alpha + negatives * (1 - self.alpha)
        focusing = (1.0 - p_t) ** self.gamma
        weighted = elementwise * (alpha_weight * focusing)

        if self.reduction == 'mean':
            return weighted.mean()
        if self.reduction == 'sum':
            return weighted.sum()
        return weighted  # 'none'

def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """Return the IoU (or a GIoU / DIoU / CIoU variant) of ``box1`` and ``box2``.

    Both boxes are indexed along their first dimension, i.e. ``box[0]`` ..
    ``box[3]`` are the four coordinate rows. Callers holding ``(n, 4)``
    tensors must pass the transpose.

    Args:
        box1, box2: coordinate-first box tensors.
        x1y1x2y2: True when boxes are ``(x1, y1, x2, y2)`` corners; False when
            they are ``(cx, cy, w, h)`` and must be converted first.
        GIoU, DIoU, CIoU: select the variant. When several are set, CIoU takes
            precedence over DIoU, which takes precedence over GIoU.
        eps: small constant guarding the divisions.

    Returns:
        Tensor of IoU values (or the selected variant).
    """
    # Get the corner coordinates of both boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area (parenthesised instead of a backslash continuation)
    inter = (
        (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
        * (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    )

    # Union area; eps on the heights keeps the w / h ratios below finite
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if CIoU or DIoU or GIoU:
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
            if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                # Aspect-ratio consistency term
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    # The trade-off weight is computed without gradient tracking
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
            return iou - rho2 / c2  # DIoU
        c_area = cw * ch + eps  # convex area
        return iou - (c_area - union) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
    return iou  # IoU

# Echo the key training settings so the cell output records what was configured.
print(
    "Training configuration and loss functions defined!",
    f"Dataset: {config.dataset}",
    f"Classes: {config.nc}",
    f"Epochs: {config.epochs}",
    f"Batch size: {config.batch_size}",
    f"Image size: {config.img_size}",
    sep="\n",
)

## 9. Performance Evaluation and Visualization

Implement evaluation metrics and model analysis tools for comprehensive performance assessment.

In [None]:
class ModelAnalyzer:
    """Parameter, memory, speed and structure reports for a ``torch`` model."""

    def __init__(self, model):
        self.model = model

    def count_parameters(self):
        """Return total and trainable parameter counts (raw and in millions)."""
        total_params = sum(p.numel() for p in self.model.parameters())
        trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)

        return {
            'total_parameters': total_params,
            'trainable_parameters': trainable_params,
            'total_parameters_M': total_params / 1e6,
            'trainable_parameters_M': trainable_params / 1e6
        }

    def analyze_model_size(self):
        """Return the size in MiB of the model's parameters, buffers and both combined."""
        param_size = sum(p.nelement() * p.element_size() for p in self.model.parameters())
        buffer_size = sum(b.nelement() * b.element_size() for b in self.model.buffers())

        return {
            'parameter_size_MB': param_size / 1024**2,
            'buffer_size_MB': buffer_size / 1024**2,
            'total_size_MB': (param_size + buffer_size) / 1024**2
        }

    @staticmethod
    def _synchronize(device):
        """Wait for queued CUDA work so wall-clock timing covers it."""
        if device.type == 'cuda':
            torch.cuda.synchronize()

    def profile_inference_speed(self, input_size=(1, 3, 640, 640), num_runs=100, warmup_runs=10):
        """Time forward passes on a random input.

        Args:
            input_size: shape of the random input tensor.
            num_runs: number of timed forward passes (must be >= 1).
            warmup_runs: untimed forward passes executed first.

        Returns:
            Dict with ``average_inference_time_ms``, ``fps`` and ``device``.

        Raises:
            ValueError: if ``num_runs`` is smaller than 1.
        """
        if num_runs < 1:
            raise ValueError("num_runs must be at least 1")

        device = next(self.model.parameters()).device
        dummy_input = torch.randn(*input_size, device=device)

        # Profile in eval mode but put the model back in its previous mode
        # afterwards, so profiling does not silently change later training.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                # Warmup
                for _ in range(warmup_runs):
                    self.model(dummy_input)

                # Timing: perf_counter is monotonic and high resolution
                self._synchronize(device)
                start_time = time.perf_counter()
                for _ in range(num_runs):
                    self.model(dummy_input)
                self._synchronize(device)
                elapsed = time.perf_counter() - start_time
        finally:
            self.model.train(was_training)

        avg_time = elapsed / num_runs
        fps = 1 / avg_time if avg_time > 0 else float('inf')

        return {
            'average_inference_time_ms': avg_time * 1000,
            'fps': fps,
            'device': str(device)
        }

    def _count_layers_by_type(self):
        """Count sub-modules per class name, skipping the root and 'LEAFYOLO'."""
        layer_counts = {}
        for name, module in self.model.named_modules():
            module_type = type(module).__name__
            if module_type != 'LEAFYOLO' and name != '':  # Skip root module
                layer_counts[module_type] = layer_counts.get(module_type, 0) + 1
        return layer_counts

    def _count_parameters_by_type(self):
        """Sum trainable parameters per module class name.

        Only parameters owned directly by each module are counted
        (``recurse=False``); a recursive sum would count every parameter
        again for each container module that encloses it.
        """
        param_counts = {}
        for module in self.model.modules():
            own = sum(p.numel() for p in module.parameters(recurse=False) if p.requires_grad)
            if own > 0:
                module_type = type(module).__name__
                param_counts[module_type] = param_counts.get(module_type, 0) + own
        return param_counts

    def visualize_architecture(self):
        """Plot the layer-type distribution, parameters per type and a text summary."""
        layer_counts = self._count_layers_by_type()

        # Create visualization
        plt.figure(figsize=(12, 8))

        # Layer distribution pie chart
        plt.subplot(2, 2, 1)
        labels = list(layer_counts.keys())
        sizes = list(layer_counts.values())
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
        plt.title('Layer Distribution')

        # Parameter distribution bar chart
        plt.subplot(2, 2, 2)
        param_counts = self._count_parameters_by_type()

        if param_counts:
            modules = list(param_counts.keys())
            params = [param_counts[m] / 1000 for m in modules]  # Convert to thousands
            plt.bar(modules, params)
            plt.title('Parameters by Module Type (K)')
            plt.xticks(rotation=45)

        # Model summary text
        plt.subplot(2, 1, 2)
        plt.axis('off')

        # Get model info
        param_info = self.count_parameters()
        size_info = self.analyze_model_size()

        summary_text = f"""
        LEAF-YOLO Model Summary
        ========================
        
        Architecture: {self.model.get_model_info()['model_type']}
        Width Multiple: {self.model.width_multiple}
        
        Parameters:
        - Total: {param_info['total_parameters_M']:.2f}M
        - Trainable: {param_info['trainable_parameters_M']:.2f}M
        
        Memory Usage:
        - Parameters: {size_info['parameter_size_MB']:.2f} MB
        - Buffers: {size_info['buffer_size_MB']:.2f} MB
        - Total: {size_info['total_size_MB']:.2f} MB
        
        Target Dataset: VisDrone2019-DET (10 classes)
        Input Size: 640x640x3
        Output Scales: P2, P3, P4, P5 (4 scales)
        """

        plt.text(0.1, 0.5, summary_text, fontsize=12, verticalalignment='center',
                 fontfamily='monospace')

        plt.tight_layout()
        plt.show()

class VisDroneMetrics:
    """Average-precision style evaluation helpers for a fixed list of class names."""

    def __init__(self, class_names):
        self.class_names = class_names
        self.nc = len(class_names)

    def compute_ap(self, tp, conf, pred_cls, target_cls):
        """Compute per-class AP, precision and recall.

        Args:
            tp: 2-D array, one row per detection and one column per IoU
                threshold, holding 1/True for true positives.
            conf: detection confidences.
            pred_cls: predicted class of each detection.
            target_cls: class of each ground-truth label.

        Returns:
            ``(ap, p, r)``. Rows follow ``np.unique(target_cls)`` (NOT the
            index into ``class_names``). ``ap`` has one column per IoU
            threshold; ``p`` and ``r`` have 1000 columns, each filled with the
            value evaluated at confidence 0.1.
        """
        tp, conf = np.asarray(tp), np.asarray(conf)
        pred_cls, target_cls = np.asarray(pred_cls), np.asarray(target_cls)

        # Sort detections by decreasing confidence
        order = np.argsort(-conf)
        tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

        # Classes that actually occur in the labels
        unique_classes = np.unique(target_cls)

        pr_score = 0.1  # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898

        nc = len(unique_classes)
        ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))

        for ci, c in enumerate(unique_classes):
            mask = pred_cls == c
            n_l = (target_cls == c).sum()  # number of labels
            n_p = mask.sum()  # number of predictions

            if n_p == 0 or n_l == 0:
                continue  # row stays all-zero

            # Accumulate FPs and TPs
            fpc = (1 - tp[mask]).cumsum(0)
            tpc = tp[mask].cumsum(0)

            # Recall
            recall = tpc / (n_l + 1e-16)  # recall curve
            r[ci] = np.interp(-pr_score, -conf[mask], recall[:, 0], left=0)  # negative x, xp because xp decreases

            # Precision
            precision = tpc / (tpc + fpc)  # precision curve
            p[ci] = np.interp(-pr_score, -conf[mask], precision[:, 0], left=1)  # p at pr_score

            # AP from recall-precision curve, one value per IoU threshold
            for j in range(tp.shape[1]):
                ap[ci, j], _, _ = self.compute_ap_per_class(recall[:, j], precision[:, j])

        return ap, p, r

    def compute_ap_per_class(self, recall, precision):
        """Return ``(ap, mpre, mrec)`` for one recall / precision curve.

        ``mrec`` and ``mpre`` are the curves padded with sentinel values;
        ``mpre`` is additionally replaced by its monotone envelope.
        """
        # Append sentinel values at beginning and end
        mrec = np.concatenate(([0.0], recall, [1.0]))
        mpre = np.concatenate(([1.0], precision, [0.0]))

        # Precision envelope: running maximum taken from the right
        mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

        # Look for recall value changes
        idx = np.where(mrec[1:] != mrec[:-1])[0]

        # Sum ΔRecall * Precision
        ap = np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])

        return ap, mpre, mrec

    def print_results(self, ap, p, r, class_names=None, class_ids=None):
        """Print a per-class and overall results table.

        Args:
            ap, p, r: arrays as returned by ``compute_ap``.
            class_names: names to print; defaults to ``self.class_names``.
            class_ids: optional sequence giving, for each row of ``ap``, the
                index into ``class_names`` (e.g. ``np.unique(target_cls)``).
                When omitted, row ``i`` is labelled ``class_names[i]``; rows
                are limited to what both ``ap`` and ``class_names`` provide,
                so a metrics array with fewer rows than names no longer
                raises IndexError.
        """
        if class_names is None:
            class_names = self.class_names

        header_fmt = '%20s' + '%12s' * 6
        # Fixed-precision numbers keep every value inside its 12-char column
        row_fmt = '%20s' + '%12i' * 2 + '%12.3g' * 4
        print(header_fmt % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95'))
        print('-' * 86)

        n_rows = len(ap)
        if class_ids is None:
            class_ids = range(min(n_rows, len(class_names)))

        # Per-class results
        for row, cid in enumerate(class_ids):
            if row >= n_rows:
                break
            print(row_fmt % (class_names[int(cid)], 0, 0,
                             p[row, 0], r[row, 0], ap[row, 0], ap[row].mean()))

        # Overall results (zeros when there is nothing to average)
        if n_rows:
            print(row_fmt % ('all', 0, 0, p.mean(), r.mean(), ap[:, 0].mean(), ap.mean()))
        else:
            print(row_fmt % ('all', 0, 0, 0.0, 0.0, 0.0, 0.0))

# Performance Analysis
print("Performing comprehensive model analysis...")

# Build analyzers for both model variants
analyzer_nano = ModelAnalyzer(model_nano)
analyzer_standard = ModelAnalyzer(model_standard)

# Parameter analysis
nano_params = analyzer_nano.count_parameters()
standard_params = analyzer_standard.count_parameters()

# "\n" (a real newline) separates the report sections; the doubled backslash
# left by the notebook export printed a literal backslash-n instead.
print("\nParameter Analysis:")
print(f"LEAF-YOLO-N: {nano_params['total_parameters_M']:.2f}M parameters")
print(f"LEAF-YOLO Standard: {standard_params['total_parameters_M']:.2f}M parameters")

# Model size analysis
nano_size = analyzer_nano.analyze_model_size()
standard_size = analyzer_standard.analyze_model_size()

print("\nModel Size Analysis:")
print(f"LEAF-YOLO-N: {nano_size['total_size_MB']:.2f} MB")
print(f"LEAF-YOLO Standard: {standard_size['total_size_MB']:.2f} MB")

# Inference speed (if CUDA available)
if torch.cuda.is_available():
    # Module.cuda() moves the module in place, so analyzer_nano sees the GPU copy
    model_nano = model_nano.cuda()
    nano_speed = analyzer_nano.profile_inference_speed(num_runs=50)
    print("\nInference Speed (LEAF-YOLO-N on GPU):")
    print(f"Average time: {nano_speed['average_inference_time_ms']:.2f} ms")
    print(f"FPS: {nano_speed['fps']:.1f}")
else:
    print("\nCUDA not available - skipping GPU inference speed test")

# Visualize architecture
print("\nGenerating model architecture visualization...")
analyzer_nano.visualize_architecture()

# Expected VisDrone performance (based on paper)
print("\nExpected VisDrone Performance:")
print("LEAF-YOLO-N:")
print("  - mAP@50:95: 21.9%")
print("  - mAP@50: 39.7%")
print("  - AP_S (small): 14.0%")
print("  - Jetson AGX Xavier: 56 FPS")
print("\nLEAF-YOLO Standard:")
print("  - mAP@50:95: 28.2%")
print("  - mAP@50: 48.3%")
print("  - AP_S (small): 20.0%")
print("  - Jetson AGX Xavier: 32 FPS")

print("\nModel analysis completed successfully!")