In [4]:
import time
import torch
frame_num = 200

In [4]:

from HarDMSEG import HarDMSEG

model = HarDMSEG(in_channels = 3, out_channels = 1)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

68 LOADED
Total parameters: 33,343,244
Trainable parameters: 33,343,244


In [5]:
total_time = 0
for i in range(frame_num):
    input_image = torch.zeros((1, 3, 256, 256))

    start_time = time.time()

    output = model(input_image)

    end_time = time.time()

    total_time += (end_time - start_time)
avg_time = total_time / frame_num
fps = 1 / avg_time
print(f"Average FPS over {frame_num} images: {fps}")

Average FPS over 200 images: 1.3432959059044134


In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """[Conv2d => BatchNorm => ReLU] × 2"""
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),

            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1):
        super(UNet, self).__init__()
        self.enc1 = DoubleConv(in_channels, 64)
        self.enc2 = DoubleConv(64, 128)
        self.enc3 = DoubleConv(128, 256)
        self.enc4 = DoubleConv(256, 512)

        self.bottleneck = DoubleConv(512, 1024)

        self.upconv4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.dec4 = DoubleConv(1024, 512)

        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec3 = DoubleConv(512, 256)

        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = DoubleConv(256, 128)

        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = DoubleConv(128, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        # Encoder
        e1 = self.enc1(x)
        e2 = self.enc2(F.max_pool2d(e1, 2))
        e3 = self.enc3(F.max_pool2d(e2, 2))
        e4 = self.enc4(F.max_pool2d(e3, 2))

        # Bottleneck
        b = self.bottleneck(F.max_pool2d(e4, 2))

        # Decoder
        d4 = self.upconv4(b)
        d4 = self.dec4(torch.cat((e4, d4), dim=1))

        d3 = self.upconv3(d4)
        d3 = self.dec3(torch.cat((e3, d3), dim=1))

        d2 = self.upconv2(d3)
        d2 = self.dec2(torch.cat((e2, d2), dim=1))

        d1 = self.upconv1(d2)
        d1 = self.dec1(torch.cat((e1, d1), dim=1))

        return torch.sigmoid(self.final_conv(d1))  # for binary segmentation

In [56]:

model = UNet(in_channels=3, out_channels=1)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Total parameters: 31,043,521
Trainable parameters: 31,043,521


In [57]:
total_time = 0
for i in range(frame_num):
    input_image = torch.zeros((1, 3, 256, 256))

    start_time = time.time()

    output = model(input_image)

    end_time = time.time()

    total_time += (end_time - start_time)
avg_time = total_time / frame_num
fps = 1 / avg_time
print(f"Unet Average FPS over {frame_num} images: {fps}")

Unet Average FPS over 200 images: 5.106356205180485


In [17]:
from ultralytics import YOLO

model = YOLO("yolo11x-seg.pt")
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt to 'yolo11x-seg.pt'...


100%|███████████████████████████████████████████████████████████████████████████████| 119M/119M [00:49<00:00, 2.52MB/s]


Total parameters: 62,142,656
Trainable parameters: 0


In [18]:
total_time = 0
for i in range(frame_num):
    input_image = torch.zeros((1, 3, 256, 256))

    start_time = time.time()

    output = model(input_image)

    end_time = time.time()

    total_time += (end_time - start_time)
avg_time = total_time / frame_num
fps = 1 / avg_time
print(f"Yolo11s Average FPS over {frame_num} images: {fps}")


0: 256x256 (no detections), 44.5ms
Speed: 0.6ms preprocess, 44.5ms inference, 0.8ms postprocess per image at shape (1, 3, 256, 256)

0: 256x256 (no detections), 38.8ms
Speed: 0.2ms preprocess, 38.8ms inference, 0.9ms postprocess per image at shape (1, 3, 256, 256)

0: 256x256 (no detections), 19.7ms
Speed: 0.2ms preprocess, 19.7ms inference, 0.8ms postprocess per image at shape (1, 3, 256, 256)

0: 256x256 (no detections), 20.4ms
Speed: 0.2ms preprocess, 20.4ms inference, 0.8ms postprocess per image at shape (1, 3, 256, 256)

0: 256x256 (no detections), 20.7ms
Speed: 0.2ms preprocess, 20.7ms inference, 0.8ms postprocess per image at shape (1, 3, 256, 256)

0: 256x256 (no detections), 19.9ms
Speed: 0.2ms preprocess, 19.9ms inference, 0.8ms postprocess per image at shape (1, 3, 256, 256)

0: 256x256 (no detections), 19.9ms
Speed: 0.2ms preprocess, 19.9ms inference, 0.8ms postprocess per image at shape (1, 3, 256, 256)

0: 256x256 (no detections), 19.9ms
Speed: 0.2ms preprocess, 19.9ms i

In [6]:
from trfe import TRFENet

In [58]:
model = TRFENet(3, 1)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Total parameters: 46,546,946
Trainable parameters: 46,546,946


In [59]:
total_time = 0
for i in range(frame_num):
    input_image = torch.zeros((1, 3, 256, 256))

    start_time = time.time()

    output = model(input_image)

    end_time = time.time()

    total_time += (end_time - start_time)
avg_time = total_time / frame_num
fps = 1 / avg_time
print(f"TRFE Average FPS over {frame_num} images: {fps}")

TRFE Average FPS over 200 images: 3.120836442596583


In [6]:
from model import Eff_Unet
model = Eff_Unet(
        layers=[5, 5, 15, 10],
        embed_dims=[10, 20, 48, 96],
        # embed_dims=[40, 80, 192, 384],
        downsamples=[True, True, True, True],
        vit_num=6,
        drop_path_rate=0.1,
        num_classes=1,
        resolution = 256,
        input_channels = 3)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Total parameters: 7,535,487
Trainable parameters: 7,535,487


In [7]:
total_time = 0
for i in range(frame_num):
    input_image = torch.zeros((1, 3, 256, 256))

    start_time = time.time()

    output = model(input_image)

    end_time = time.time()

    total_time += (end_time - start_time)
avg_time = total_time / frame_num
fps = 1 / avg_time
print(f"Eff_Unet Average FPS over {frame_num} images: {fps}")

Eff_Unet Average FPS over 200 images: 9.202961699518573


In [11]:
from hybrid_model_v3_upsample import HybridSegModel
image_size = 256
model = HybridSegModel(in_channels = 3, out_channels = 2, output_size = image_size, layers_num = 3)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Total parameters: 3,535,663
Trainable parameters: 3,535,663


In [12]:
from tqdm import tqdm
total_time = 0
frame_num = 200
for i in tqdm(range(frame_num)):
    input_image = torch.zeros((1, 3, 256, 256))

    start_time = time.time()

    output = model(input_image)

    end_time = time.time()

    total_time += (end_time - start_time)
avg_time = total_time / frame_num
fps = 1 / avg_time
print(f"Average FPS over {frame_num} images: {fps}")

100%|██████████| 200/200 [00:14<00:00, 13.45it/s]

Average FPS over 200 images: 13.605311499721013



