In [None]:
import torch
import torchvision.transforms as T
from PIL import Image
import requests
import time
from collections import defaultdict

In [None]:
# Pull the pretrained 'detr_resnet50' entry point from torch.hub and switch the
# returned module to evaluation mode in the same expression (eval() hands back
# the module itself).
model = torch.hub.load(
    'facebookresearch/detr',
    'detr_resnet50',
    pretrained=True,
).eval()

# Walk the module tree: one line per submodule, dotted name followed by its
# class. The root module has an empty name, so its line starts with the arrow.
for name, module in model.named_modules():
    print(f"{name} → {type(module)}")


Downloading: "https://github.com/facebookresearch/detr/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 108MB/s]
Downloading: "https://dl.fbaipublicfiles.com/detr/detr-r50-e632da11.pth" to /root/.cache/torch/hub/checkpoints/detr-r50-e632da11.pth
100%|██████████| 159M/159M [00:01<00:00, 132MB/s]


 → <class 'models.detr.DETR'>
transformer → <class 'models.transformer.Transformer'>
transformer.encoder → <class 'models.transformer.TransformerEncoder'>
transformer.encoder.layers → <class 'torch.nn.modules.container.ModuleList'>
transformer.encoder.layers.0 → <class 'models.transformer.TransformerEncoderLayer'>
transformer.encoder.layers.0.self_attn → <class 'torch.nn.modules.activation.MultiheadAttention'>
transformer.encoder.layers.0.self_attn.out_proj → <class 'torch.nn.modules.linear.NonDynamicallyQuantizableLinear'>
transformer.encoder.layers.0.linear1 → <class 'torch.nn.modules.linear.Linear'>
transformer.encoder.layers.0.dropout → <class 'torch.nn.modules.dropout.Dropout'>
transformer.encoder.layers.0.linear2 → <class 'torch.nn.modules.linear.Linear'>
transformer.encoder.layers.0.norm1 → <class 'torch.nn.modules.normalization.LayerNorm'>
transformer.encoder.layers.0.norm2 → <class 'torch.nn.modules.normalization.LayerNorm'>
transformer.encoder.layers.0.dropout1 → <class 'torc

In [None]:
# Colab-specific step: mount Google Drive at /content/drive so that files
# stored under MyDrive can be read through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

# Load image from Google Drive
# `Image` is PIL's module imported in the first cell; convert('RGB') forces
# RGB mode regardless of how the file was stored.
image_path = '/content/drive/MyDrive/peddet_2_1.jpg'
image = Image.open(image_path).convert('RGB')

Mounted at /content/drive


In [None]:
# Measure total model inference time (GPU) with CUDA events.
#
# The cell is self-contained with respect to its input: it builds `img_tensor`
# itself and moves `model` to the GPU, so it runs top-to-bottom on a fresh
# kernel. It relies only on `torch`, `T`, `model` and `image` from the cells
# above.
device = torch.device("cuda")
model.to(device).eval()

# Preprocessing: resize, convert to a tensor, normalise per channel, then add
# a leading batch dimension and place the tensor on the GPU.
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
img_tensor = transform(image).unsqueeze(0).to(device)

start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)

with torch.no_grad():
    # Warm-up passes so one-time start-up costs of the first forward calls
    # are not attributed to the timed run.
    for _ in range(5):
        model(img_tensor)

    torch.cuda.synchronize()  # Drain the warm-up work before starting the clock
    start_event.record()

    outputs = model(img_tensor)  # Full forward pass

    end_event.record()
    torch.cuda.synchronize()  # Wait for all ops (and end_event) to finish

# elapsed_time() reports milliseconds; convert to seconds for printing.
total_model_time = start_event.elapsed_time(end_event) / 1000.0
print(f"\n✅ Total model inference time (end-to-end): {total_model_time:.6f} sec")



✅ Total model inference time (end-to-end): 0.158241 sec


In [None]:
import torch
import torchvision.transforms as T
from PIL import Image
from torch.profiler import profile, record_function, ProfilerActivity

# Image + model loading (as before); this cell reloads both so it can be run
# on its own once Drive is mounted.
image = Image.open("/content/drive/MyDrive/peddet_2_1.jpg").convert("RGB")
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)
model.eval().cuda()

# Preprocessing: resize, convert to a tensor, normalise per channel, then add
# a leading batch dimension and move the tensor to the GPU.
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
img_tensor = transform(image).unsqueeze(0).cuda()

# Warm-up
with torch.no_grad():
    for _ in range(5):
        _ = model(img_tensor)

# CUDA work is queued asynchronously, so wait for the warm-up passes to finish
# before profiling starts; otherwise leftover warm-up work can overlap the
# profiled window.
torch.cuda.synchronize()

# Profile a single forward pass on both CPU and CUDA. The pass is wrapped in a
# "model_inference" range so it shows up as one labelled row in the table.
with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    with_stack=True,
    profile_memory=True
) as prof:
    with torch.no_grad():
        with record_function("model_inference"):
            _ = model(img_tensor)

# Print high-level summary: top 15 rows ordered by total CUDA time.
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=15))


Using cache found in /root/.cache/torch/hub/facebookresearch_detr_main


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         0.00%       0.000us         0.00%       0.000us       0.000us      96.199ms       103.60%      96.199ms      96.199ms           0 b           0 b           0 b           0 

In [None]:
import torch
import torchvision.transforms as T
from PIL import Image
import time

# ✅ Load image
image = Image.open("/content/drive/MyDrive/peddet_2_1.jpg").convert("RGB")

# ✅ Load model and move to GPU
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)
model.eval().cuda()

# ✅ Preprocess image
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
img_tensor = transform(image).unsqueeze(0).cuda()

# ✅ Warm-up pass (important for GPU)
with torch.no_grad():
    for _ in range(5):
        _ = model(img_tensor)

# ✅ Measure actual inference time
# Synchronise on both sides of the timed region so the host clock brackets the
# GPU work itself rather than just the asynchronous kernel launches.
# perf_counter() is used instead of time.time(): it is monotonic and is the
# highest-resolution clock available for measuring short intervals.
torch.cuda.synchronize()
start = time.perf_counter()

with torch.no_grad():
    _ = model(img_tensor)

torch.cuda.synchronize()
end = time.perf_counter()

# ✅ Print result
print(f"\n⏱️ Actual GPU inference time (wall clock): {end - start:.6f} sec")


Using cache found in /root/.cache/torch/hub/facebookresearch_detr_main



⏱️ Actual GPU inference time (wall clock): 0.096108 sec
