<a href="https://colab.research.google.com/github/elangbijak4/Image-Video-AI/blob/main/GPU_DEMONSTRASI_RE_DETR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install the required libraries
# (PyTorch + torchvision, Hugging Face transformers, and OpenCV for video I/O and drawing)
!pip install torch torchvision transformers opencv-python

In [None]:
# Install timm, which is imported in the next cell
# (presumably required by the DETR backbone in transformers — TODO confirm).
!pip install timm

In [10]:
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

In [11]:
# Step 2: Import the required libraries
import torchvision.transforms as T
from transformers import AutoImageProcessor, DetrForObjectDetection
import cv2
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Step 3: Load the RE-DETR image processor and detection model.
# Both are fetched from the same checkpoint id; the model is then moved to the GPU.
CHECKPOINT = "SenseTime/re_detr"
processor = AutoImageProcessor.from_pretrained(CHECKPOINT)
model = DetrForObjectDetection.from_pretrained(CHECKPOINT)
model = model.to("cuda")

In [None]:
# Step 4: Define the function that draws detection boxes on a frame
def draw_boxes(frame, outputs, threshold=0.9):
    """Draw the confident detections of a DETR forward pass onto ``frame``.

    Args:
        frame: Image array indexed as (height, width, ...) that OpenCV can
            draw on. It is modified in place.
        outputs: Model output exposing ``logits`` and ``pred_boxes``. Only the
            first item of the batch is used.
        threshold: Minimum class probability a query must reach to be drawn.
            Defaults to 0.9.

    Returns:
        The same ``frame`` object, with rectangles and "label: score" captions
        drawn for every kept detection.

    Notes:
        Class names are looked up in the notebook-level ``model.config.id2label``.
    """
    # Softmax over classes, then drop the last column (DETR's "no object" class).
    probas = outputs.logits.softmax(-1)[0, :, :-1].cpu()

    # Best class and its probability for every query, computed once so the
    # caption always shows the score of the detection it belongs to.
    scores, labels = probas.max(-1)
    keep = scores > threshold

    # DETR predicts boxes as normalized (center_x, center_y, width, height).
    boxes = outputs.pred_boxes[0].cpu()[keep]

    frame_h, frame_w = frame.shape[:2]
    color = (0, 255, 0)  # Green (BGR) for both the box and its caption

    detections = zip(boxes.tolist(), labels[keep].tolist(), scores[keep].tolist())
    for (cx, cy, box_w, box_h), label, score in detections:
        # Convert center/size to pixel corner coordinates.
        xmin = int((cx - box_w / 2) * frame_w)
        xmax = int((cx + box_w / 2) * frame_w)
        ymin = int((cy - box_h / 2) * frame_h)
        ymax = int((cy + box_h / 2) * frame_h)

        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)

        text = f"{model.config.id2label[label]}: {score:.2f}"
        # Keep the caption inside the frame when the box touches the top/left edge.
        text_origin = (max(xmin, 0), max(ymin - 10, 20))
        cv2.putText(frame, text, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
    return frame

# Step 5: Download the sample video
# (wget -O saves it as video.mp4 in the current working directory)
!wget -O video.mp4 https://ia800102.us.archive.org/23/items/SampleVideo1280x72020mb/SampleVideo_1280x720_20mb.mp4

# Step 6: Read the video and run object detection on every frame
cap = cv2.VideoCapture('video.mp4')
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file.
    raise OSError("Could not open video.mp4")

# Frame width, height and frame rate of the input, reused for the output video
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Set up the video writer that stores the annotated result
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, fps, (width, height))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # End of stream (or read error)

        # OpenCV decodes frames as BGR, while the image processor treats
        # arrays as RGB; convert a copy for the model and keep the BGR frame
        # for drawing and writing.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        inputs = processor(images=rgb_frame, return_tensors="pt").to("cuda")

        # Inference only: no gradients needed
        with torch.no_grad():
            outputs = model(**inputs)

        # Draw detection boxes on the (BGR) frame
        frame = draw_boxes(frame, outputs)

        # Write the annotated frame to the output video
        out.write(frame)
finally:
    # Always release the capture and finalize the output file, even if
    # inference raises part-way through the video.
    cap.release()
    out.release()

# Step 7: Display the resulting video
from IPython.display import Video
# Left as the final bare expression so the notebook renders it as the cell output.
# NOTE(review): output.mp4 is written with the 'mp4v' codec above; confirm the
# browser can actually play it inline.
Video("output.mp4", embed=True)