In [None]:

!pip install -q inference-gpu[yolo-world]==0.9.12rc1
!pip install -q supervision==0.19.0rc3
!pip install fastapi uvicorn
!pip install python-multipart
!pip install pyngrok
!pip install deepl

In [None]:
import cv2
import supervision as sv
from tqdm import tqdm
from inference.models.yolo_world.yolo_world import YOLOWorld
from fastapi import FastAPI, Request, HTTPException
from starlette.middleware.base import BaseHTTPMiddleware
import base64
import io
import matplotlib.pyplot as plt
from pyngrok import ngrok
import nest_asyncio
import os
import deepl
# FastAPI application that the middleware and route decorators below attach to.
app = FastAPI()

# Annotators used to draw detection boxes and their labels onto video frames.
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=2)
LABEL_ANNOTATOR = sv.LabelAnnotator(
    text_thickness=2,
    text_scale=1,
    text_color=sv.Color.BLACK,
)

# Load the YOLO-World model once at module level so every request reuses it.
model = YOLOWorld(model_id="yolo_world/l")

@app.middleware("http")
async def add_ngrok_headers(request: Request, call_next):
    """Add the ``ngrok-skip-browser-warning`` header to every response.

    The request is passed unchanged to the next handler; only the outgoing
    response is modified. Presumably the header lets clients reaching the app
    through the ngrok tunnel bypass ngrok's browser warning page -- confirm
    against the ngrok documentation.
    """
    header_name = "ngrok-skip-browser-warning"
    outgoing = await call_next(request)
    outgoing.headers[header_name] = "any_value"
    return outgoing

@app.post("/detect_objects")
async def detect_objects(request: Request):
    """Detect the requested objects in a video and return annotated results.

    The request body must be a JSON object with:
        video_url: Location of the video; passed directly to
            ``cv2.VideoCapture``.
        object_name: Comma-separated object names. The text is translated to
            US English with DeepL and the resulting names are set as the
            YOLO-World classes.

    Roughly ``process_fps`` frames per second of source video are sampled.
    Each sampled frame is run through the model, annotated, and written to an
    output video under ``./output_asset``.

    Returns:
        A dict with:
            total_bounding_boxes: Sum of detections over all sampled frames.
            graph_data: Base64-encoded PNG plotting detections per sampled
                frame.
            video_data: Base64-encoded annotated MP4.
            graph_path: Server-side path the PNG was written to.

    Raises:
        HTTPException: 400 if ``video_url`` or ``object_name`` is missing from
            the body; 500 if the video cannot be opened or yields no frames.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import urlparse

    data = await request.json()
    try:
        video_url = data["video_url"]
        object_name = data["object_name"]
    except (KeyError, TypeError) as err:
        # TypeError covers a JSON body that is not an object (e.g. a list).
        raise HTTPException(
            status_code=400,
            detail="Request body must contain 'video_url' and 'object_name'",
        ) from err
    print(f"Object Name: {object_name}")
    print(f"Video URL: {video_url}")

    # Derive the output file stem from the URL *path* only, so query strings
    # or fragments (e.g. signed-URL tokens) never leak into local file names.
    video_name = os.path.basename(urlparse(video_url).path)
    video_name_without_ext = os.path.splitext(video_name)[0]

    # NOTE(review): placeholder credential -- supply a real DeepL key.
    auth_key = "AUTHORIZATION TOKEN"
    translator = deepl.Translator(auth_key)

    result = translator.translate_text(object_name, target_lang="EN-US")
    classes = [name.strip() for name in result.text.split(',')]
    print(classes)
    model.set_classes(classes)  # Set the class names the model should detect.

    bounding_box_counts = []

    # Output locations for the annotated video and the graph image.
    output_dir = os.path.join(os.getcwd(), 'output_asset')
    os.makedirs(output_dir, exist_ok=True)
    TARGET_VIDEO_PATH = os.path.join(output_dir, f'{video_name_without_ext}_output_video.mp4')
    graph_path = os.path.join(output_dir, f'{video_name_without_ext}_result.png')

    cap = cv2.VideoCapture(video_url)
    out = None
    try:
        if not cap.isOpened():
            raise HTTPException(status_code=500, detail="Failed to open video URL")

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_area = frame_width * frame_height
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        process_fps = 5
        # Clamp to at least 1: a source FPS below process_fps (or an unknown
        # FPS reported as 0) would otherwise give an interval of 0 and make
        # the modulo below raise ZeroDivisionError.
        frame_interval = max(1, int(fps / process_fps))

        # NOTE(review): the writer uses a fixed 20 fps while only every
        # frame_interval-th frame is written, so playback speed differs from
        # the source -- confirm this is intended.
        out = cv2.VideoWriter(
            TARGET_VIDEO_PATH,
            cv2.VideoWriter_fourcc(*'mp4v'),
            20.0,
            (frame_width, frame_height),
        )

        for frame_idx in tqdm(range(frame_count)):
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_interval == 0:
                results = model.infer(frame, confidence=0.1)
                detections = sv.Detections.from_inference(results).with_nms(threshold=0.1)
                # Drop boxes covering half the frame or more.
                detections = detections[(detections.area / frame_area) < 0.5]

                bounding_box_counts.append(len(detections))

                annotated_frame = frame.copy()
                annotated_frame = BOUNDING_BOX_ANNOTATOR.annotate(annotated_frame, detections)
                annotated_frame = LABEL_ANNOTATOR.annotate(annotated_frame, detections)
                out.write(annotated_frame)
    finally:
        # Always release the capture and writer, even if inference fails.
        cap.release()
        if out is not None:
            out.release()

    if not bounding_box_counts:
        # Nothing was sampled; max() below would fail with an opaque ValueError.
        raise HTTPException(status_code=500, detail="No frames could be read from the video")

    total_bounding_boxes = sum(bounding_box_counts)

    # Encode the annotated video file as Base64.
    with open(TARGET_VIDEO_PATH, "rb") as video_file:
        video_base64 = base64.b64encode(video_file.read()).decode('utf-8')

    # Render the per-sampled-frame detection counts as a PNG.
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.plot(bounding_box_counts, linewidth=3, alpha=0.8)
        max_val = max(bounding_box_counts)
        plt.axhline(y=max_val - 0.6, color='lightgray', linestyle='--')
        plt.tight_layout()

        graph_buffer = io.BytesIO()
        plt.savefig(graph_buffer, format='png')  # Save the graph into the buffer.
    finally:
        # pyplot keeps figures alive until closed; close to avoid leaking one
        # figure per request.
        plt.close(fig)

    graph_data = graph_buffer.getvalue()
    graph_base64 = base64.b64encode(graph_data).decode('utf-8')

    # Persist the image so the returned graph_path refers to an existing file.
    with open(graph_path, "wb") as graph_file:
        graph_file.write(graph_data)

    return {
        "total_bounding_boxes": total_bounding_boxes,
        "graph_data": graph_base64,
        "video_data": video_base64,  # Base64-encoded video data.
        "graph_path": graph_path  # Path of the graph image.
    }

# Configure ngrok and run the FastAPI server.
# NOTE(review): the auth token below is a placeholder -- supply a real ngrok token.
ngrok.set_auth_token("AUTHORIZATION TOKEN")
# Presumably needed so uvicorn can start inside the notebook's already-running
# event loop -- confirm against the nest_asyncio documentation.
nest_asyncio.apply()
# Open the tunnel on the same port uvicorn binds to below, and print its public
# URL before the blocking uvicorn.run call.
ngrok_tunnel = ngrok.connect(8000)
print("Public URL:", ngrok_tunnel.public_url)

import uvicorn
# Serve the app on all interfaces, port 8000.
uvicorn.run(app, host="0.0.0.0", port=8000)