### mac4 pro MPS 환경 설정 

```bash
❯ conda create --name ultralytics-env python=3.11 -y                                                                                       ─╯
Retrieving notices: done
Channels:
 - conda-forge
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done
```

```bash
❯ conda activate ultralytics-env
```

```bash
❯ conda install -c conda-forge ultralytics
```

```bash
❯ conda install pytorch torchvision torchaudio -c pytorch-nightly
```

```bash
# environment.yml 만들기
❯ conda env export > environment.yml 
```

```bash
# 다른 시스템에서 이 환경을 재현하려면 다음 명령어를 실행
conda env create -f environment.yml
```

In [1]:
!pip install gtts
!pip install playsound==1.2.2



In [2]:
from gtts import gTTS
import os
import playsound
import time

# 신호 시간 최소/최대 값 설정
MIN_TRAFFIC_TIME = 10
MAX_TRAFFIC_TIME = 60
# 초단위 기준 신호 시간
DEFAULT_STOP_TIME = 30
# 기준 통행량(차량 수)
DEFAULT_TRAFFIC = 20
# 보행자 통행 시간
CROSSING_TIME = 5

signal_change_message = "신호가 바뀌었습니다."

# 구글 tts
def speak(text):
    tts = gTTS(text=text, lang='ko')
    filename='gtts_voice.mp3'
    tts.save(filename)
    playsound.playsound(filename)
    os.remove(filename)

# 신호 변경 로직
def change_traffic_sign(traffic):
    # 신호 시간
    traffic_time = int(traffic/DEFAULT_TRAFFIC*DEFAULT_STOP_TIME)

    # 메세지
    message = f"신호가 {traffic_time}초 뒤에 바뀝니다."
    
    speak(message)
    time.sleep(traffic_time - 1)
    print("차량 황색신호")
    time.sleep(1)
    print("차량 적색신호, 보행자 녹색신호")
    speak(signal_change_message)
    time.sleep(CROSSING_TIME)
    print("보행자 적색신호, 차량 녹색신호")

In [3]:
import torch

# PyTorch 버전 확인
print("PyTorch version:", torch.__version__)

# MPS (Metal Performance Shaders) 지원 여부 확인
print("MPS available:", torch.backends.mps.is_available())

# GPU가 기본 장치로 설정 가능한지 확인
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
print("Selected device:", device)

PyTorch version: 2.6.0.dev20241112
MPS available: False
Selected device: cpu


In [4]:
from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # initialize model

In [5]:
results = model("12.png")  # perform inference
results[0].show()  # display results for the first image
print(results[0].boxes)    # cls에 클래스 인덱스가 포함


image 1/1 C:\Users\kym19\12.png: 384x640 12 cars, 1 bus, 43.5ms
Speed: 1.0ms preprocess, 43.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
ultralytics.engine.results.Boxes object with attributes:

cls: tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 5.])
conf: tensor([0.7770, 0.6992, 0.5701, 0.5072, 0.3748, 0.3483, 0.3294, 0.3203, 0.2925, 0.2774, 0.2773, 0.2746, 0.2649])
data: tensor([[5.2486e+02, 2.9890e+02, 5.7238e+02, 3.2608e+02, 7.7698e-01, 2.0000e+00],
        [3.7641e+02, 3.1147e+02, 4.2510e+02, 3.3762e+02, 6.9916e-01, 2.0000e+00],
        [4.2420e+02, 2.8857e+02, 4.6528e+02, 3.1102e+02, 5.7007e-01, 2.0000e+00],
        [3.1698e+02, 2.8049e+02, 3.5789e+02, 3.0626e+02, 5.0723e-01, 2.0000e+00],
        [4.1714e+02, 1.7530e+02, 4.6314e+02, 1.9008e+02, 3.7479e-01, 2.0000e+00],
        [3.6959e+02, 2.7969e+02, 4.1802e+02, 3.0919e+02, 3.4829e-01, 2.0000e+00],
        [5.8700e+01, 2.0892e+02, 9.6159e+01, 2.3049e+02, 3.2936e-01, 2.0000e+00],
        [5.4264

In [6]:
# 차량 클래스 ID 리스트
vehicle_classes = [2, 3, 5, 6, 7]  # car, motorcycle, bus, train, truck

# 차량 클래스 ID에 해당하는 객체 개수 계산
num_vehicles = sum((results[0].boxes.cls == class_id).sum().item() for class_id in vehicle_classes)

# change_traffic_sign(num_vehicles)

### 함수 내에서 신호 변경 및 응급차량 검토
- 비동기 처리로 구현했습니다.

In [10]:
import asyncio
async def speak(text):
    """ 구글 tts """
    tts = gTTS(text=text, lang='ko')
    filename='gtts_voice.mp3'
    tts.save(filename)
    playsound.playsound(filename)
    os.remove(filename)

async def capture_frame(model, image_path="12.png"):
    """ YOLO 모델을 사용하여 객체 탐지 수행 """
    return model(image_path)
    
async def detect_emergency_vehicle(model, image_path="12.png"):
    """ 응급차량 감지 함수 """
    results = await capture_frame(model, image_path)

    # 응급차량 id 임의로 지정되었습니다. 실제 id값에 맞게 수정이 필요합니다.
    EMERGENCY_VEHICLE_ID = 1
    return EMERGENCY_VEHICLE_ID in results[0].boxes.cls.tolist()
    
async def change_traffic_sign(model, yolo_model):
    """ 차량 통행량 기반 보행자 신호 조정 및 응급차량 감지 """
    
    # 응급차량 감지 (초기 판단)
    emergency_detected = await detect_emergency_vehicle(model)
    if emergency_detected:
        traffic_time = 60
        await speak(f"응급차량이 접근하고 있습니다. {traffic_time}초 뒤에 신호가 바뀝니다.")
    else:
        results = await capture_frame(yolo_model)
        vehicle_classes = [2, 3, 5, 6, 7]  # 차량 클래스 ID (car, motorcycle, bus, train, truck)
        num_vehicles = sum((results[0].boxes.cls == class_id).sum().item() for class_id in vehicle_classes)

        # 신호 시간 조정 (최소/최대 범위 적용)
        traffic_time = max(MIN_TRAFFIC_TIME, min(MAX_TRAFFIC_TIME, int(num_vehicles / DEFAULT_TRAFFIC * DEFAULT_STOP_TIME)))
        await speak(f"신호가 {traffic_time}초 뒤에 바뀝니다.")

    await asyncio.sleep(traffic_time - 1)
    print("차량 황색신호")
    await asyncio.sleep(1)
    print("차량 적색신호, 보행자 녹색신호")
    await speak("신호가 바뀌었습니다. 주의하여 횡단보도를 건너가십시오.")

    # 보행자 신호가 녹색으로 바뀐 시점에서 응급차량 체크
    if await detect_emergency_vehicle(model):
        await speak("응급 차량이 접근 중입니다. 주의하여 도로를 건너가십시오.")

    # 보행자 신호 종료
    await asyncio.sleep(CROSSING_TIME)
    print("보행자 적색신호, 차량 녹색신호")
    await speak("신호가 종료되었습니다.")

# 임의로 그냥 모델 2개 연달아 넣었습니다. 실사용시에는 finetuned model, yolov11 model 로 변경하여 사용해야합니다.
await change_traffic_sign(model, model)


image 1/1 C:\Users\kym19\12.png: 384x640 12 cars, 1 bus, 35.0ms
Speed: 1.0ms preprocess, 35.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 C:\Users\kym19\12.png: 384x640 12 cars, 1 bus, 35.5ms
Speed: 1.0ms preprocess, 35.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
차량 황색신호
차량 적색신호, 보행자 녹색신호

image 1/1 C:\Users\kym19\12.png: 384x640 12 cars, 1 bus, 30.0ms
Speed: 1.0ms preprocess, 30.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
보행자 적색신호, 차량 녹색신호


# 여기까지 작성했습니다. 여기까지 봐주세요

In [12]:
results = model("C-220722_15_CR11_04_A0076.jpg")  # perform inference
results[0].show()  # display results for the first image


image 1/1 /Users/hdj/study/AI/vscode/Ultralytics/C-220722_15_CR11_04_A0076.jpg: 384x640 5 cars, 1 truck, 43.0ms
Speed: 2.0ms preprocess, 43.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


In [5]:
results = model("C-221008_13_CR06_01_A0085.jpg")  # perform inference
results[0].show()  # display results for the first image


image 1/1 /Users/hdj/study/AI/vscode/Ultralytics/C-221008_13_CR06_01_A0085.jpg: 384x640 4 persons, 10 cars, 41.9ms
Speed: 1.7ms preprocess, 41.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


In [10]:
results = model("C-221005_13_CR06_02_A0184.jpg")  # perform inference
results[0].show()  # display results for the first image


image 1/1 /Users/hdj/study/AI/vscode/Ultralytics/C-221005_13_CR06_02_A0184.jpg: 384x640 10 cars, 3 buss, 41.3ms
Speed: 1.6ms preprocess, 41.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)


In [2]:
model.track(source="chelsea.mov", save=True)



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/1766) /Users/hdj/study/AI/vscode/Ultralytics/chelsea.mov: 416x640 14 persons, 60.7ms
video 1/1 (frame 2/1766) /Users/hdj/study/AI/vscode/Ultralytics/chelsea.mov: 416x640 14 persons, 61.3ms
video 1/1 (frame 3/1766) /Users/hdj/study/AI/vscode/Ultralytics/chelsea.mov: 416x640 14 persons, 60.3ms
video 1/1 (frame 4/1766) /Users/hdj/study/AI/vscode/Ultralytics/chelsea.mov: 416x640 14 persons, 60.8ms
video 1/1 (frame 5/1766) /Users/hdj/study/AI/vscode/Ultralytics/chelsea.mov: 416x640 17 persons, 60.5ms
video 1/1 (frame 6/1766) /Users/hdj/study/AI/vscode/

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

In [3]:
model.track(source="sample_video4.mp4", save=True)



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/475) /Users/hdj/study/AI/vscode/Ultralytics/sample_video4.mp4: 640x384 2 cars, 60.8ms
video 1/1 (frame 2/475) /Users/hdj/study/AI/vscode/Ultralytics/sample_video4.mp4: 640x384 2 cars, 57.2ms
video 1/1 (frame 3/475) /Users/hdj/study/AI/vscode/Ultralytics/sample_video4.mp4: 640x384 2 cars, 56.2ms
video 1/1 (frame 4/475) /Users/hdj/study/AI/vscode/Ultralytics/sample_video4.mp4: 640x384 2 cars, 54.3ms
video 1/1 (frame 5/475) /Users/hdj/study/AI/vscode/Ultralytics/sample_video4.mp4: 640x384 1 car, 54.7ms
video 1/1 (frame 6/475) /Users/hdj/study/AI/vsco

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

In [3]:
model.track(source="sample_video2.mov", save=True)



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/1283) /Users/hdj/study/AI/vscode/Ultralytics/sample_video2.mov: 384x640 2 persons, 26 cars, 1 bus, 7 trucks, 38.5ms
video 1/1 (frame 2/1283) /Users/hdj/study/AI/vscode/Ultralytics/sample_video2.mov: 384x640 15 cars, 1 truck, 39.3ms
video 1/1 (frame 3/1283) /Users/hdj/study/AI/vscode/Ultralytics/sample_video2.mov: 384x640 15 cars, 1 truck, 35.0ms
video 1/1 (frame 4/1283) /Users/hdj/study/AI/vscode/Ultralytics/sample_video2.mov: 384x640 15 cars, 31.7ms
video 1/1 (frame 5/1283) /Users/hdj/study/AI/vscode/Ultralytics/sample_video2.mov: 384x640 15 cars

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

In [9]:
from collections import defaultdict
import cv2
import numpy as np
from ultralytics import YOLO

# Load the YOLO11 model
model = YOLO("yolo11n.pt")

# Open the video file
# video_path = "uno1.mp4"
video_path = "chelsea.mov"
# video_path = "sample_video2.mov"
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"Error: Could not open video file {video_path}")
    exit()

# Store the track history
track_history = defaultdict(lambda: [])

# Loop through the video frames
while True:
    # Read a frame from the video
    success, frame = cap.read()
    if not success:
        print("End of video or failed to read frame.")
        break  # Exit loop if no more frames

    # Resize the frame to a fixed size
    frame = cv2.resize(frame, (640, 480))

    # Run YOLO11 tracking on the frame, persisting tracks between frames
    results = model.track(frame, persist=True, tracker="bytetrack.yaml")

    # Ensure results contain valid detection boxes
    if not results or not results[0].boxes:
        print("No objects detected in this frame.")
        continue

    # Get the boxes and track IDs
    boxes = results[0].boxes.xywh.cpu()  # Bounding box (x, y, w, h)
    track_ids = results[0].boxes.id  # Object IDs

    if track_ids is not None:
        track_ids = track_ids.int().cpu().tolist()
    else:
        track_ids = []  # Handle cases where no IDs are available

    # Visualize the results on the frame
    annotated_frame = results[0].plot()

    # Plot the tracks
    for box, track_id in zip(boxes, track_ids):
        x, y, w, h = box
        track = track_history[track_id]
        track.append((float(x), float(y)))  # Append center coordinates
        if len(track) > 30:  # Limit to the last 30 frames
            track.pop(0)

        # Draw the tracking lines
        points = np.array(track, dtype=np.int32).reshape((-1, 1, 2))
        cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=2)

    # Display the annotated frame
    cv2.imshow("YOLO11 Tracking", annotated_frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(10) & 0xFF == ord("q"):  # Wait 10ms for a key press
        print("User interrupted the process.")
        break

# Release the video capture object and close the display window
cap.release()

# Ensure all OpenCV windows are closed
cv2.destroyAllWindows()

# For macOS: Wait for a short moment to ensure windows close properly
cv2.waitKey(1)  # Prevents GUI from freezing on macOS


0: 480x640 18 persons, 50.6ms
Speed: 1.3ms preprocess, 50.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 18 persons, 46.2ms
Speed: 1.1ms preprocess, 46.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 18 persons, 69.0ms
Speed: 0.7ms preprocess, 69.0ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 14 persons, 38.8ms
Speed: 0.6ms preprocess, 38.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 14 persons, 38.9ms
Speed: 0.7ms preprocess, 38.9ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 14 persons, 36.5ms
Speed: 1.0ms preprocess, 36.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 16 persons, 37.9ms
Speed: 0.8ms preprocess, 37.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 15 persons, 37.1ms
Speed: 0.6ms preprocess, 37.1ms inference, 0.5ms postprocess per image at

-1

## emergency vehicle 데이터셋 확보하기

### The direct link to download your zip file is:
- https://universe.roboflow.com/ds/ceRtslf8BP?key=CQDsva7smA

### Use this code to download and unzip your dataset via the command line on any *nix machine:
- curl -L "https://universe.roboflow.com/ds/ceRtslf8BP?key=CQDsva7smA" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip

### Paste this snippet into a notebook from our model library to download and unzip your dataset:
```bash
!pip install roboflow
```

In [None]:

from roboflow import Roboflow
rf = Roboflow(api_key="unauthorized")
project = rf.workspace("yolov7-v0erh").project("emergency-nonemergency-vehicle")
version = project.version(7)
dataset = version.download("yolov11")