### Final_code

In [None]:
!pip install pygame

Collecting pygame
  Downloading pygame-2.6.1-cp39-cp39-win_amd64.whl.metadata (13 kB)
Downloading pygame-2.6.1-cp39-cp39-win_amd64.whl (10.6 MB)
   ---------------------------------------- 0.0/10.6 MB ? eta -:--:--
   ------------------------ --------------- 6.6/10.6 MB 31.0 MB/s eta 0:00:01
   ---------------------------------------- 10.6/10.6 MB 25.5 MB/s eta 0:00:00
Installing collected packages: pygame
Successfully installed pygame-2.6.1


In [None]:
import os
import cv2
import time
import torch
import numpy as np
import serial
import serial.tools.list_ports
import random
from elevenlabs import ElevenLabs
from dotenv import load_dotenv, find_dotenv
import tempfile
import pygame

# API Key 직접 설정
load_dotenv()
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
VOICE_ID = "uyVNoMrnUku1dZyVEXwD"

# Init client
client_tts = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# Device setting
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.jit.load("dongguya.torchscript", map_location=device)
model.eval()

# --------------------------------------
# 🔊 TTS: speak 함수 (실시간 재생, pygame 사용)
# --------------------------------------
def speak(text):
    audio_stream = client_tts.text_to_speech.convert(
        text=text,
        voice_id=VOICE_ID,
        model_id="eleven_multilingual_v2",
        voice_settings={  
        "stability": 0.2,
        "similarity_boost": 0.8,
        "style": 0.9
        }

    )
    audio_bytes = b"".join(chunk for chunk in audio_stream)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        temp_audio.write(audio_bytes)
        temp_audio_path = temp_audio.name

    pygame.mixer.init()
    pygame.mixer.music.load(temp_audio_path)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        pygame.time.Clock().tick(10)
    pygame.mixer.quit()

    os.remove(temp_audio_path)

# --------------------------------------
# 📦 YOLO 관련 전처리 & 후처리 함수
# --------------------------------------
def preprocess_frame(frame, img_size=640):
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (img_size, img_size))
    img = img.astype(np.float32) / 255.0
    tensor = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0)
    return tensor.to(device)

def box_iou(box1, box2):
    x1 = torch.max(box1[:, None, 0], box2[:, 0])
    y1 = torch.max(box1[:, None, 1], box2[:, 1])
    x2 = torch.min(box1[:, None, 2], box2[:, 2])
    y2 = torch.min(box1[:, None, 3], box2[:, 3])
    inter = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    union = area1[:, None] + area2 - inter
    return inter / union

def simple_nms(boxes, scores, iou_threshold=0.5):
    idxs = scores.argsort(descending=True)
    keep = []
    while idxs.numel() > 0:
        current = idxs[0].item()
        keep.append(current)
        if idxs.numel() == 1:
            break
        ious = box_iou(boxes[current].unsqueeze(0), boxes[idxs[1:]])[0]
        idxs = idxs[1:][ious < iou_threshold]
    return keep

def postprocess_yolo(preds, orig_shape, conf_threshold=0.5, iou_threshold=0.5, img_size=640):
    preds = preds.permute(0, 2, 1)[0]
    cls_confidences = preds[:, 4:7]
    cls_conf, cls_ids = cls_confidences.max(dim=1)
    mask = cls_conf > conf_threshold
    if not mask.any():
        return []
    boxes = preds[mask, :4]
    scores = cls_conf[mask]
    classes = cls_ids[mask]
    keypoints = preds[mask, 7:].reshape(-1, 24, 3)
    xy = boxes[:, :2] - boxes[:, 2:] / 2
    wh = boxes[:, :2] + boxes[:, 2:]
    boxes_xyxy = torch.cat((xy, wh), dim=1)
    keep = simple_nms(boxes_xyxy, scores, iou_threshold)
    boxes_xyxy = boxes_xyxy[keep]
    scores = scores[keep]
    classes = classes[keep]
    keypoints = keypoints[keep]
    orig_h, orig_w = orig_shape
    scale = torch.tensor([orig_w / img_size, orig_h / img_size, orig_w / img_size, orig_h / img_size], device=boxes_xyxy.device)
    boxes_xyxy *= scale
    keypoints[..., 0] *= orig_w / img_size
    keypoints[..., 1] *= orig_h / img_size
    results = []
    for b, s, c, kp in zip(boxes_xyxy, scores, classes, keypoints):
        results.append({
            "box": b.int().tolist(),
            "score": float(s),
            "class": int(c),
            "keypoints": kp.cpu().numpy()
        })
    return results

# --------------------------------------
# 🔌 아두이노 통신
# --------------------------------------
def find_stm32_port():
    ports = serial.tools.list_ports.comports()
    for port in ports:
        if ("STM" in port.description or "STLink" in port.description or "ttyACM" in port.device or "ttyUSB" in port.device):
            return port.device
    raise Exception("STM32 포트를 찾을 수 없습니다.")

def send_command():
    port = find_stm32_port()
    ser = serial.Serial(port, 9600)
    time.sleep(2)
    ser.write(b'1')
    print("✅ 먹이 지급 신호 전송 완료")
    ser.close()

# --------------------------------------
# 🎯 훈련 루프 (5회 반복)
# --------------------------------------
cap = cv2.VideoCapture(1)
class_labels = ["default", "sitting", "lying"]
label_map = {"앉아": 1, "엎드려": 2}
training_rounds = 5

for round_num in range(training_rounds):
    speak("동구야~ 이리와아~!!")
    speak("동구야~ 이리와아~!!")

    
    command = random.choice(["앉아", "엎드려"])
    if command == "앉아":
        speak("착하지~~ 앉아~")
        speak("앉아~~")
    elif command == "엎드려":
        speak("엎드려~~!")
        speak("엎드려~!!")
         
  

# 내부 로직은 그대로 유지
    if command not in label_map:
        print("❌ 잘못된 명령입니다.")
        continue
    target_label_id = label_map[command]
    start_time = None
    success_sent = False
    training_start_time = time.time()

    while time.time() - training_start_time < 3600:
        ret, frame = cap.read()
        if not ret:
            break
        orig_h, orig_w = frame.shape[:2]
        input_tensor = preprocess_frame(frame)
        with torch.no_grad():
            preds = model(input_tensor)
        detections = postprocess_yolo(preds, (orig_h, orig_w))
        detected = any(det["class"] == target_label_id for det in detections)
        if detected:
            if start_time is None:
                start_time = time.time()
            elif time.time() - start_time >= 3 and not success_sent:
                speak("잘했어~ 아주 잘했어~!")
                send_command()
                success_sent = True
                break
        else:
            start_time = None
        cv2.imshow("훈련 중", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        speak("시간 초과! 훈련 종료")
        print("⏰ 1시간 내에 행동 인식 실패 → 훈련 종료")

speak("훈련이 모두 끝났어요")
cap.release()
cv2.destroyAllWindows()


### 목소리 조정


In [16]:
import os
import tempfile
import pygame
from elevenlabs import ElevenLabs

# API 키와 설정
ELEVENLABS_API_KEY = "sk_ecf257b80f9b85371df2bacf86ca5cc519c5e28ddd679c76"
VOICE_ID = "uyVNoMrnUku1dZyVEXwD"

# ElevenLabs 클라이언트 초기화
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# 테스트할 문장 리스트
test_sentences = [
    "동구야 이리와~!",

    "동구야~ 이리와아~!!",
    "동구야~ 이리와아~!!" 

    "착하지~ 앉아~",
    "앉아~"

    "엎드려~!",
    "엎드려!!"

    "우와~ 동구야 잘했어~!!"
]

# 보이스 세팅 값 여러 조합 시도 가능
voice_settings = {
    "stability": 0.2,
    "similarity_boost": 0.8,
    "style": 0.9
}

# 재생 함수
def play_voice(text):
    audio_stream = client.text_to_speech.convert(
        text=text,
        voice_id=VOICE_ID,
        model_id="eleven_multilingual_v2",
        voice_settings=voice_settings
    )
    audio_bytes = b"".join(chunk for chunk in audio_stream)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        temp_audio.write(audio_bytes)
        audio_path = temp_audio.name

    pygame.mixer.init()
    pygame.mixer.music.load(audio_path)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        pygame.time.Clock().tick(10)
    pygame.mixer.quit()

    os.remove(audio_path)

# 실행
if __name__ == "__main__":
    print("📣 테스트용 TTS 음성 재생 중...")
    for idx, sentence in enumerate(test_sentences):
        print(f"\n▶️ 문장 {idx+1}: {sentence}")
        play_voice(sentence)
    print("✅ 모든 문장 재생 완료!")


📣 테스트용 TTS 음성 재생 중...

▶️ 문장 1: 동구야 이리와~!

▶️ 문장 2: 동구야~ 이리와아~!!

▶️ 문장 3: 동구야~ 이리와아~!!착하지~ 앉아~

▶️ 문장 4: 앉아~엎드려~!

▶️ 문장 5: 엎드려!!우와~ 동구야 잘했어~!!
✅ 모든 문장 재생 완료!
