In [1]:
!pip install spotipy
!pip install gradio
!pip install opencv-python
!pip install PyQt5
!pip install flask




In [47]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn as nn
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torch.optim as optim
import pickle
import os
from PIL import Image
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from torchvision.models import resnet18, ResNet18_Weights
from sklearn.model_selection import train_test_split
import re
import requests
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import gradio as gr
import cv2
import matplotlib.pyplot as plt
import shutil
import pandas as pd
import sounddevice as sd
import soundfile as sf
import subprocess
import base64
import json
import sys
from flask import Flask, request, jsonify
from PyQt5.QtWidgets import (
    QApplication, QWidget, QLabel, QPushButton, QFileDialog, QLineEdit, QMessageBox
)
from PyQt5.QtGui import QPixmap, QFont, QIcon
from PyQt5.QtCore import Qt, QSize, QCoreApplication
import requests
import sounddevice as sd
from scipy.io.wavfile import write
import matplotlib.pyplot as plt

In [85]:
# 텍스트용 BERT 모델 로딩
bert_model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased", num_labels=12)
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-multilingual-cased")

# 이미지용 CNN 모델 정의
class CNNClassifier(nn.Module):
    def __init__(self, num_labels):
        super(CNNClassifier, self).__init__()
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_labels)

    def forward(self, x):
        return self.backbone(x)

cnn_model = CNNClassifier(num_labels=16)

# 이미지 전처리 정의
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [99]:
# 라벨 인코더 생성 및 저장
labels = [
    "10CM - 고장난걸까",
    "김나영 - 일기",
    "김수현 - 청혼",
    "김태래(ZerobaseOne) - 더 바랄게 없죠",
    "부석순 - 자꾸만 웃게돼",
    "소수빈 - Last Chance",
    "최유리 - Promise",
    "크러쉬(Crush) - 미안해 미워해 사랑해",
    "폴캄 - 좋아해요",
    "헤이즈(Heize) - 멈춰줘",
    "홍이삭 - fallin'" 
    "dori - 떨림"
]
label_encoder = LabelEncoder()
label_encoder.fit(labels)
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

In [100]:
# 입력 파이프라인 설정: 각 OST 별로 프레임 디렉토리 생성

def create_frame_dirs_per_ost(base_dir="video_frames"):
    os.makedirs(base_dir, exist_ok=True)
    for label_name in label_encoder.classes_:
        label_idx = label_encoder.transform([label_name])[0]
        output_dir = os.path.join(base_dir, str(label_idx))
        os.makedirs(output_dir, exist_ok=True)

create_frame_dirs_per_ost()


In [88]:
# 텍스트용 커스텀 Dataset 클래스
class OSTTextDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=64,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(self.labels[idx])
        }

class OSTImageDataset(Dataset):
    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        img = self.transform(img)
        return {
            'image': img,
            'label': torch.tensor(self.labels[idx])
        }

In [101]:
# 영상 학습 데이터셋 구성 함수
def extract_frames(video_path, save_dir, interval=30):
    cap = cv2.VideoCapture(video_path)
    os.makedirs(save_dir, exist_ok=True)
    count = 0
    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % interval == 0:
            save_path = os.path.join(save_dir, f"frame_{count:03d}.jpg")
            cv2.imwrite(save_path, frame)
            count += 1
        frame_idx += 1
    cap.release()
    print(f"✅ {count} frames saved to {save_dir}")

video_train_dir = "video_train"

def build_video_frame_dataset(interval=30):
    print("🔄 영상 학습 프레임 추출 중...")
    for fname in os.listdir(video_train_dir):
        if fname.endswith(".mp4") or fname.endswith(".avi"):
            label_name = fname.split("_")[0].strip()
            try:
                label_idx = label_encoder.transform([label_name])[0]
            except ValueError:
                print(f"⚠️ 라벨 누락 또는 인식 불가: {label_name}")
                continue
            video_path = os.path.join(video_train_dir, fname)
            output_dir = os.path.join("video_frames", str(label_idx))
            extract_frames(video_path, output_dir, interval=interval)

build_video_frame_dataset()

🔄 영상 학습 프레임 추출 중...
⚠️ 라벨 누락 또는 인식 불가: 김나영 - 일기.mp4
⚠️ 라벨 누락 또는 인식 불가: 부석순 - 자꾸만 웃게돼.mp4
⚠️ 라벨 누락 또는 인식 불가: dori - 떨림.mp4
⚠️ 라벨 누락 또는 인식 불가: 크러쉬(Crush) - 미안해 미워해 사랑해.mp4
⚠️ 라벨 누락 또는 인식 불가: 10CM - 고장난걸까.mp4
⚠️ 라벨 누락 또는 인식 불가: 소수빈 - Last Chance.mp4
⚠️ 라벨 누락 또는 인식 불가: 폴킴 - 좋아해요.mp4
⚠️ 라벨 누락 또는 인식 불가: 최유리 - Promise.mp4
⚠️ 라벨 누락 또는 인식 불가: 김수현 - 청혼.mp4
⚠️ 라벨 누락 또는 인식 불가: 홍이삭 - fallin'.mp4
⚠️ 라벨 누락 또는 인식 불가: 헤이즈(Heize) - 멈춰줘.mp4
⚠️ 라벨 누락 또는 인식 불가: 김태래(ZerobaseOne) - 더 바랄게 없죠.mp4


In [102]:
# 학습/검증용 데이터셋 로더 구성
text_dataset = OSTTextDataset(texts=image_paths, labels=image_labels)
image_dataset = OSTImageDataset(image_paths=image_paths, labels=image_labels, transform=image_transform)

text_train_loader = DataLoader(text_dataset, batch_size=8, shuffle=True)
image_train_loader = DataLoader(image_dataset, batch_size=8, shuffle=True)

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [93]:
# 영상 학습 데이터셋 구성 함수
def extract_frames(video_path, save_dir, interval=30):

    cap = cv2.VideoCapture(video_path)
    os.makedirs(save_dir, exist_ok=True)
    count = 0
    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % interval == 0:
            save_path = os.path.join(save_dir, f"frame_{count:03d}.jpg")
            cv2.imwrite(save_path, frame)
            count += 1
        frame_idx += 1
    cap.release()
    print(f"✅ {count} frames saved to {save_dir}")
    
# 영상 학습 데이터 자동 처리 및 프레임 추출
def predict_ost_from_image(image_path):
    image = Image.open(image_path).convert("RGB")
    image = image_transform(image).unsqueeze(0)
    cnn_model.eval()
    with torch.no_grad():
        output = cnn_model(image)
        pred_idx = output.argmax(dim=1).item()
    return label_encoder.inverse_transform([pred_idx])[0]
video_train_dir = "video_train"

def build_video_frame_dataset(interval=30):
    print("🔄 영상 학습 프레임 추출 중...")
    for fname in os.listdir(video_train_dir):
        if fname.endswith(".mp4") or fname.endswith(".avi"):
            label_name = fname.split("_")[0].strip()
            try:
                label_idx = label_encoder.transform([label_name])[0]
            except ValueError:
                print(f"⚠️ 라벨 누락 또는 인식 불가: {label_name}")
                continue
            video_path = os.path.join(video_train_dir, fname)
            output_dir = os.path.join("video_frames", str(label_idx))
            extract_frames(video_path, output_dir, interval=interval)

build_video_frame_dataset()

🔄 영상 학습 프레임 추출 중...
⚠️ 라벨 누락 또는 인식 불가: 김나영 - 일기.mp4
⚠️ 라벨 누락 또는 인식 불가: 부석순 - 자꾸만 웃게돼.mp4
⚠️ 라벨 누락 또는 인식 불가: dori - 떨림.mp4
⚠️ 라벨 누락 또는 인식 불가: 크러쉬(Crush) - 미안해 미워해 사랑해.mp4
⚠️ 라벨 누락 또는 인식 불가: 10CM - 고장난걸까.mp4
⚠️ 라벨 누락 또는 인식 불가: 소수빈 - Last Chance.mp4
⚠️ 라벨 누락 또는 인식 불가: 폴킴 - 좋아해요.mp4
⚠️ 라벨 누락 또는 인식 불가: 최유리 - Promise.mp4
⚠️ 라벨 누락 또는 인식 불가: 김수현 - 청혼.mp4
⚠️ 라벨 누락 또는 인식 불가: 홍이삭 - fallin'.mp4
⚠️ 라벨 누락 또는 인식 불가: 헤이즈(Heize) - 멈춰줘.mp4
⚠️ 라벨 누락 또는 인식 불가: 김태래(ZerobaseOne) - 더 바랄게 없죠.mp4


In [None]:
# 학습/검증용 데이터셋 로더 구성
text_dataset = OSTTextDataset(texts=image_paths, labels=image_labels)
image_dataset = OSTImageDataset(image_paths=image_paths, labels=image_labels, transform=image_transform)

text_train_loader = DataLoader(text_dataset, batch_size=8, shuffle=True)
image_train_loader = DataLoader(image_dataset, batch_size=8, shuffle=True)

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [97]:
# 각 OST 별로 프레임 디렉토리 1개씩 생성
for label_name in label_encoder.classes_:
    label_idx = label_encoder.transform([label_name])[0]
    output_dir = os.path.join(video_frame_dir, str(label_idx))
    os.makedirs(output_dir, exist_ok=True)

In [98]:
# 📁 학습용 영상 데이터를 정리하여 프레임을 이미지로 추출하고 경로 생성
video_train_dir = "video_train"
video_frame_dir = "video_frames"
os.makedirs(video_frame_dir, exist_ok=True)

In [54]:
# 영상 폴더 내 모든 영상 처리
for fname in os.listdir(video_train_dir):
    if fname.endswith(".mp4") or fname.endswith(".avi"):
        label_name = fname.split("_")[0].strip()
        try:
            label_idx = label_encoder.transform([label_name])[0]
        except:
            print(f"⚠️ 라벨 누락: {label_name}")
            continue
        video_path = os.path.join(video_train_dir, fname)
        output_dir = os.path.join(video_frame_dir, str(label_idx))
        extract_frames(video_path, output_dir, interval=30)

def predict_osts_from_video(video_path, save_csv_path, interval=30):
    extract_frames(video_path, "temp_frames", interval=interval)
    frame_files = sorted([os.path.join("temp_frames", f) for f in os.listdir("temp_frames") if f.endswith(".jpg")])
    predictions = []
    for frame in frame_files:
        pred = predict_ost_from_image(frame)
        predictions.append((frame, pred))
    df = pd.DataFrame(predictions, columns=["frame", "predicted_ost"])
    df.to_csv(save_csv_path, index=False)
    print(f"예측 결과 저장 완료: {save_csv_path}")

⚠️ 라벨 누락: 김나영 - 일기.mp4
⚠️ 라벨 누락: 부석순 - 자꾸만 웃게돼.mp4
⚠️ 라벨 누락: dori - 떨림.mp4
⚠️ 라벨 누락: 크러쉬(Crush) - 미안해 미워해 사랑해.mp4
⚠️ 라벨 누락: 10CM - 고장난걸까.mp4
⚠️ 라벨 누락: 소수빈 - Last Chance.mp4
⚠️ 라벨 누락: 폴킴 - 좋아해요.mp4
⚠️ 라벨 누락: 최유리 - Promise.mp4
⚠️ 라벨 누락: 김수현 - 청혼.mp4
⚠️ 라벨 누락: 홍이삭 - fallin'.mp4
⚠️ 라벨 누락: 헤이즈(Heize) - 멈춰줘.mp4
⚠️ 라벨 누락: 김태래(ZerobaseOne) - 더 바랄게 없죠.mp4


In [55]:
extract_frames(
    video_path= "video_train/10CM - 고장난걸까.mp4",
    save_dir="video_frames/0",
    interval=30
)

✅ 22 frames saved to video_frames/0


In [56]:
extract_frames(
    video_path= "video_train/김나영 - 일기.mp4",
    save_dir="video_frames/1",
    interval=30  # 30프레임마다 1장씩 추출
)

✅ 100 frames saved to video_frames/1


In [57]:
extract_frames(
    video_path= "video_train/홍이삭 - fallin'.mp4",
    save_dir="video_frames/2",
    interval=30 
)

✅ 109 frames saved to video_frames/2


In [58]:
extract_frames(
    video_path= "video_train/김수현 - 청혼.mp4",
    save_dir="video_frames/3",
    interval=30 
)

✅ 99 frames saved to video_frames/3


In [59]:
extract_frames(
    video_path= "video_train/김태래(ZerobaseOne) - 더 바랄게 없죠.mp4",
    save_dir="video_frames/4",
    interval=30
)

✅ 80 frames saved to video_frames/4


In [60]:
extract_frames(
    video_path= "video_train/10cm - 내 눈에만 보여.mp4",
    save_dir="video_frames/5",
    interval=30
)

✅ 0 frames saved to video_frames/5


OpenCV: Couldn't read video stream from file "video_train/10cm - 내 눈에만 보여.mp4"


In [61]:
extract_frames(
    video_path = "video_train/부석순 - 자꾸만 웃게돼.mp4",
    save_dir="video_frames/6",
    interval=30
)

✅ 25 frames saved to video_frames/6


In [62]:
extract_frames(
    video_path = "video_train/소수빈 - Last Chance.mp4",
    save_dir="video_frames/7",
    interval=30
)

✅ 94 frames saved to video_frames/7


In [63]:
extract_frames(
    video_path = "video_train/최유리 - Promise.mp4",
    save_dir="video_frames/8",
    interval=30
)

✅ 79 frames saved to video_frames/8


In [64]:
extract_frames(
    video_path = "video_train/크러쉬(Crush) - 미안해 미워해 사랑해.mp4",
    save_dir="video_frames/9",
    interval=30 
)

✅ 155 frames saved to video_frames/9


In [65]:
extract_frames(
    video_path = "video_train/폴킴 - 좋아해요.mp4",
    save_dir="video_frames/10",
    interval=30 
)

✅ 83 frames saved to video_frames/10


In [66]:
extract_frames(
    video_path = "video_train/헤이즈(Heize) - 멈춰줘.mp4",
    save_dir="video_frames/11",
    interval=30 
)

✅ 110 frames saved to video_frames/11


In [67]:
extract_frames(
    video_path = "video_train/dori - 떨림.mp4",
    save_dir="video_frames/12",
    interval=30 
)

✅ 49 frames saved to video_frames/12


In [81]:
# 영상 학습 프레임 경로 수집
video_train_image_paths = []
video_train_labels = []
video_train_dir = "video_train"
video_frame_dir = "video_frames"
os.makedirs(video_frame_dir, exist_ok=True)

for fname in os.listdir("video_train"):
    if fname.endswith(".mp4") or fname.endswith(".avi"):
        label_name = fname.split("_")[0].strip()
        try:
            label_idx = label_encoder.transform([label_name])[0]
        except ValueError:
            print(f"⚠️ 라벨 누락 또는 인식 불가: {label_name}")
            continue
        video_path = os.path.join("video_train_", fname)
        output_dir = os.path.join("video_frames", str(label_idx))
        extract_frames(video_path, output_dir, interval=30)

        if os.path.exists(output_dir):
            for frame_fname in os.listdir(output_dir):
                if frame_fname.endswith(".jpg"):
                    frame_path = os.path.join(output_dir, frame_fname)
                    if os.path.exists(frame_path):
                        video_train_image_paths.append(frame_path)
                        video_train_labels.append(label_idx)

print(f"🎞️ 총 학습용 프레임 수: {len(video_train_image_paths)}")

⚠️ 라벨 누락 또는 인식 불가: 김나영 - 일기.mp4
⚠️ 라벨 누락 또는 인식 불가: 부석순 - 자꾸만 웃게돼.mp4
⚠️ 라벨 누락 또는 인식 불가: dori - 떨림.mp4
⚠️ 라벨 누락 또는 인식 불가: 크러쉬(Crush) - 미안해 미워해 사랑해.mp4
⚠️ 라벨 누락 또는 인식 불가: 10CM - 고장난걸까.mp4
⚠️ 라벨 누락 또는 인식 불가: 소수빈 - Last Chance.mp4
⚠️ 라벨 누락 또는 인식 불가: 폴킴 - 좋아해요.mp4
⚠️ 라벨 누락 또는 인식 불가: 최유리 - Promise.mp4
⚠️ 라벨 누락 또는 인식 불가: 김수현 - 청혼.mp4
⚠️ 라벨 누락 또는 인식 불가: 홍이삭 - fallin'.mp4
⚠️ 라벨 누락 또는 인식 불가: 헤이즈(Heize) - 멈춰줘.mp4
⚠️ 라벨 누락 또는 인식 불가: 김태래(ZerobaseOne) - 더 바랄게 없죠.mp4
🎞️ 총 학습용 프레임 수: 0


In [78]:
# 영상 예측 및 CSV 저장 함수 정의
def predict_ost_from_image(img_path):
    cnn_model.eval()
    image = Image.open(img_path).convert("RGB")
    image = image_transform(image).unsqueeze(0)
    with torch.no_grad():
        outputs = cnn_model(image)
        probs = F.softmax(outputs, dim=1)
        pred_idx = torch.argmax(probs, dim=1).item()
    return label_encoder.inverse_transform([pred_idx])[0]

def predict_osts_from_video(video_path, save_csv_path, interval=30):
    temp_dir = "temp_frames"
    extract_frames(video_path, temp_dir, interval=interval)
    frame_files = sorted([os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith(".jpg")])
    predictions = []
    for frame in frame_files:
        pred = predict_ost_from_image(frame)
        predictions.append((frame, pred))
    df = pd.DataFrame(predictions, columns=["frame", "predicted_ost"])
    df.to_csv(save_csv_path, index=False)
    print(f"✅ 예측 결과 저장 완료: {save_csv_path}")

In [70]:
# 이미지 파일 정리 스크립트 생성
def organize_images_by_label():
    for i, (scene_text, label) in enumerate(scene_data):
        label_index = label_encoder.transform([label])[0]
        src_path = f"images_raw/scene_{i+1}.jpg"
        dst_dir = f"images/{label_index}"
        dst_path = f"{dst_dir}/scene_{i+1}.jpg"
        os.makedirs(dst_dir, exist_ok=True)
        if os.path.exists(src_path):
            shutil.move(src_path, dst_path)
        else:
            print(f"이미지 없음: {src_path}")

In [79]:
# CNN 학습 루프

def train_cnn(num_epochs=3):
    global train_img_loader
    cnn_model.train()
    optimizer = optim.Adam(cnn_model.parameters(), lr=1e-4)
    loss_fn = nn.CrossEntropyLoss()

    # 영상 기반 학습 데이터 추가
    combined_image_paths = image_paths + video_train_image_paths
    combined_labels = list(y) + video_train_labels

    # 새로운 학습 데이터셋 구성
    train_img_dataset = OSTImageDataset(combined_image_paths, combined_labels, image_transform)
    train_img_loader = DataLoader(train_img_dataset, batch_size=4, shuffle=True)

    epoch_losses = []

    for epoch in range(num_epochs):
        total_loss = 0
        for batch in train_img_loader:
            images = batch['image']
            labels = batch['label']
            optimizer.zero_grad()
            outputs = cnn_model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        epoch_losses.append(total_loss)
        print(f"[CNN] Epoch {epoch+1}, Loss: {total_loss:.4f}")

    torch.save(cnn_model.state_dict(), "cnn_model.pt")
    print("CNN 학습 완료 및 저장: cnn_model.pt")

In [72]:
# BERT 학습 루프

def train_bert_model(model, dataloader, num_epochs=5, lr=1e-4):
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in dataloader:
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            labels = batch['labels']

            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"[BERT] Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")


        print(f"Epoch {epoch + 1}, Loss: {total_loss:.4f}")
    torch.save(bert_model.state_dict(), "bert_model.pt")
    print("BERT 학습 완료 및 저장: bert_model.pt")

In [73]:
# BERT + CNN 앙상블 예측 함수
def ensemble_predict(text, image_path):
    # BERT 예측
    bert_model.eval()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=64)
    with torch.no_grad():
        logits = bert_model(**inputs).logits
        bert_conf = F.softmax(logits, dim=1).squeeze()

    # CNN 예측
    image = Image.open(image_path).convert("RGB")
    image = image_transform(image).unsqueeze(0)
    cnn_model.eval()
    with torch.no_grad():
        cnn_logits = cnn_model(image)
        cnn_conf = F.softmax(cnn_logits, dim=1).squeeze()
    # 앙상블: 평균
    final_conf = (bert_conf + cnn_conf) / 2
    pred_idx = final_conf.argmax().item()
    pred_label = label_encoder.inverse_transform([pred_idx])[0]

    return pred_label, final_conf[pred_idx].item()

In [83]:
# 🔧 Spotify API 설정
SPOTIFY_CLIENT_ID = "04df9d7a817d4709a27eee2e1ecfb2f2"
SPOTIFY_CLIENT_SECRET = "d36b326fc5df4a97b3ba1a96f13280a2"

# 🔑 AudD API Key
AUDD_API_KEY = "757a1dc15f25bd48595392e44ca2acb6"


def record_audio_mp3(filename="recorded.mp3", duration=8, samplerate=44100):
    temp_wav = "temp.wav"
    print("🎙 녹음 시작...")
    recording = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1)
    sd.wait()
    sf.write(temp_wav, recording, samplerate)
    print("녹음 완료: ", temp_wav)

    subprocess.run(["ffmpeg", "-y", "-i", temp_wav, filename], check=True)
    print("MP3 변환 완료: ", filename)
    return filename

def recognize_with_audd(file_path):
    print("AudD API로 음악 인식 중...")
    url = 'https://api.audd.io/'
    with open(file_path, 'rb') as f:
        files = {
            'file': f
        }
        data = {
            'api_token': AUDD_API_KEY,
            'return': 'spotify'
        }
        response = requests.post(url, data=data, files=files)
    if response.status_code != 200:
        print("AudD API 실패:", response.status_code)
        return None
    return response.json()

def search_spotify(query):
    print(f"🔎 Spotify에서 '{query}' 검색 중...")
    auth_manager = SpotifyClientCredentials(client_id=SPOTIFY_CLIENT_ID, client_secret=SPOTIFY_CLIENT_SECRET)
    sp = spotipy.Spotify(auth_manager=auth_manager)
    result = sp.search(q=query, limit=1, type='track')
    if result['tracks']['items']:
        track = result['tracks']['items'][0]
        return {
            '제목': track['name'],
            '아티스트': track['artists'][0]['name'],
            '앨범': track['album']['name'],
            '미리듣기': track['preview_url'],
            '앨범커버': track['album']['images'][0]['url']
        }
    else:
        return None

if __name__ == "__main__":
    file = record_audio_mp3(duration=8)
    audd_result = recognize_with_audd(file)

    try:
        title = audd_result['result']['title']
        artist = audd_result['result']['artist']
        print(f"인식된 곡: {artist} - {title}")

        track_info = search_spotify(f"{title} {artist}")
        if track_info:
            print("Spotify 정보:")
            for k, v in track_info.items():
                print(f"{k}: {v}")
        else:
            print("Spotify에서 정보를 찾을 수 없습니다.")
    except Exception as e:
        print("음악 인식 실패 또는 AudD 응답 오류:", e)

🎙 녹음 시작...
||PaMacCore (AUHAL)|| Error on line 1322: err='-10851', msg=Audio Unit: Invalid Property Value


PortAudioError: Error opening InputStream: Internal PortAudio error [PaErrorCode -9986]

In [None]:
import sys
import os
from PyQt5.QtWidgets import (
    QApplication, QWidget, QLabel, QPushButton, QFileDialog, QLineEdit, QMessageBox
)
from PyQt5.QtGui import QPixmap, QFont, QIcon
from PyQt5.QtCore import Qt, QSize, QCoreApplication
import requests
import sounddevice as sd
from scipy.io.wavfile import write

QCoreApplication.setAttribute(Qt.AA_EnableHighDpiScaling, True)

class DeepTuneApp(QWidget):
    def __init__(self):
        super().__init__()

        self.backend_url = "http://<FLASK_SERVER_URL>"

        self.setWindowTitle("DeepTune 시뮬레이터")
        self.setFixedSize(375, 812)

        self.selected_image_path = None
        self.selected_audio_path = None

        self.bg = QLabel(self)
        if os.path.exists("assets/background.png"):
            bg_pixmap = QPixmap("assets/background.png")
            self.bg.setPixmap(bg_pixmap)
            self.bg.setGeometry(0, 0, 375, 812)
            self.bg.lower()
        else:
            self.bg.setStyleSheet("background-color: #1D2671;")

        # 상단 안내 텍스트
        self.header = QLabel("음악을 찾아보세요", self)
        self.header.setFont(QFont("Helvetica", 16, QFont.Bold))
        self.header.setStyleSheet("color: white;")
        self.header.setAlignment(Qt.AlignCenter)
        self.header.setGeometry(0, 30, 375, 40)

        # 마이크 버튼 - 좌측 상단 안내문구 좌측
        self.mic_btn = QPushButton(self)
        self.mic_btn.setIcon(QIcon("assets/mic2.png"))
        self.mic_btn.setIconSize(QSize(30, 30))
        self.mic_btn.setGeometry(20, 35, 30, 30)
        self.mic_btn.setStyleSheet("border: none;")
        self.mic_btn.clicked.connect(self.select_audio)

        # 마이크 버튼 아래 라벨
        self.mic_label = QLabel("음성", self)
        self.mic_label.setFont(QFont("Helvetica", 10))
        self.mic_label.setStyleSheet("color: white;")
        self.mic_label.setAlignment(Qt.AlignCenter)
        self.mic_label.setGeometry(10, 70, 50, 20)

        # 이미지 버튼 - 우측 상단 안내문구 우측
        self.img_btn = QPushButton(self)
        self.img_btn.setIcon(QIcon("assets/photo.png"))
        self.img_btn.setIconSize(QSize(30, 30))
        self.img_btn.setGeometry(325, 35, 30, 30)
        self.img_btn.setStyleSheet("border: none;")
        self.img_btn.clicked.connect(self.select_image)

        # 이미지 버튼 아래 라벨
        self.img_label = QLabel("사진", self)
        self.img_label.setFont(QFont("Helvetica", 10))
        self.img_label.setStyleSheet("color: white;")
        self.img_label.setAlignment(Qt.AlignCenter)
        self.img_label.setGeometry(315, 70, 50, 20)

        # 앨범 이미지
        self.album = QLabel(self)
        self.album.setPixmap(QPixmap())  # 초기엔 비움
        self.album.setGeometry(107, 256, 160, 160)
        self.album.setStyleSheet("background-color: rgba(255, 255, 255, 30); border-radius: 12px;")

        # 노래 제목
        self.song_title = QLabel("노래 제목", self)
        self.song_title.setFont(QFont("Helvetica", 14, QFont.Bold))
        self.song_title.setStyleSheet("color: white;")
        self.song_title.setAlignment(Qt.AlignCenter)
        self.song_title.setGeometry(0, 430, 375, 30)

        # 아티스트
        self.artist = QLabel("아티스트", self)
        self.artist.setFont(QFont("Helvetica", 11))
        self.artist.setStyleSheet("color: #DADADA;")
        self.artist.setAlignment(Qt.AlignCenter)
        self.artist.setGeometry(0, 460, 375, 25)

        # 텍스트 입력창 - 가장 아래
        self.input = QLineEdit(self)
        self.input.setPlaceholderText("장면 설명을 입력하세요")
        self.input.setStyleSheet("""
            padding: 8px;
            font-size: 13px;
            border-radius: 20px;
            background-color: rgba(255, 255, 255, 180);
        """)
        self.input.setGeometry(60, 740, 255, 50)
        self.input.returnPressed.connect(self.analyze_text)

        
    #백엔드로 입력 데이터 전송
    def analyze_text(self):
        text = self.input.text()
        if text:
            self.send_text_to_backend(text)
        else:
            QMessageBox.warning(self, "입력 오류", "장면 설명을 입력해주세요.")

    def select_image(self):
        path, _ = QFileDialog.getOpenFileName(self, "파일 선택", "", "Images (*.png)")
        if path:
            self.selected_image_path = path
            self.send_image_to_backend(path)

    def select_audio(self):
        self.send_audio_to_backend()

    def update_result(self, data):
        self.song_title.setText(data.get("title", "제목 없음"))
        self.artist.setText(data.get("artist", "아티스트 없음"))

        cover = data.get("cover", "")
        pixmap = QPixmap()

        if cover.startswith("assets/") and os.path.exists(cover):
            #내부 파일 경로인 경우
            pixmap = QPixmap(cover)
        elif cover.startswith("http"):
            #웹 이미지 URL인 경우
            from urllib.request import urlopen
            from io import BytesIO
            image_data = urlopen(cover).read()
            pixmap.loadFromData(image_data)
        elif cover.startswith("data:image"):
            #base64 인코딩일 경우
            import base64
            header, encoded = cover.split(",", 1)
            image_data = base64.b64decode(encoded)
            pixmap.loadFromData(image_data)

        self.album.setPixmap(pixmap.scaled(160, 160, Qt.KeepAspectRatio, Qt.SmoothTransformation))

    def send_text_to_backend(self, text):
        try:
            #flask 주소 추후에 첨부 예정
            response = requests.post(f"{self.backend_url}/analyze_text", json = {"text": text})
            result = response.json()
            if "title" in result and "artist" in result and "cover" in result:
                self.update_result(result)
            else:
                QMessageBox.warning(self, "서버 응답 오류", "예상한 데이터 형식이 아닙니다.")

        except Exception as e:
            QMessageBox.warning(self, "서버 오류", str(e))

    def send_image_to_backend(self, path):
        try:
            with open(path, "rb") as f:
                files = {"image": f}
                response = requests.post(f"{self.backend_url}/analyze_image", files = files)
                result = response.json()
                if "title" in result and "artist" in result and "cover" in result:
                    self.update_result(result)
                else:
                    QMessageBox.warning(self, "서버 응답 오류", "예상한 데이터 형식이 아닙니다.")

        except Exception as e:
            QMessageBox.warning(self, "오류", str(e))

    def record_audio(self, duration=5, output_file="temp_audio.wav"):
        fs = 44100  # 샘플링 주파수
        QMessageBox.information(self, "녹음 중", f"{duration}초간 녹음합니다. 소리를 들려주세요.")
        recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
        sd.wait()  # 녹음 완료 대기
        write(output_file, fs, recording)
        return output_file


    def send_audio_to_backend(self):
        try:
            path = self.record_audio(duration=5)  # 5초 녹음
            with open(path, "rb") as f:
                files = {"audio": f}
                response = requests.post(f"{self.backend_url}/analyze_audio", files=files)
                result = response.json()
                if "title" in result and "artist" in result and "cover" in result:
                    self.update_result(result)
                else:
                    QMessageBox.warning(self, "서버 응답 오류", "예상한 데이터 형식이 아닙니다.")
        except Exception as e:
            QMessageBox.warning(self, "오류", str(e))

if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = DeepTuneApp()
    window.show()
    sys.exit(app.exec_())

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
predict_osts_from_video("video_t/찬열 & 펀치 - Stay With Me_1.mp4", "predicted_stay_with_me.csv")

✅ 132 frames saved to temp_frames
✅ 예측 결과 저장 완료: predicted_stay_with_me.csv


In [104]:
train_cnn_model(cnn_model, image_train_loader, num_epochs=5)

NameError: name 'train_cnn_model' is not defined