In [8]:
## 영상용 api 키, 하고 바로 삭제하기
import requests
import re
from dotenv import load_dotenv
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from pprint import pprint
from youtube_transcript_api import YouTubeTranscriptApi

load_dotenv()
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
YOUTUBE_API_URL = 'https://www.googleapis.com/youtube/v3'



In [31]:
def get_youtube_transcript(url: str) -> str:
    """ 유튜브 영상 URL에 대한 자막을 가져옵니다."""
    
    # 1. 유튜브 URL에서 비디오 ID를 추출합니다.
    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
    if not video_id_match:
        raise ValueError("유효하지 않은 YouTube URL이 제공되었습니다")
    video_id = video_id_match.group(1)
    
    languages = ["ko", "en"]
    # 2. youtube_transcript_api를 사용하여 자막을 가져옵니다.
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
        
        # 3. 자막 목록의 'text' 부분을 하나의 문자열로 결합합니다.
        transcript_text = " ".join([entry["text"] for entry in transcript_list])
        return transcript_text

    except Exception as e:
        raise RuntimeError(f"비디오 ID '{video_id}'에 대한 자막을 찾을 수 없거나 사용할 수 없습니다.{e}")




In [33]:
transcript = get_youtube_transcript("https://www.youtube.com/watch?v=atXyXP3yYZ4&t=3s") # operator 관련 영상  OpenAI
print(transcript) # "notebook.output.wordWrap": true in settings.json

RuntimeError: 비디오 ID 'atXyXP3yYZ4'에 대한 자막을 찾을 수 없거나 사용할 수 없습니다.
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=atXyXP3yYZ4! This is most likely caused by:

Request to YouTube failed: 429 Client Error: Too Many Requests for url: https://www.youtube.com/api/timedtext?v=atXyXP3yYZ4&ei=DZF0aOywEJ2z1d8P48TowA4&caps=asr&opi=112496729&xoaf=5&hl=en&ip=0.0.0.0&ipbits=0&expire=1752494973&sparams=ip,ipbits,expire,v,ei,caps,opi,xoaf&signature=82F97B602C563536B07B9EF9B539996DD5B4D55D.B03975908CE1285EF099DCB7B7EEDB8A7583E743&key=yt8&lang=en

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!

In [21]:
def search_youtube_videos(query: str) :
    """유튜브에서 특정 키워드로 동영상을 검색하고 세부 정보를 가져옵니다"""
    try:
        # 1. 동영상 검색
        max_results: int = 20
        search_url = f"{YOUTUBE_API_URL}/search?part=snippet&q={requests.utils.quote(query)}&type=video&maxResults={max_results}&key={YOUTUBE_API_KEY}"
        print(f"Searching YouTube with URL: {search_url}")

        search_response = requests.get(search_url)
        search_data = search_response.json()
        video_ids = [item['id']['videoId'] for item in search_data.get('items', [])]

        if not video_ids:
            print("No videos found for the query.")
            return []

        video_details_url = f"{YOUTUBE_API_URL}/videos?part=snippet,statistics&id={','.join(video_ids)}&key={YOUTUBE_API_KEY}"
        print(f"영상 정보 가져오는 중: {video_details_url}")
        details_response = requests.get(video_details_url)
        details_response.raise_for_status()
        details_data = details_response.json()

        videos = []
        for item in details_data.get('items', []):
            snippet = item.get('snippet', {})
            statistics = item.get('statistics', {})
            thumbnails = snippet.get('thumbnails', {})
            high_thumbnail = thumbnails.get('high', {}) 
            view_count = statistics.get('viewCount')
            like_count = statistics.get('likeCount')

            video_card = {
                "title": snippet.get('title', 'N/A'),
                "publishedDate": snippet.get('publishedAt', ''),
                "channelName": snippet.get('channelTitle', 'N/A'),
                "channelId": snippet.get('channelId', ''),
                "thumbnailUrl": high_thumbnail.get('url', ''),
                "viewCount": int(view_count) if view_count is not None else None,
                "likeCount": int(like_count) if like_count is not None else None,
                "url": f"https://www.youtube.com/watch?v={item.get('id', '')}",
            }
            videos.append(video_card)

        if not videos:
            print("No video details could be fetched.")
            return []

        return videos

    except Exception as e:
        print(f"Error: {e}")
        return []



In [None]:
import os
import openai
from supabase import create_client, Client
import requests
from dotenv import load_dotenv
load_dotenv()
print("SUPABASE_URL:", os.getenv("SUPABASE_URL"))
print("SUPABASE_KEY:", os.getenv("SUPABASE_KEY"))

SUPABASE_URL = "https://qkwqilxmwyethocfzofq.supabase.co"
SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFrd3FpbHhtd3lldGhvY2Z6b2ZxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTE4NzU4MDEsImV4cCI6MjA2NzQ1MTgwMX0.WqGljJzf5Jt2vY3gHjpa7gQ9JbMqaZ_u3TXM-YIc79g"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
YOUTUBE_API_URL = 'https://www.googleapis.com/youtube/v3'

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
openai.api_key = OPENAI_API_KEY

def save_youtube_embeddings(query: str) -> str:
    """유튜브에서 query로 100개 영상을 검색해 title 임베딩 후 supabase DB에 id, url, title, embedding을 중복 없이 저장합니다."""
    max_results = 100
    search_url = f"{YOUTUBE_API_URL}/search?part=snippet&q={requests.utils.quote(query)}&type=video&maxResults={max_results}&key={YOUTUBE_API_KEY}"
    search_response = requests.get(search_url)
    search_data = search_response.json()
    video_ids = [item['id']['videoId'] for item in search_data.get('items', [])]
    if not video_ids:
        return "검색 결과가 없습니다."
    video_details_url = f"{YOUTUBE_API_URL}/videos?part=snippet,statistics&id={','.join(video_ids)}&key={YOUTUBE_API_KEY}"
    details_response = requests.get(video_details_url)
    details_response.raise_for_status()
    details_data = details_response.json()
    count = 0
    for item in details_data.get('items', []):
        snippet = item.get('snippet', {})
        video_id = item.get('id', '')
        title = snippet.get('title', 'N/A')
        url = f"https://www.youtube.com/watch?v={video_id}"
        # 임베딩
        try:
            embedding = openai.embeddings.create(
                input=title,
                model="text-embedding-3-small"
            ).data[0].embedding
        except Exception as e:
            print(f"임베딩 실패: {title} - {e}")
            continue
        # supabase 저장
        data = {
            "video_id": video_id,
            "url": url,
            "title": title,
            "embedding": embedding
        }
        try:
            supabase.table("youtube_videos").insert(data).execute()
            count += 1
        except Exception as e:
            print(f"중복 또는 에러: {video_id} - {e}")
            continue
    return f"총 {count}개 영상이 저장되었습니다."

SUPABASE_URL: https://qkwqilxmwyethocfzofq.supabase.co
SUPABASE_KEY: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFrd3FpbHhtd3lldGhvY2Z6b2ZxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTE4NzU4MDEsImV4cCI6MjA2NzQ1MTgwMX0.WqGljJzf5Jt2vY3gHjpa7gQ9JbMqaZ_u3TXM-YIc79g


In [23]:
import requests
import re
import os
from dotenv import load_dotenv
load_dotenv()

YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
YOUTUBE_API_URL = 'https://www.googleapis.com/youtube/v3'

def get_channel_id_from_video_url(video_url):
    # 유튜브 영상 ID 추출
    video_id_match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})", video_url)
    if not video_id_match:
        raise ValueError("유효하지 않은 YouTube URL")
    video_id = video_id_match.group(1)
    # 영상 상세 정보에서 채널 ID 추출
    api_url = f"{YOUTUBE_API_URL}/videos?part=snippet&id={video_id}&key={YOUTUBE_API_KEY}"
    resp = requests.get(api_url)
    data = resp.json()
    channel_id = data['items'][0]['snippet']['channelId']
    return channel_id

def get_channel_videos(channel_id, max_results=100):
    video_ids = []
    next_page_token = ""
    while len(video_ids) < max_results:
        search_url = f"{YOUTUBE_API_URL}/search?part=snippet&channelId={channel_id}&type=video&maxResults=50&key={YOUTUBE_API_KEY}"
        if next_page_token:
            search_url += f"&pageToken={next_page_token}"
        resp = requests.get(search_url)
        data = resp.json()
        for item in data.get('items', []):
            video_ids.append(item['id']['videoId'])
            if len(video_ids) >= max_results:
                break
        next_page_token = data.get('nextPageToken')
        if not next_page_token:
            break
    return video_ids

# 테스트
video_url = "https://www.youtube.com/watch?v=x0u0iKs2Ub4&t=13s"
channel_id = get_channel_id_from_video_url(video_url)
print("채널 ID:", channel_id)
video_ids = get_channel_videos(channel_id, max_results=100)
print("가져온 영상 개수:", len(video_ids))
print("영상 ID 리스트:", video_ids)

채널 ID: UCyn-K7rZLXjGl7VXGweIlcA
가져온 영상 개수: 100
영상 ID 리스트: ['XbO68-Jkapk', 'NksEV-moMkA', 'nmLO04GfA0k', '9P1ffccFFiw', 'DdVSrx3cOHU', 'CjTbumZOFcM', 'saBMXFDw9SQ', 'p3lQ2A9H_d0', 'PgIJlbWb7Nc', '9pHwI97IFlU', '__Ztp3nLFiQ', 'Y_H578nbBiA', 'lP7Ioi0Xu1A', 'gKa8TOUN94A', '5nzs_M1lHM8', 'JN3RIv7oniE', '1O1JGpwT7ss', 'Ex12Cj8XGWw', 'CDmTQOFcw9g', '_-oaae1jjWs', 'kb410U8gEZ8', 'xXDfp1fNStg', 'venz1kOiz3c', 'LbUcOufWCKs', 'yOuoqUatqZs', 'MdVhJ2mpxI4', 'tuc_c0H0Yn0', '34BJfQ7fH9g', 'irpwZ-y76Zw', 'BcZ6qYrTqIU', 'opgXYFG-K7Q', 'ufXRm1lqf1g', 'Bxw10weIF0c', 'oIA_mKNnUjE', 'RdOryJj-9cQ', 'DJzHEhpCvbo', 'KburR6QBmgg', '5D-b2NrZoQI', 'DC2IiMN0n48', 'p--HSH666H0', '_ER01aZ3PhM', 'OGSbgTWwm0Y', 'O0qfKTrUetg', '3MmbFkAMgA8', '8WEpKIIiDno', '-MvfUUPB5-Y', 'OyVq1NzxjEM', 'al0_T_mO9rM', 'qWbHSOplcvY', 'iK0dSM4FqVs', '0Kar23fZ0F4', 'Mfnwc1dc0MY', 'p7HGmwiuuvE', 'iNh_bXmYfgw', 'LKiuHdFdLWg', '3ET3nXU--nY', 'tqDMACWbDt4', 'q_qHdJDBw6w', '37-BCf85bHk', 'qMRs_G4Agh0', 'J35CD97lhq0', 'ZYr-Vgn2w-8', 'Q25-6ytLuT

In [1]:
import os
import time
import random  # ← 이 줄이 반드시 필요!
import requests
import openai
import re
from supabase import create_client, Client
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
load_dotenv()

# 환경 설정
SUPABASE_URL = "https://qkwqilxmwyethocfzofq.supabase.co"
SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFrd3FpbHhtd3lldGhvY2Z6b2ZxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTE4NzU4MDEsImV4cCI6MjA2NzQ1MTgwMX0.WqGljJzf5Jt2vY3gHjpa7gQ9JbMqaZ_u3TXM-YIc79g"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
openai.api_key = OPENAI_API_KEY
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

YOUTUBE_API_URL = "https://www.googleapis.com/youtube/v3"

def get_youtube_transcript_by_id(video_id: str) -> str:
    languages = ["ko", "en"]
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
        transcript_text = " ".join([entry["text"] for entry in transcript_list])
        return transcript_text
    except Exception as e:
        raise RuntimeError(f"비디오 ID '{video_id}'에 대한 자막을 찾을 수 없거나 사용할 수 없습니다. {e}")

def chunk_transcript(transcript: str, chunk_size: int = 300) -> list:
    return [transcript[i:i+chunk_size] for i in range(0, len(transcript), chunk_size)]

def save_channel_youtube_embeddings(channel_id: str, max_results: int = 100) -> str:
    openai.api_key = OPENAI_API_KEY
    new_video_ids = []
    next_page_token = ""
    tried_video_ids = set()

    print(f"[INFO] 채널ID: {channel_id} 영상 수집 시작")
    while len(new_video_ids) < max_results:
        search_url = (
            f"{YOUTUBE_API_URL}/search?part=snippet&channelId={channel_id}"
            f"&maxResults=50&order=date&type=video&key={YOUTUBE_API_KEY}"
        )
        if next_page_token:
            search_url += f"&pageToken={next_page_token}"
        try:
            resp = requests.get(search_url)
            data = resp.json()
        except Exception as e:
            print(f"[ERR] 유튜브 API 요청 실패: {e}")
            time.sleep(random.uniform(2, 5))
            continue

        page_video_ids = [item["id"]["videoId"] for item in data.get("items", [])]
        if not page_video_ids:
            break

        try:
            resp_db = supabase.table("youtube_videos").select("video_id").in_("video_id", page_video_ids).execute()
            existing_ids = set(row["video_id"] for row in resp_db.data)
        except Exception as e:
            print(f"[ERR] 기존 영상 조회 실패: {e}")
            existing_ids = set()

        for vid in page_video_ids:
            if vid not in existing_ids and vid not in new_video_ids and vid not in tried_video_ids:
                new_video_ids.append(vid)
                if len(new_video_ids) >= max_results:
                    break
            tried_video_ids.add(vid)

        next_page_token = data.get("nextPageToken")
        if not next_page_token:
            break

    if not new_video_ids:
        print("저장할 새로운 영상이 없습니다.")
        return "저장할 새로운 영상이 없습니다."

    count = 0
    for i, video_id in enumerate(new_video_ids):
        url = f"https://www.youtube.com/watch?v={video_id}"
        print(f"[DEBUG] 자막 추출 시도: {url}")
        time.sleep(random.uniform(2, 5))
        try:
            transcript = get_youtube_transcript_by_id(video_id)
            print(f"[OK] 자막 추출 성공: {video_id}")
        except Exception as e:
            print(f"[ERR] 자막 가져오기 실패: {video_id} - {e}")
            continue
        chunks = chunk_transcript(transcript, chunk_size=300)
        for chunk_idx, chunk in enumerate(chunks):
            try:
                time.sleep(1)
                embedding = openai.embeddings.create(
                    input=chunk,
                    model="text-embedding-3-small"
                ).data[0].embedding
            except Exception as e:
                print(f"[ERR] 임베딩 실패: {video_id} chunk {chunk_idx} - {e}")
                continue
            try:
                supabase.table("youtube_videos").insert({
                    "video_id": video_id,
                    "url": url,
                    "chunk_index": chunk_idx,
                    "chunk_text": chunk,
                    "embedding": embedding
                }).execute()
                print(f"[SAVE] 저장 완료: {video_id} chunk {chunk_idx}")
                count += 1
            except Exception as e:
                print(f"[ERR] 저장 실패: {video_id} chunk {chunk_idx} - {e}")
                continue

    return f"총 {count}개 자막 청크가 저장되었습니다."




In [2]:
# 사용 예시
channel_id = "UCg-p3lQIqmhh7gHpyaOmOiQ"
result = save_channel_youtube_embeddings(channel_id, max_results=100)
print(result)

[INFO] 채널ID: UCg-p3lQIqmhh7gHpyaOmOiQ 영상 수집 시작
[DEBUG] 자막 추출 시도: https://www.youtube.com/watch?v=6-_MZENrB24
[OK] 자막 추출 성공: 6-_MZENrB24
[SAVE] 저장 완료: 6-_MZENrB24 chunk 0
[SAVE] 저장 완료: 6-_MZENrB24 chunk 1
[SAVE] 저장 완료: 6-_MZENrB24 chunk 2
[SAVE] 저장 완료: 6-_MZENrB24 chunk 3
[SAVE] 저장 완료: 6-_MZENrB24 chunk 4
[SAVE] 저장 완료: 6-_MZENrB24 chunk 5
[SAVE] 저장 완료: 6-_MZENrB24 chunk 6
[SAVE] 저장 완료: 6-_MZENrB24 chunk 7
[SAVE] 저장 완료: 6-_MZENrB24 chunk 8
[SAVE] 저장 완료: 6-_MZENrB24 chunk 9
[SAVE] 저장 완료: 6-_MZENrB24 chunk 10
[DEBUG] 자막 추출 시도: https://www.youtube.com/watch?v=qSpNhKrb6dU
[OK] 자막 추출 성공: qSpNhKrb6dU
[SAVE] 저장 완료: qSpNhKrb6dU chunk 0
[DEBUG] 자막 추출 시도: https://www.youtube.com/watch?v=UgcAIwu9ySY
[OK] 자막 추출 성공: UgcAIwu9ySY
[SAVE] 저장 완료: UgcAIwu9ySY chunk 0
[SAVE] 저장 완료: UgcAIwu9ySY chunk 1
[DEBUG] 자막 추출 시도: https://www.youtube.com/watch?v=MvNt1FFL5k0
[OK] 자막 추출 성공: MvNt1FFL5k0
[SAVE] 저장 완료: MvNt1FFL5k0 chunk 0
[SAVE] 저장 완료: MvNt1FFL5k0 chunk 1
[SAVE] 저장 완료: MvNt1FFL5k0 chunk 2
[SAVE] 저장 완료: MvNt

KeyboardInterrupt: 

In [10]:
def search_youtube_videos(query: str) :
    """유튜브에서 특정 키워드로 동영상을 검색하고 세부 정보를 가져옵니다"""
    try:
        # 1. 동영상 검색
        max_results: int = 20
        search_url = f"{YOUTUBE_API_URL}/search?part=snippet&q={requests.utils.quote(query)}&type=video&maxResults={max_results}&key={YOUTUBE_API_KEY}"
        print(f"Searching YouTube with URL: {search_url}")

        search_response = requests.get(search_url)
        search_data = search_response.json()
        video_ids = [item['id']['videoId'] for item in search_data.get('items', [])]

        if not video_ids:
            print("No videos found for the query.")
            return []

        video_details_url = f"{YOUTUBE_API_URL}/videos?part=snippet,statistics&id={','.join(video_ids)}&key={YOUTUBE_API_KEY}"
        print(f"영상 정보 가져오는 중: {video_details_url}")
        details_response = requests.get(video_details_url)
        details_response.raise_for_status()
        details_data = details_response.json()

        videos = []
        for item in details_data.get('items', []):
            snippet = item.get('snippet', {})
            statistics = item.get('statistics', {})
            thumbnails = snippet.get('thumbnails', {})
            high_thumbnail = thumbnails.get('high', {}) 
            view_count = statistics.get('viewCount')
            like_count = statistics.get('likeCount')

            video_card = {
                "title": snippet.get('title', 'N/A'),
                "publishedDate": snippet.get('publishedAt', ''),
                "channelName": snippet.get('channelTitle', 'N/A'),
                "channelId": snippet.get('channelId', ''),
                "thumbnailUrl": high_thumbnail.get('url', ''),
                "viewCount": int(view_count) if view_count is not None else None,
                "likeCount": int(like_count) if like_count is not None else None,
                "url": f"https://www.youtube.com/watch?v={item.get('id', '')}",
            }
            videos.append(video_card)

        if not videos:
            print("No video details could be fetched.")
            return []

        return videos

    except Exception as e:
        print(f"Error: {e}")
        return []



In [11]:
videos = search_youtube_videos("AI Agents")
for video in videos:
    pprint(video)


Searching YouTube with URL: https://www.googleapis.com/youtube/v3/search?part=snippet&q=AI%20Agents&type=video&maxResults=20&key=AIzaSyAOz5efF0LiQ3txpyOO1qd8LLW49v_5gC8
영상 정보 가져오는 중: https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id=O2gerCxEXvc,geR9PeCuHK4,F8NKVhkZZWI,FwOTs4UxQS4,hLJTcVHW8_I,eHEHE2fpnWQ,ShhVdkgzqUI,BF2k_fKuCVM,qU3fmidNbJE,HISRUrJsD08,fXizBc03D7E,gGy-JDcOwlI,77NpesUmr5Q,wazHMMaiDEA,LP5OCa20Zpg,2aC2ly7vKtM,_4jVeVkUOzs,Ctpo_gsGQJs,wkuwMqrOXFE,WfIJhS_1xIk&key=AIzaSyAOz5efF0LiQ3txpyOO1qd8LLW49v_5gC8
{'channelId': 'UCh9nVJoWXmFb7sLApWGcLPQ',
 'channelName': 'codebasics',
 'likeCount': 542,
 'publishedDate': '2025-06-30T16:10:56Z',
 'thumbnailUrl': 'https://i.ytimg.com/vi/O2gerCxEXvc/hqdefault.jpg',
 'title': 'Generative ai vs AI agents vs Agentic AI',
 'url': 'https://www.youtube.com/watch?v=O2gerCxEXvc',
 'viewCount': 16387}
{'channelId': 'UCWZwfV3ICOt3uEPpW6hYK4g',
 'channelName': 'AI Foundations',
 'likeCount': 977,
 'publishedDate': '2025-06-30T14:4

In [None]:

def get_channel_info(video_url: str) -> dict:
    """YouTube 동영상 URL로부터 채널 정보와 최근 100개의 동영상을 가져옵니다"""
    def extract_video_id(url):
        match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
        return match.group(1) if match else None
    def fetch_recent_videos(channel_id):
        rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
        try:
            response = requests.get(rss_url)
            if response.status_code != 200:
                return []

            root = ET.fromstring(response.text)
            ns = {'atom': 'http://www.w3.org/2005/Atom'}
            videos = []

            for entry in root.findall('.//atom:entry', ns)[:100]:  
                title = entry.find('./atom:title', ns).text
                link = entry.find('./atom:link', ns).attrib['href']
                published = entry.find('./atom:published', ns).text
                videos.append({
                    'title': title,
                    'link': link,
                    'published': published,
                    'updatedDate': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                })

            return videos
        except:
            return []

    video_id = extract_video_id(video_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")

    video_api = f"{YOUTUBE_API_URL}/videos?part=snippet,statistics&id={video_id}&key={YOUTUBE_API_KEY}"
    video_data = requests.get(video_api).json()
    if not video_data.get('items'):
        raise ValueError("No video found")

    video_info = video_data['items'][0]
    channel_id = video_info['snippet']['channelId']

    channel_api = f"{YOUTUBE_API_URL}/channels?part=snippet,statistics&id={channel_id}&key={YOUTUBE_API_KEY}"
    channel_data = requests.get(channel_api).json()['items'][0]

    return {
        'channelTitle': channel_data['snippet']['title'],
        'channelUrl': f"https://www.youtube.com/channel/{channel_id}",
        'subscriberCount': channel_data['statistics'].get('subscriberCount', '0'),
        'viewCount': channel_data['statistics'].get('viewCount', '0'),
        'videoCount': channel_data['statistics'].get('videoCount', '0'),
        'videos': fetch_recent_videos(channel_id)
    }

In [14]:
pprint(get_channel_info("https://www.youtube.com/watch?v=gYqs-wUKZsM")) ## openai 영상

{'channelTitle': 'OpenAI',
 'channelUrl': 'https://www.youtube.com/channel/UCXZCJLdBC09xxGZ6gcdrc6A',
 'subscriberCount': '1580000',
 'videoCount': '302',
 'videos': [{'link': 'https://www.youtube.com/watch?v=atXyXP3yYZ4',
             'published': '2025-07-01T14:50:39+00:00',
             'title': 'Inside ChatGPT, AI assistants, and building at OpenAI — '
                      'the OpenAI Podcast Ep. 2',
             'updatedDate': '2025-07-02 11:24:24'},
            {'link': 'https://www.youtube.com/shorts/XGswiRpn6js',
             'published': '2025-06-27T19:51:05+00:00',
             'title': 'A quick guide | How to search in ChatGPT',
             'updatedDate': '2025-07-02 11:24:24'},
            {'link': 'https://www.youtube.com/shorts/rswUgIfj1YU',
             'published': '2025-06-25T18:32:15+00:00',
             'title': 'UCLA student shares how he was using ChatGPT',
             'updatedDate': '2025-07-02 11:24:24'},
            {'link': 'https://www.youtube.com/shorts/E0

In [None]:
import openai
from supabase import create_client, Client
import os

SUPABASE_URL="https://qkwqilxmwyethocfzofq.supabase.co"
SUPABASE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFrd3FpbHhtd3lldGhvY2Z6b2ZxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTE4NzU4MDEsImV4cCI6MjA2NzQ1MTgwMX0.WqGljJzf5Jt2vY3gHjpa7gQ9JbMqaZ_u3TXM-YIc79g"
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

openai.api_key = os.getenv("OPENAI_API_KEY")

def search_similar_youtube_video(query: str) -> dict:
    """검색어를 임베딩하고 Supabase RPC를 통해 가장 유사한 자막 청크(및 비디오) 정보를 반환"""
    try:
        # 1. OpenAI를 사용해 쿼리 임베딩 생성
        embedding_response = openai.embeddings.create(
            input=query,
            model="text-embedding-3-small"
        )
        embedding = embedding_response.data[0].embedding

        # 2. Supabase RPC 호출 (input_vector는 JSON 형태 리스트 그대로 넘김)
        response = supabase.rpc("match_youtube_video", {
            "input_vector": embedding
        }).execute()

        # 3. 결과 반환
        if response.data and len(response.data) > 0:
            result = response.data[0]
            return {
                "video_id": result.get("video_id"),
                "url": result.get("url"),
                "chunk_index": result.get("chunk_index"),
                "chunk_text": result.get("chunk_text"),
                "score": result.get("score", None)
            } 
        else:
            return {"error": "No similar video found."}

    except Exception as e:
        import traceback
        traceback.print_exc()
        return {"error": str(e)}

In [9]:
result = search_similar_youtube_video("짜파게티")
print(result)

{'video_id': '6-_MZENrB24', 'url': 'https://www.youtube.com/watch?v=6-_MZENrB24', 'chunk_index': 2, 'chunk_text': '막으로 고스트 페퍼 라면 짜파게티네. 짜파게티라고 생각하고 먹으면 짜파게티야. 오 좋아. 많이 먹었어. 어때? 괜찮나 본데? 생각보다 와 어떡해 와 와 근데 이게 덤점 오네어 아 오 이겨 냈어 캡사이신 매운 맛이랑 느낌이 달라 캡사이신은 혀를 막 자르고 싶은 느낌이 있잖아 근데 이거는 혀를 그 무슨 느낌인지 알겠죠 어 이게 나 내가 보기 이게 더 맵지 아니 이미 이거 먹어서 감각이 SM타운에서 공연하셨잖아요. 런던에서 공연 처음 하는 건 기억나세요? 아마 런던의 첫 케팝 페스티벌이었던 거 같은데 2012년 맞아요. 그때 생각하니가 ', 'score': 0.390624034684189}
