In [1]:
from infrastructure.database.mongo_client import MongoDBClient
from infrastructure.repository import YouTubeContentRepository

import os
from dotenv import load_dotenv

load_dotenv()

# MongoDB 클라이언트 초기화
client = MongoDBClient(uri=os.environ['MONGO_CONNECTION_STRING'])
client.connect()

# 저장소 초기화
repository = YouTubeContentRepository(client)

Connected to MongoDB


In [3]:
contents = repository.find_all()

content = contents[0]

video_link = content.url

import yt_dlp
import requests

def get_meta(video_link):
    ydl_opts = {
        'quiet': True,
        'skip_download': True,  # 영상 다운로드 생략
        'writeautomaticsub': True,  # 자동 생성된 자막 활성화
        'http_headers': {
            'Accept-Language': 'ko',  # HTTP 요청에 한국어 언어 설정
        },
    }
    video_url = video_link.url
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(video_url, download=False)
        # 자동 생성된 자막 확인
        captions = info.get('automatic_captions', {}).get('ko')

        if captions:
            json_url = captions[0]['url']  # 첫 번째 JSON 링크 가져오기
            print(f"Subtitles JSON URL: {json_url}")

            # JSON 데이터 요청
            response = requests.get(json_url)
            if response.status_code == 200:
                return response.json()  # JSON 데이터 반환
            else:
                print("Failed to fetch subtitle JSON.")
                return None
        else:
            raise Exception("자막이 없습니다.")

print(content.url)
def parse_subtitles(json_data):
    """JSON 데이터에서 자막 텍스트를 추출합니다."""
    events = json_data.get('events', [])
    subtitles = []

    for event in events:
        segments = event.get('segs', [])
        for seg in segments:
            text = seg.get('utf8', '') # 자막 텍스트를 가져옴
            if text:  # 빈 문자열이 아닌 경우에만 추가
                # 이전 텍스트와 이어붙이는 방식으로 처리
                text = text.replace('\n', ' ')
                subtitles.append(text)  # 새로운 문장 시작

    return ''.join(subtitles)  # 전체 텍스트 반환
a = get_meta(video_link)
print(a)
print(parse_subtitles(a))

https://www.youtube.com/watch?v=qOTbP9ciJ88
Subtitles JSON URL: https://www.youtube.com/api/timedtext?v=qOTbP9ciJ88&ei=wkKXZ6joNuSW1d8Ph6Tv4A8&caps=asr&opi=112496729&xoaf=5&hl=en&ip=0.0.0.0&ipbits=0&expire=1737991474&sparams=ip%2Cipbits%2Cexpire%2Cv%2Cei%2Ccaps%2Copi%2Cxoaf&signature=301E795F47F26030DA79937EB85765A00720D08C.7CFD26591EF55C9C8930CC379E1047B956622ACF&key=yt8&kind=asr&lang=ko&fmt=json3
{'wireMagic': 'pb3', 'pens': [{}], 'wsWinStyles': [{}, {'mhModeHint': 2, 'juJustifCode': 0, 'sdScrollDir': 3}], 'wpWinPositions': [{}, {'apPoint': 6, 'ahHorPos': 20, 'avVerPos': 100, 'rcRows': 2, 'ccCols': 40}], 'events': [{'tStartMs': 0, 'dDurationMs': 1481499, 'id': 1, 'wpWinPosId': 1, 'wsWinStyleId': 1}, {'tStartMs': 0, 'dDurationMs': 11880, 'wWinId': 1, 'segs': [{'utf8': '[음악]'}]}, {'tStartMs': 7789, 'dDurationMs': 4091, 'wWinId': 1, 'aAppend': 1, 'segs': [{'utf8': '\n'}]}, {'tStartMs': 7799, 'dDurationMs': 4081, 'wWinId': 1, 'segs': [{'utf8': '자', 'acAsrConf': 222}, {'utf8': ' 여러분들', 't

In [2]:
contents = repository.find_all()

content = contents[0]
video_link = content.url

from application.strategy import AutoYoutubeStrategy

stt_strategy = AutoYoutubeStrategy()

result = stt_strategy.transcribe(video_link.url)

In [3]:
print(result)

{'wireMagic': 'pb3', 'pens': [{}], 'wsWinStyles': [{}, {'mhModeHint': 2, 'juJustifCode': 0, 'sdScrollDir': 3}], 'wpWinPositions': [{}, {'apPoint': 6, 'ahHorPos': 20, 'avVerPos': 100, 'rcRows': 2, 'ccCols': 40}], 'events': [{'tStartMs': 0, 'dDurationMs': 1481499, 'id': 1, 'wpWinPosId': 1, 'wsWinStyleId': 1}, {'tStartMs': 0, 'dDurationMs': 11880, 'wWinId': 1, 'segs': [{'utf8': '[음악]'}]}, {'tStartMs': 7789, 'dDurationMs': 4091, 'wWinId': 1, 'aAppend': 1, 'segs': [{'utf8': '\n'}]}, {'tStartMs': 7799, 'dDurationMs': 4081, 'wWinId': 1, 'segs': [{'utf8': '자', 'acAsrConf': 222}, {'utf8': ' 여러분들', 'tOffsetMs': 481, 'acAsrConf': 222}, {'utf8': ' 반갑습니다', 'tOffsetMs': 840, 'acAsrConf': 222}, {'utf8': ' 이번에는', 'tOffsetMs': 1081, 'acAsrConf': 222}]}, {'tStartMs': 12049, 'wWinId': 1, 'aAppend': 1, 'segs': [{'utf8': '\n'}]}, {'tStartMs': 12059, 'dDurationMs': 2281, 'wWinId': 1, 'segs': [{'utf8': '뱀사이크', 'acAsrConf': 222}]}, {'tStartMs': 13009, 'dDurationMs': 1331, 'wWinId': 1, 'aAppend': 1, 'segs': [{

In [2]:
contents = repository.find_all()

content = contents[0]
video_link = content.url

from application.strategy import STTStrategyFactory, STTStrategyType

stt_strategy = STTStrategyFactory.create(STTStrategyType.AUTO_YOUTUBE)

result = stt_strategy.transcribe_to_script(video_link.url)
print(result)

YouTubeScript(script='[음악] 자 여러분들 반갑습니다 이번에는 뱀사이크 언제든 서바이버 개발에 첫 번째 시간으로...', chunks=[(0.0-11.88: [음악]...), (7.799-11.88: 자 여러분들 반갑습니다 이번...), (12.059-14.34: 뱀사이크...), ...] (641 chunks))


In [2]:
from infrastructure.database.mongo_client import MongoDBClient
from infrastructure.repository import YouTubeContentRepository

import os
from dotenv import load_dotenv

load_dotenv()

# MongoDB 클라이언트 초기화
client = MongoDBClient(uri=os.environ['MONGO_CONNECTION_STRING'])
client.connect()

# 저장소 초기화
repository = YouTubeContentRepository(client)

from application.strategy import STTStrategyFactory, STTStrategyType

stt_strategy = STTStrategyFactory.create(STTStrategyType.AUTO_YOUTUBE)

contents = repository.find_all()

count = len(contents)
for i, content in enumerate(contents):
    print(f'{i + 1}/{count}: {content.title}')
    script_auto = stt_strategy.transcribe(content.url.url)
    print(script_auto)

    if script_auto is not None:
        print(script_auto)
        pass
        #content.set_script_auto(script_auto)
        #repository.save(content)
    #script_auto = stt_strategy.transcribe(content.url.url)
    #print(script_auto)
    #content.set_script_auto(script_auto)
    #repository.save(content)

Connected to MongoDB
1/142: 2D 오브젝트🧍만들기 [유니티 뱀서라이크 01]
https://www.youtube.com/api/timedtext?v=qOTbP9ciJ88&ei=pE6XZ8aBJtmrvcAPkv2HqA4&caps=asr&opi=112496729&xoaf=5&hl=en&ip=0.0.0.0&ipbits=0&expire=1737994516&sparams=ip%2Cipbits%2Cexpire%2Cv%2Cei%2Ccaps%2Copi%2Cxoaf&signature=BDBE13293A6302A3E027C56FBA0F98ABE699388B.BF8441C7A14CD5DE22F1424F115AAD6EB5BBF775&key=yt8&kind=asr&lang=ko&fmt=json3
json
{'wireMagic': 'pb3', 'pens': [{}], 'wsWinStyles': [{}, {'mhModeHint': 2, 'juJustifCode': 0, 'sdScrollDir': 3}], 'wpWinPositions': [{}, {'apPoint': 6, 'ahHorPos': 20, 'avVerPos': 100, 'rcRows': 2, 'ccCols': 40}], 'events': [{'tStartMs': 0, 'dDurationMs': 1481499, 'id': 1, 'wpWinPosId': 1, 'wsWinStyleId': 1}, {'tStartMs': 0, 'dDurationMs': 11880, 'wWinId': 1, 'segs': [{'utf8': '[음악]'}]}, {'tStartMs': 7789, 'dDurationMs': 4091, 'wWinId': 1, 'aAppend': 1, 'segs': [{'utf8': '\n'}]}, {'tStartMs': 7799, 'dDurationMs': 4081, 'wWinId': 1, 'segs': [{'utf8': '자', 'acAsrConf': 222}, {'utf8': ' 여러분들', 'tOffs

KeyboardInterrupt: 

In [None]:
#print('complete')

In [3]:
!pip install youtube-transcript-api


Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.3-py3-none-any.whl.metadata (17 kB)
Downloading youtube_transcript_api-0.6.3-py3-none-any.whl (622 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m622.3/622.3 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.3


In [2]:
from infrastructure.database.mongo_client import MongoDBClient
from infrastructure.repository import YouTubeContentRepository

from youtube_transcript_api import YouTubeTranscriptApi

import os
from dotenv import load_dotenv

load_dotenv()

# MongoDB 클라이언트 초기화
client = MongoDBClient(uri=os.environ['MONGO_CONNECTION_STRING'])
client.connect()

# 저장소 초기화
repository = YouTubeContentRepository(client)

from strategy import STTStrategyFactory, STTStrategyType

stt_strategy = STTStrategyFactory.create(STTStrategyType.AUTO_YOUTUBE)

contents = repository.find_all()

count = len(contents)
for i, content in enumerate(contents):
    print(f'{i + 1}/{count}: {content.title}: {content.url.url}')
    result = stt_strategy.transcribe_to_script(content.video_id)
    content.set_script_auto(result)

    if result is None:
        print("없어용")

    repository.save(content)

Connected to MongoDB
1/142: 2D 오브젝트🧍만들기 [유니티 뱀서라이크 01]: https://www.youtube.com/watch?v=qOTbP9ciJ88
2/142: 플레이어 이동🚶구현하기 [유니티 뱀서라이크 02]: https://www.youtube.com/watch?v=YAu4yWU5D5U
3/142: 새로운 인풋시스템🎮적용하기 [유니티 뱀서라이크 02+]: https://www.youtube.com/watch?v=tKMnVBTes0M
4/142: 2D 셀 애니메이션🏃제작하기 [유니티 뱀서라이크 03]: https://www.youtube.com/watch?v=vizfd1TeRMI
5/142: 무한🌍맵 이동 [유니티 뱀서라이크 04]: https://www.youtube.com/watch?v=SNjgew0VhHY
6/142: 몬스터🧟만들기 [유니티 뱀서라이크 05]: https://www.youtube.com/watch?v=0aUCu1BcZxs
7/142: 오브젝트 풀링🏊으로 소환하기 [유니티 뱀서라이크 06]: https://www.youtube.com/watch?v=A7mfPH8jyBE
8/142: 소환 레벨⏳적용하기 [유니티 뱀서라이크 06+]: https://www.youtube.com/watch?v=SSg_9q-8h-A
9/142: 회전하는🪓근접무기 구현 [유니티 뱀서라이크 07]: https://www.youtube.com/watch?v=HPJVVcRKwn0
10/142: 자동🎯원거리 공격 구현 [유니티 뱀서라이크 08]: https://www.youtube.com/watch?v=dBQHtMI-Og4
11/142: 타격감🌟있는 몬스터 처치 만들기 [유니티 뱀서라이크 09]: https://www.youtube.com/watch?v=JRaIlTEmvHY
12/142: HUD📐제작하기 [유니티 뱀서라이크 10]: https://www.youtube.com/watch?v=ip0xffLSWlk
13/142: 능력 업그레이드💪구