In [1]:
import pandas as pd
from yt_dlp import YoutubeDL

In [2]:
import os
os.makedirs('downloads', exist_ok=True)

In [3]:
DOWNLOAD_DIR  = './downloads'

In [4]:
class YT():
    def __init__(self, filename, info_dict):
        self.filename = filename
        self.info_dict = info_dict
        self.audio_filepath = self.get_audio_filepath()
        self.caption_filepath = self.get_caption_filepath()

    def get_audio_filepath(self):
        return os.path.join(DOWNLOAD_DIR, self.info_dict['fulltitle'] + '.mp3')

    def get_caption_filepath(self):
        if self.info_dict.get('requested_subtitles'):
            ext = list(self.info_dict['requested_subtitles'].keys())[0]
            return os.path.join(DOWNLOAD_DIR, self.info_dict['fulltitle'] + '.' + ext + '.srt')
        else:
            return None

In [32]:
urls = [
    # 'https://www.youtube.com/watch?v=4kzl1b5hsgg',
    'https://www.youtube.com/watch?v=qdAJqG4HTSU'
    ]
ydl_opts = {
    'format': 'bestaudio',
    'writesubtitles': True,
    'subtitlesformat': 'srt',
    'outtmpl': {
        'default': 'downloads/%(title)s.%(ext)s',
    },

    # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments
    'postprocessors': [
        {  # Extract audio using ffmpeg
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        },
        {  # Convert subtitles to srt
            'key': 'FFmpegSubtitlesConvertor',
            'format': 'srt'
        }
    ]
}

In [33]:
with YoutubeDL(ydl_opts) as ydl:
    info_dict = ydl.extract_info(urls[0], download=False)
    filename = ydl.prepare_filename(info_dict)
    ydl.download(urls)

[youtube] Extracting URL: https://www.youtube.com/watch?v=qdAJqG4HTSU
[youtube] qdAJqG4HTSU: Downloading webpage
[youtube] qdAJqG4HTSU: Downloading ios player API JSON
[youtube] qdAJqG4HTSU: Downloading m3u8 information
[info] qdAJqG4HTSU: Downloading subtitles: zh-TW




[youtube] Extracting URL: https://www.youtube.com/watch?v=qdAJqG4HTSU
[youtube] qdAJqG4HTSU: Downloading webpage
[youtube] qdAJqG4HTSU: Downloading ios player API JSON
[youtube] qdAJqG4HTSU: Downloading m3u8 information
[info] qdAJqG4HTSU: Downloading subtitles: zh-TW




[info] qdAJqG4HTSU: Downloading 1 format(s): 251
[info] Writing video subtitles to: downloads/艾蜜莉AMILI - ＂SPIDA＂ (Official Music Video).zh-TW.vtt
[download] Destination: downloads/艾蜜莉AMILI - ＂SPIDA＂ (Official Music Video).zh-TW.vtt
[download] 100% of    4.13KiB in 00:00:00 at 178.91KiB/s
[download] Destination: downloads/艾蜜莉AMILI - ＂SPIDA＂ (Official Music Video).webm
[download] 100% of    2.66MiB in 00:00:00 at 4.85MiB/s   
[ExtractAudio] Destination: downloads/艾蜜莉AMILI - ＂SPIDA＂ (Official Music Video).mp3
Deleting original file downloads/艾蜜莉AMILI - ＂SPIDA＂ (Official Music Video).webm (pass -k to keep)


In [29]:
list(info_dict['requested_subtitles'].keys())[0]

'zh-TW'

In [31]:
info_dict

{'id': 'qdAJqG4HTSU',
 'title': '艾蜜莉AMILI - "SPIDA" (Official Music Video)',
 'formats': [{'format_id': 'sb3',
   'format_note': 'storyboard',
   'ext': 'mhtml',
   'protocol': 'mhtml',
   'acodec': 'none',
   'vcodec': 'none',
   'url': 'https://i.ytimg.com/sb/qdAJqG4HTSU/storyboard3_L0/default.jpg?sqp=-oaymwENSDfyq4qpAwVwAcABBqLzl_8DBgjBiqqzBg==&sigh=rs$AOn4CLDALy65cb4c93t4tQQMSRvcWWjFXA',
   'width': 48,
   'height': 27,
   'fps': 0.6060606060606061,
   'rows': 10,
   'columns': 10,
   'fragments': [{'url': 'https://i.ytimg.com/sb/qdAJqG4HTSU/storyboard3_L0/default.jpg?sqp=-oaymwENSDfyq4qpAwVwAcABBqLzl_8DBgjBiqqzBg==&sigh=rs$AOn4CLDALy65cb4c93t4tQQMSRvcWWjFXA',
     'duration': 165.0}],
   'resolution': '48x27',
   'aspect_ratio': 1.78,
   'filesize_approx': None,
   'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;

In [7]:
filename

'downloads/艾蜜莉AMILI - ＂SPIDA＂ (Official Music Video).webm'

In [66]:
info_dict['title']

'艾蜜莉AMILI - "SPIDA" (Official Music Video)'

In [29]:
subtitles_metadata = info_dict['requested_subtitles']

In [30]:
subtitles_metadata

{'zh-TW': {'ext': 'vtt',
  'url': 'https://www.youtube.com/api/timedtext?v=qdAJqG4HTSU&ei=WotuZvyCJ_312roPpOerUA&caps=asr&opi=112496729&xoaf=4&hl=en&ip=0.0.0.0&ipbits=0&expire=1718545866&sparams=ip%2Cipbits%2Cexpire%2Cv%2Cei%2Ccaps%2Copi%2Cxoaf&signature=8940125CB263E3C10D52901C9726EC3B5EAB685D.989214CBDBA870DED2AF4429A134C6F4C96205B3&key=yt8&lang=zh-TW&fmt=vtt',
  'name': 'Chinese (Taiwan)'}}

In [5]:
from faster_whisper import WhisperModel

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
model = WhisperModel('small')



In [7]:
segments, _ = model.transcribe(
    audio_title + '.mp3',
    vad_filter=True,
    temperature=0.001
)

In [52]:
segments_result = []
for segment in segments:
    segments_result.append(segment)

In [8]:
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))

[0.05s -> 4.25s] OK OK OK
[4.25s -> 5.85s] 這樣子
[5.85s -> 14.97s] 我心裡有問題
[14.97s -> 16.61s] 沒理由反應
[16.61s -> 18.29s] 看我的反應有主要應
[18.29s -> 19.69s] 有主觀有反應
[19.69s -> 22.25s] 今天我們在布拉格這個一個
[22.25s -> 25.33s] 已經有點快廢棄的火車上裡面
[25.33s -> 26.21s] 我在講一次
[26.21s -> 29.61s] 我們的影片全部都是死景拍攝
[29.61s -> 32.17s] 我們真的是增強實彈
[32.17s -> 33.73s] 在認真做事情
[33.73s -> 35.13s] 沒有在跟你喊
[35.13s -> 36.97s] 做事情
[36.97s -> 38.13s] 做事情
[38.13s -> 39.53s] 增強實彈
[39.53s -> 41.53s] 做事情
[41.53s -> 42.37s] 不做事情
[42.37s -> 42.77s] 不
[44.01s -> 44.73s] 當然不做
[44.73s -> 45.17s] 不會
[45.17s -> 46.61s] 有性這樣
[46.61s -> 47.41s] 素角就這樣死
[47.41s -> 48.61s] 有性
[48.61s -> 49.61s] 我可以
[51.01s -> 52.21s] 前陣子我的好朋友
[52.21s -> 52.73s] J-sion
[52.73s -> 54.37s] 他出了一支新的歌
[54.37s -> 55.77s] 你們真的是朋友嗎
[55.77s -> 57.25s] 因為最近J-sion真的很忙
[57.25s -> 58.85s] 他一直都沒有來找我
[58.85s -> 60.41s] 但是我們都還是會打電話
[60.41s -> 62.05s] 聽到他的聲音就會覺得很開心
[62.05s -> 63.29s] 但有時候一次一聽
[63.29s -> 64.41s] 又是30分鐘起
[64.41s -> 65.61s] 然後那個額度又會受不了
[65.61s -> 66.93s] 對 有時候是這個樣子的
[66.93s -> 

In [47]:
from dotenv import load_dotenv, find_dotenv
import getpass
import os
_ = load_dotenv(find_dotenv()) # read local .env file

if os.getenv("OPENAI_API_KEY"):
  print("GET OPENAI_API_KEY")
  OPENAI_API  = os.environ['OPENAI_API_KEY']
else:
    api_key = getpass.getpass("Enter your OpenAI API key: ")

GET OPENAI_API_KEY


In [41]:
import pysrt

# 讀取 .srt 文件
subs = pysrt.open('downloads/艾蜜莉AMILI - ＂SPIDA＂ (Official Music Video).zh-TW.srt')

def timestamp_to_seconds(timestamp):
    return timestamp.hours * 3600 + timestamp.minutes * 60 + timestamp.seconds + timestamp.milliseconds / 1000.0


def timestamp_to_seconds(timestamp):
    return timestamp.hours * 3600 + timestamp.minutes * 60 + timestamp.seconds + timestamp.milliseconds / 1000.0

# 合併所有字幕內容，包含開始和結束時間
all_text = '\n'.join(
    "[%.2fs -> %.2fs] %s" % (
        timestamp_to_seconds(sub.start), 
        timestamp_to_seconds(sub.end), 
        sub.text.replace('\n', ' ')
    ) for sub in subs
)



print(all_text)


[0.93s -> 2.23s] (給我一個機會)
[3.66s -> 5.05s] (怎麼給你機會)
[7.14s -> 8.92s] (我以前沒得選擇)
[9.90s -> 11.90s] (現在我想做一個好人)
[15.87s -> 16.63s] (好)
[19.16s -> 20.76s] (去跟法官說啊)
[21.89s -> 23.52s] (看他讓不讓你做好人)
[24.50s -> 26.97s] 我是SPIDA 我的兄弟都是SPIDA
[27.06s -> 30.29s] 貼在黏糊糊的網羅大家跟我們當SPIDA
[31.00s -> 33.31s] 我是SPIDA我的朋友都是SPIDA
[33.54s -> 36.61s] 虛情假意的唱歌跟吃飯不配當個SPIDA
[36.90s -> 39.81s] Ho想當SPIDA 吃個飯問愛不愛他
[39.91s -> 42.61s] 當好人好難覺得奇怪你們不配
[43.58s -> 45.58s] 我是SPIDA 我的家人都是SPIDA
[46.05s -> 49.17s] If you brothers got problems say it to my SPIDA
[49.88s -> 51.14s] I feel nothing change
[51.14s -> 52.97s] 結帳的時候我依舊是拿零錢
[52.97s -> 54.12s] 你們沒有看走眼
[54.12s -> 56.12s] 這裡很平等別在意Body shape
[56.12s -> 57.23s] 只知道看影片
[57.23s -> 58.84s] 距離那麼遠最好能看裡面
[58.84s -> 60.40s] 不分青紅皂 拿著大龍炮
[60.40s -> 62.08s] 想要轟我 給我先等一下
[62.08s -> 63.33s] Everywhere 都有人cap
[63.33s -> 64.85s] 假借酒醉後壯著膽
[64.85s -> 66.37s] 該拿的 show fe 你忘了算
[66.37s -> 68.00s] 我們應該多讓你做核酸
[68.00s -> 69.88s] 真正的好朋友不用多幾個
[69.88s -> 71.50s] 我很嚴肅但不會說立正
[71.50s -> 72.95s] 跟那些王八蛋說聲去