In [None]:
# 操作 browser 的 API
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# 處理逾時例外的工具
from selenium.common.exceptions import TimeoutException

# Tool for waiting until an element appears on a dynamic page; usually paired with expected_conditions
from selenium.webdriver.support.ui import WebDriverWait

# 搭配 WebDriverWait 使用，對元素狀態的一種期待條件，若條件發生，則等待結束，往下一行執行
from selenium.webdriver.support import expected_conditions as EC

# 期待元素出現要透過什麼方式指定，通常與 EC、WebDriverWait 一起使用
from selenium.webdriver.common.by import By

# 強制等待 (執行期間休息一下)
from time import sleep

# 整理 json 使用的工具
import json

# 執行 command 的時候用的
import os

# 子處理程序，用來取代 os.system 的功能
import subprocess

# Command-line switches handed to Chrome at start-up.
# ("--headless" is not enabled, so a visible browser window is opened;
#  add it to this tuple to run without a window.)
chrome_flags = (
    "--start-maximized",         # maximize the window
    "--incognito",               # use incognito mode
    "--disable-popup-blocking",  # do not block pop-ups
    "--disable-notifications",   # turn off Chrome push notifications
    "--lang=zh-TW",              # set the browser language to Traditional Chinese
)

my_options = webdriver.ChromeOptions()
for chrome_flag in chrome_flags:
    my_options.add_argument(chrome_flag)

# Create the Chrome WebDriver, using the driver binary installed by
# webdriver_manager.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=my_options)


# Directory that receives the scraped JSON and the downloaded videos.
folderPath = 'youtube'
# exist_ok=True makes the creation idempotent and avoids the race between
# a separate "exists?" check and the actual mkdir.
os.makedirs(folderPath, exist_ok=True)

In [None]:
# Module-level accumulator: parse() appends one dict per scraped video,
# and saveJson() serializes the whole list.
listData=[]
def visit():
    """Navigate the shared ``driver`` to the YouTube home page."""
    home_url = 'https://www.youtube.com/'
    driver.get(home_url)
    
def search(keyword="相愛後動物感傷"):
    """Type a query into the YouTube search box and submit it.

    Args:
        keyword: Text to search for. The default is the query this
            function has always used, so ``search()`` with no argument
            still searches for the same term.

    Raises:
        TimeoutException: If the search input is not present in the DOM
            within 10 seconds.
    """
    # The page is built dynamically, so wait for the input to exist
    # instead of looking it up immediately after navigation.
    txtInput = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "input#search"))
    )
    txtInput.send_keys(keyword)
    sleep(1)
    txtInput.submit()
    sleep(1)
    

def filterFunc(option_index=9):
    """Open the search filter panel and click one of its options.

    Waits up to 10 seconds for the filter toggle button, clicks it, then
    clicks the filter label at position ``option_index`` among all filter
    labels found on the page.

    Args:
        option_index: Zero-based position of the filter label to click.
            Defaults to 9, the value that was previously hard-coded.
            NOTE(review): which filter this corresponds to depends on the
            current YouTube layout - confirm against the live page.

    Returns:
        None. If the toggle never appears, or fewer labels than expected
        are rendered, a message is printed and no filter is applied.
    """
    toggle_selector = (
        "ytd-toggle-button-renderer.style-scope.ytd-search-sub-menu-renderer"
    )
    option_selector = (
        "yt-formatted-string.style-scope.ytd-search-filter-renderer"
    )

    # Only the wait can time out, so keep the try body limited to it.
    try:
        toggle = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, toggle_selector))
        )
    except TimeoutException:
        print("等待逾時")
        return

    # Reuse the element returned by the wait instead of a second lookup.
    toggle.click()
    sleep(2)

    options = driver.find_elements(By.CSS_SELECTOR, option_selector)
    # Guard the index: a layout change that renders fewer labels would
    # otherwise raise IndexError and abort the whole run.
    if not 0 <= option_index < len(options):
        print(f"找不到索引 {option_index} 的篩選選項 (共 {len(options)} 個)，略過篩選")
        return

    options[option_index].click()
    sleep(2)
            
            
def scroll(limit=3, max_offset=1000, pause=3):
    """Scroll to the bottom of the page repeatedly so more results load.

    On each pass the current document height is read, the window is
    smooth-scrolled to that offset, and after ``pause`` seconds the height
    is read again. A pass where the height did not change counts as a
    stall.

    Args:
        limit: The loop keeps running while the cumulative number of
            stalls is ``<= limit`` (so it ends after ``limit + 1`` stalls).
        max_offset: Stop as soon as the scrolled-to offset is at least
            this many pixels. Pass ``None` `` to disable this cap.
        pause: Seconds to wait after each scroll for content to load.

    The defaults reproduce the values that were previously hard-coded.
    """
    height_script = 'return window.document.documentElement.scrollHeight;'
    stalled = 0

    while stalled <= limit:
        offset = driver.execute_script(height_script)

        # Pass the offset as a script argument rather than formatting it
        # into the JavaScript source.
        driver.execute_script(
            'window.scrollTo({top: arguments[0], behavior: "smooth"});',
            offset,
        )

        sleep(pause)

        new_height = driver.execute_script(height_script)
        if offset == new_height:
            stalled += 1

        if max_offset is not None and offset >= max_offset:
            break
            
            
            
def _extract_video_id(link):
    """Return the ``v`` query parameter of *link*, or ``None`` if absent."""
    from urllib.parse import parse_qs, urlparse

    if not link:
        return None
    values = parse_qs(urlparse(link).query).get("v")
    return values[0] if values else None


def parse():
    """Collect thumbnail, title, link and video id for each search result.

    Every ``ytd-video-renderer`` element currently on the page is read and
    a dict with the keys ``id``, ``title``, ``link`` and ``img`` is
    appended to the module-level ``listData``. The extracted values are
    also printed.

    Results whose link has no ``v`` query parameter are skipped with a
    message instead of raising, and the id no longer includes any query
    parameters that follow ``v`` in the URL.
    """
    ytd_video_renderers = driver.find_elements(
        By.CSS_SELECTOR,
        "ytd-video-renderer.style-scope.ytd-item-section-renderer"
    )

    for ytd_video_renderer in ytd_video_renderers:
        print("="*30)

        # Thumbnail URL (first <img> inside the result).
        img = ytd_video_renderer.find_element(By.CSS_SELECTOR, "img")
        imgSrc = img.get_attribute('src')
        print(imgSrc)

        # Video title.
        a = ytd_video_renderer.find_element(By.CSS_SELECTOR, "a#video-title")
        aTitle = a.get_attribute('innerText')
        print(aTitle)

        # Video link.
        aLink = a.get_attribute('href')
        print(aLink)

        # Video id, taken from the link's query string.
        youtube_id = _extract_video_id(aLink)
        if youtube_id is None:
            print(f"無法從連結取得影片 ID，略過: {aLink}")
            continue
        print(youtube_id)

        # Store the record for saveJson().
        listData.append({
            "id": youtube_id,
            "title": aTitle,
            "link": aLink,
            "img": imgSrc,
        })
        
def saveJson():
    """Dump ``listData`` to ``youtube.json`` inside ``folderPath``.

    The file is written as UTF-8 with non-ASCII characters kept as-is and
    a 4-space indent.
    """
    json_path = f"{folderPath}/youtube.json"
    with open(json_path, mode='w', encoding='utf-8') as out_file:
        json.dump(listData, out_file, ensure_ascii=False, indent=4)
        
        
def close():
    """Quit the shared ``driver``, ending the browser session."""
    driver.quit()

In [None]:
if __name__ =="__main__":
    # Run the scraping steps in order. The browser is shut down in a
    # finally block so it is not left running when any step raises.
    try:
        visit()
        search()
        filterFunc()
        scroll()
        parse()
        saveJson()
    finally:
        close()

In [None]:
def download(limit=3, executable='./yt-dlp.exe'):
    """Download the first few videos listed in ``youtube.json`` with yt-dlp.

    Reads ``<folderPath>/youtube.json``, and for each of the first
    ``limit`` entries runs the yt-dlp executable on the entry's ``link``,
    saving into ``folderPath``. The combined stdout/stderr of each run is
    printed afterwards.

    Args:
        limit: Maximum number of entries to download. Defaults to 3, the
            value that was previously hard-coded; ``None`` downloads all.
        executable: Path of the yt-dlp program to run. Defaults to the
            previously hard-coded ``./yt-dlp.exe``.

    Raises:
        FileNotFoundError: If the JSON file or the executable is missing.
    """
    with open(f"{folderPath}/youtube.json", 'r', encoding='utf-8') as file:
        listResult = json.load(file)

    for obj in listResult[:limit]:
        print("=" * 50)
        print(f"正在下載連結: {obj['link']}")

        cmd = [
            executable,
            obj['link'],
            '-f', 'b[ext=mp4]',  # yt-dlp format selector: best format with an mp4 extension
            '-o', f'{folderPath}/%(title)s.%(ext)s',
        ]

        # text=True decodes the output to str so it prints readably
        # instead of as a bytes repr; errors="replace" keeps an
        # undecodable byte from aborting the loop.
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
        )

        # Only report success when yt-dlp actually exited cleanly.
        if result.returncode == 0:
            print("下載完成，訊息如下:")
        else:
            print(f"下載失敗 (結束代碼 {result.returncode})，訊息如下:")
        print(result.stdout)
        

# Start downloading the links stored in the JSON file under folderPath.
download()