In [24]:
'''
匯入套件
'''
# 操作 browser 的 API
from selenium import webdriver

# 處理逾時例外的工具
from selenium.common.exceptions import TimeoutException

# Tool for waiting until an element appears on a dynamic page; usually paired with expected_conditions
from selenium.webdriver.support.ui import WebDriverWait

# 搭配 WebDriverWait 使用，對元素狀態的一種期待條件，若條件發生，則等待結束，往下一行執行
from selenium.webdriver.support import expected_conditions as EC

# 期待元素出現要透過什麼方式指定，通常與 EC、WebDriverWait 一起使用
from selenium.webdriver.common.by import By

# 強制等待 (執行期間休息一下)
from time import sleep

# 整理 json 使用的工具
import json

# 執行 command 的時候用的
import os

# Start a browser session using Selenium's Chrome WebDriver
driver = webdriver.Chrome()

# Maximize the browser window (currently disabled)
#driver.maximize_window()

# Holds the scraped records
listData = []

'''
以 function 名稱，作為爬蟲流程
'''

# Visit the page
def visit():
    """Open the YouTube home page in the shared ``driver`` session."""
    homeUrl = 'https://www.youtube.com/'
    driver.get(homeUrl)

# Enter the search keyword
def search(keyword="Trump"):
    """Type a keyword into the YouTube search box and submit the search.

    Args:
        keyword: Text to search for. Defaults to "Trump", the value that was
            previously hard-coded.

    Raises:
        TimeoutException: If the search box is not present within 10 seconds.
    """
    # The page is built dynamically, so wait for the search box rather than
    # assuming it already exists right after the page load.
    txtInput = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "input#search"))
    )
    txtInput.send_keys(keyword)

    # Press the search button to submit
    btnInput = driver.find_element(By.CSS_SELECTOR, "button#search-icon-legacy")
    btnInput.click()
    
# Filter (options)
def filterFunc(optionIndex=10, timeout=10):
    """Open the search filter panel and click one of its options.

    Args:
        optionIndex: Position of the option to click among the elements that
            match the filter-option selector. Defaults to 10, the index that
            was previously hard-coded.
        timeout: Maximum number of seconds to wait for the filter button and
            for the chosen option.

    Raises:
        TimeoutException: If the filter button does not become clickable, or
            the requested option is not displayed, within ``timeout`` seconds.
    """
    filterButton = (
        By.CSS_SELECTOR,
        "yt-formatted-string#text.style-scope.ytd-toggle-button-renderer.style-text",
    )
    optionSelector = "yt-formatted-string.style-scope.ytd-search-filter-renderer"

    def _findOption(browser):
        # Return the requested option once it exists and is displayed;
        # returning False makes WebDriverWait poll again.
        found = browser.find_elements(By.CSS_SELECTOR, optionSelector)
        try:
            option = found[optionIndex]
        except IndexError:
            return False
        return option if option.is_displayed() else False

    wait = WebDriverWait(driver, timeout)

    # Wait until the filter button can actually be clicked, then reuse the
    # element returned by the wait instead of looking it up a second time.
    wait.until(EC.element_to_be_clickable(filterButton)).click()

    # Wait for the option to show up instead of sleeping a fixed time and
    # indexing the result list without checking its length.
    wait.until(_findOption).click()
    
    
# Scroll the page
def scroll(step=300, maxOffset=300, pause=1):
    """Scroll the page down step by step so that more content can load.

    After every step the document's ``scrollHeight`` is read again; scrolling
    continues while the requested offset has not passed that height.

    Args:
        step: Number of pixels added to the scroll offset on each iteration.
        maxOffset: Stop once the offset reaches this value. The default of 300
            keeps the original experimental behaviour of a single step; pass
            ``None`` to scroll until the offset passes the page height.
        pause: Seconds to sleep after scrolling and again after reading the
            height.

    Raises:
        ValueError: If ``step`` is not positive (the loop could never end).
    """
    if step <= 0:
        raise ValueError("step must be a positive number of pixels")

    # Latest scrollHeight reported by the page (0 until the first reading)
    innerHeightOfWindow = 0

    # Scroll offset requested so far
    totalOffset = 0

    while totalOffset <= innerHeightOfWindow:
        totalOffset += step

        # Pass the offset as a script argument instead of formatting it into
        # the JavaScript source text.
        driver.execute_script(
            "window.scrollTo({top: arguments[0], behavior: 'smooth'});",
            totalOffset,
        )

        # Give the smooth scroll time to run
        sleep(pause)

        # Re-read the total document height after scrolling
        innerHeightOfWindow = driver.execute_script(
            'return window.document.documentElement.scrollHeight;'
        )

        sleep(pause)

        # Report the progress
        print("innerHeightOfWindow: {}, totalOffset: {}".format(innerHeightOfWindow, totalOffset))

        # Optional cap so that experiments can stop early
        if maxOffset is not None and totalOffset >= maxOffset:
            break
            
# Parse information from the page elements
def parse():
    """Collect the thumbnail URL, title and link of every video result.

    Each value is printed right after it is read, and one dict per video is
    appended to the module-level ``listData``.
    """
    videoSelector = 'ytd-video-renderer.style-scope.ytd-item-section-renderer'
    thumbSelector = "ytd-thumbnail.style-scope.ytd-video-renderer img#img"

    for video in driver.find_elements(By.CSS_SELECTOR, videoSelector):
        # Thumbnail image URL
        thumbUrl = video.find_element(By.CSS_SELECTOR, thumbSelector).get_attribute('src')
        print(thumbUrl)

        # The title anchor carries both the visible title and the link
        anchor = video.find_element(By.CSS_SELECTOR, "a#video-title")
        caption = anchor.text
        print(caption)

        watchUrl = anchor.get_attribute('href')
        print(watchUrl)

        # Store the record
        record = {"imgSrc": thumbUrl, "aTitle": caption, "aLink": watchUrl}
        listData.append(record)

# Save the list as JSON
def saveJson(filename="youtube.json"):
    """Write the records in ``listData`` to a UTF-8 JSON file.

    Args:
        filename: Path of the file to write. Defaults to "youtube.json", the
            path that was previously hard-coded.
    """
    # Serialize before opening the file so that a serialization error does
    # not leave a truncated file behind.
    strJson = json.dumps(listData, ensure_ascii=False)

    # The context manager closes the file even if the write fails.
    with open(filename, "w", encoding='utf-8') as fp:
        fp.write(strJson)
    
# Close the browser
def close():
    """Quit the WebDriver session by calling ``driver.quit()``."""
    driver.quit()
    
    
def download(limit=3, filename="youtube.json"):
    """Download the first videos listed in the saved JSON file.

    Reads the JSON file and runs ``youtube-dl.exe`` once per entry, asking
    for the mp4 format and naming each output file ``<id>.<ext>``.

    Args:
        limit: Number of entries, counted from the start of the list, to
            download. Defaults to 3, matching the previous ``index <= 2``
            check. ``None`` downloads every entry.
        filename: Path of the JSON file to read. Defaults to "youtube.json".

    Raises:
        FileNotFoundError: If the JSON file or the ``youtube-dl.exe``
            executable cannot be found.
    """
    # Local import: the file's top-level imports do not include subprocess.
    import subprocess

    # Load the JSON file into a list of dicts; the context manager closes the
    # file even if parsing fails.
    with open(filename, "r", encoding='utf-8') as fp:
        listResult = json.load(fp)

    # Only the first ``limit`` entries are downloaded
    for obj in listResult[:limit]:
        videoUrl = obj["aLink"]
        if not videoUrl:
            # No link was scraped for this entry, so there is nothing to fetch
            continue

        # Pass the arguments as a list so no shell parses the scraped URL;
        # characters such as "&" would otherwise split or alter the command.
        subprocess.run(
            ["youtube-dl.exe", "-f", "mp4", "-i", videoUrl, "-o", "%(id)s.%(ext)s"],
            check=False,
        )


# Run the crawl steps in order. The browser is closed in ``finally`` so that
# a failure in any step does not leave a Chrome session running.
try:
    visit()
    search()
    filterFunc()
    scroll()
    parse()
    saveJson()
finally:
    close()

# Downloading only needs the saved JSON file, so it runs after the browser
# has been closed.
download()

innerHeightOfWindow: 3281, totalOffset: 300
https://i.ytimg.com/vi/ea7XeWQ_Ybo/hqdefault.jpg?sqp=-oaymwEZCPYBEIoBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLDhgaF6sVt7k-NpGZDFONgI6OIz7g
Trump visits St. John's Episcopal Church, which was set on fire last night
https://www.youtube.com/watch?v=ea7XeWQ_Ybo
https://i.ytimg.com/vi/Btjk0_WRIQ4/hqdefault.jpg?sqp=-oaymwEZCPYBEIoBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLBR43bG-5DessAtVvv6V3URX39fkQ
Trump threatens to deploy military against protesters
https://www.youtube.com/watch?v=Btjk0_WRIQ4
https://i.ytimg.com/vi/a9sIH4S2JC4/hqdefault.jpg?sqp=-oaymwEZCPYBEIoBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLBR_CGAc-DHJA9gdokfMOxmoYr89w
Trump threatens to deploy military to end protests
https://www.youtube.com/watch?v=a9sIH4S2JC4
https://i.ytimg.com/vi/Ti-Ekg6HGSw/hqdefault.jpg?sqp=-oaymwEZCPYBEIoBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLBFf5EIF-e99nN7sp7B1ZfhT6q3zg
Pres. Trump says he is mobilizing 'heavily armed' military to stop protests in DC | ABC News
https:/