In [1]:
'''
匯入套件
'''
# 操作 browser 的 API
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# ChromeDriver 的下載管理工具
from webdriver_manager.chrome import ChromeDriverManager

# 處理逾時例外的工具
from selenium.common.exceptions import TimeoutException

# 面對動態網頁，等待某個元素出現的工具，通常與 expected_conditions 搭配
from selenium.webdriver.support.ui import WebDriverWait

# 搭配 WebDriverWait 使用，對元素狀態的一種期待條件，若條件發生，則等待結束，往下一行執行
from selenium.webdriver.support import expected_conditions as EC

# 期待元素出現要透過什麼方式指定，通常與 EC、WebDriverWait 一起使用
from selenium.webdriver.common.by import By

# 取得系統時間的工具
from datetime import datetime

# 強制停止/強制等待 (程式執行期間休息一下)
from time import sleep

# 處理下拉式選單的工具
from selenium.webdriver.support.ui import Select

import pprint
import os

# 隨機取得 User-Agent
from fake_useragent import UserAgent
ua = UserAgent()

# Options used when launching the browser
my_options = webdriver.ChromeOptions()
# "--headless" (run in the background without a visible browser window) is
# intentionally not part of the list below.
for chrome_flag in (
    "--start-maximized",              # maximize the window
    "--incognito",                    # use incognito mode
    "--disable-popup-blocking",       # disable the pop-up blocker
    "--disable-notifications",        # turn off notifications
    f'--user-agent={ua.random}',      # (optional) send a random User-Agent
):
    my_options.add_argument(chrome_flag)

# Start Chrome through its WebDriver with the options above
driver = webdriver.Chrome(options=my_options)

# Collected search results; each entry is a dict with 'Food' and 'Link' keys
list_data = []

# Search page to visit
url = 'https://consumer.fda.gov.tw/Food/TFND.aspx?nodeID=178'
# Index of the option to select in the food-category drop-down menu
type_index = 18
# Text typed into the search box
keyword = '冷凍'

In [2]:
# Open the search page
def visit():
    """Load the module-level ``url`` in the browser controlled by ``driver``."""
    driver.get(url)

# Choose an entry in the food-category drop-down menu
def setDropDownMenu(type_index):
    """Select the option at position ``type_index`` in the ``li > select`` drop-down.

    Args:
        type_index: Zero-based index passed to ``Select.select_by_index``.

    Any exception raised while driving the page is caught and printed
    instead of being propagated.
    """
    try:
        # Fixed pause before touching the page
        sleep(1)

        # Wrap the <select> element and pick the food category
        dropdown_element = driver.find_element(By.CSS_SELECTOR, 'li > select')
        category_select = Select(dropdown_element)
        category_select.select_by_index(type_index)

        # Fixed pause after the selection
        sleep(2)
    except Exception as err:
        message = str(err)
        print(message)

def key_in_search(keyword):
    """Type ``keyword`` into the search box and submit the query.

    Args:
        keyword: Text to search for.

    Any exception raised while driving the page is caught and printed
    instead of being propagated.
    """
    try:
        search_box = driver.find_element(
            By.CSS_SELECTOR, 'input#ctl00_content_SelectK'
        )
        # Empty the field first so that re-running this step does not append
        # the keyword to text that is already in the box
        search_box.clear()
        search_box.send_keys(keyword)

        # Fixed pause before submitting
        sleep(1)

        # Click the search button
        driver.find_element(
            By.CSS_SELECTOR, 'input#ctl00_content_searchBut'
        ).click()

        # Fixed pause after submitting
        sleep(2)
    except Exception as err:
        print(str(err))

# Run the search: open the page, choose the category, then submit the keyword
visit()
setDropDownMenu(type_index)
key_in_search(keyword)

## 開啟分頁

In [3]:
# Open the extra blank tabs with JavaScript; each one is later pointed at one
# page of the search results
NUM_EXTRA_TABS = 9
for i in range(NUM_EXTRA_TABS):
    driver.execute_script('window.open("");')

In [4]:
# Switch back to the initial tab (index 0)
driver.switch_to.window(driver.window_handles[0])

In [5]:
# Send every extra tab to its own page of the result list; tab 0 is left as is
tab_total = len(driver.window_handles)
for index in range(1, tab_total):
    # Focus the tab at this position
    tab_handle = driver.window_handles[index]
    driver.switch_to.window(tab_handle)

    # Load the result page for this tab (after the switch, `driver` acts on it)
    page_url = f'https://consumer.fda.gov.tw/Food/TFND.aspx?nodeID=178&t={type_index}&k={keyword}&p={index}'
    driver.get(page_url)

    # Fixed pause
    sleep(3)

In [6]:
# Gather the result list from every extra tab
for index in range(1, len(driver.window_handles)):
    # Focus the tab at this position
    driver.switch_to.window(driver.window_handles[index])

    # Record the text and the link of each sample-name anchor on the page
    list_data.extend(
        {
            'Food': a.get_attribute('innerText'),
            'Link': a.get_attribute('href'),
        }
        for a in driver.find_elements(By.CSS_SELECTOR, 'td[data-th="樣品名稱"]>a')
    )

In [None]:
# Close every extra tab, starting from the last one; the initial tab is kept
while len(driver.window_handles) > 1:
    # Focus the last tab, then close it
    driver.switch_to.window(driver.window_handles[-1])
    driver.close()

# close() leaves the driver targeting the window that was just closed, so any
# later command would fail with "no such window"; refocus the remaining tab
driver.switch_to.window(driver.window_handles[0])

In [7]:
# Display the collected list of {'Food', 'Link'} entries
list_data

[{'Food': '冷凍火腿炒飯',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1753'},
 {'Food': '冷凍蝦仁炒飯',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1770'},
 {'Food': '冷凍筒仔米糕',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1797'},
 {'Food': '冷凍芝麻湯圓',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1755'},
 {'Food': '冷凍花生湯圓',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1756'},
 {'Food': '冷凍豬肉湯圓',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1774'},
 {'Food': '冷凍烏龍麵',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1972'},
 {'Food': '冷凍牛肉水餃',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1784'},
 {'Food': '冷凍豬肉水餃',
  'Link': 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1771'},
 {'Food': '冷凍豬肉韭菜水餃',
  'Link': 'https://consum

## 拿資料

In [12]:
# Containers for the scraped food information and nutrition values
food_info = []
food_nutr = []
# Names of the analysis items to extract (empty at this point)
selected_items = []

In [9]:
# Load one sample detail page to try the basic-information extraction on
url_test = 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1753'
driver.get(url_test)

def extract_food_info():
    """Read the basic-information fields of the loaded page into ``food_info``.

    Each field's ``innerText`` is read from the page currently shown by
    ``driver``; the resulting dict is appended to the module-level
    ``food_info`` list. Nothing is returned.
    """
    # Output label -> CSS selector of the element holding the value, in output
    # order. The integrated-number field (#ctl00_content_lbIntNo) is currently
    # not collected.
    field_selectors = {
        'Food Category 食品分類': '#ctl00_content_lbFoodCategoryName',
        'Data Type 資料類別': '#ctl00_content_lbDataType',
        'Food Name 樣品名稱': '#ctl00_content_lbFoodName',
        'Other Name 俗名': '#ctl00_content_lbTrivialName',
        'English Name 樣品英文名稱': '#ctl00_content_lbFoodEName',
        'Content 內容物描述': '#ctl00_content_lbContent',
    }

    record = {}
    for label, selector in field_selectors.items():
        element = driver.find_element(By.CSS_SELECTOR, selector)
        record[label] = element.get_attribute('innerText')

    food_info.append(record)

# Extract from the loaded page, then print the collected records with their
# keys kept in insertion order
extract_food_info()
pprint.pprint(food_info, sort_dicts=False)

[{'Food Category 食品分類': '加工調理食品及其他類',
  'Data Type 資料類別': '樣品基本資料',
  'Food Name 樣品名稱': '冷凍火腿炒飯',
  'Other Name 俗名': '',
  'English Name 樣品英文名稱': 'Fried rice',
  'Content 內容物描述': '樣品狀態:冷凍包裝(米,火腿,蔬菜等); 前處理描述:混合均勻打碎'}]


In [13]:
# Load one sample detail page to try the nutrition extraction on
url_test = 'https://consumer.fda.gov.tw/Food/tfndDetail.aspx?nodeID=178&f=0&id=1753'
driver.get(url_test)

# Names of the analysis items ('分析項') to extract
selected_items = ["熱量", "粗蛋白", "粗脂肪", "總碳水化合物", "膳食纖維", "糖質總量", "鈉", "膽固醇","反式脂肪"]

# Locate the rows of table.rwd-table on the currently loaded page
trs = driver.find_elements(By.CSS_SELECTOR, 'table.rwd-table > tbody > tr')

# Collect the selected analysis items ('分析項') and their content per 100 g
# ('每100克含量') from the currently loaded page
def extract_food_nutr():
    """Append the loaded page's food name and selected nutrition values to ``food_nutr``.

    First a ``{'樣品名稱': <name>}`` dict is appended, then one single-key dict
    ``{<item>: <value>}`` for every table row whose item text contains one of
    the names in the module-level ``selected_items``. Nothing is returned.
    """
    # Record which food the following values belong to
    name_element = driver.find_element(By.CSS_SELECTOR, '#ctl00_content_lbFoodName')
    food_nutr.append({'樣品名稱': name_element.get_attribute('innerText')})

    # Look the rows up on every call: rows located before a later driver.get()
    # belong to the previous page and can no longer be used
    rows = driver.find_elements(By.CSS_SELECTOR, 'table.rwd-table > tbody > tr')

    for row in rows:
        cells = row.find_elements(By.CSS_SELECTOR, 'td.txt_C')

        # Cell 1 holds the item name and cell 3 the value, so at least four
        # cells are needed before indexing
        if len(cells) < 4:
            continue

        item_label = cells[1].text.strip()

        # NOTE(review): this is a substring match, and the recorded output
        # contains two '熱量' entries, so one selected name can match more than
        # one row - confirm whether an exact match is intended.
        for item in selected_items:
            if item in item_label:
                food_nutr.append({item: cells[3].text.strip()})
                # Only the first matching selected item is recorded per row
                break

# Run the extraction and print the collected nutrition records
# (a list of single-key dicts)
extract_food_nutr()
pprint.pprint(food_nutr)

[{'樣品名稱': '冷凍火腿炒飯'},
 {'熱量': '189'},
 {'熱量': '185'},
 {'粗蛋白': '5.0'},
 {'粗脂肪': '5.6'},
 {'總碳水化合物': '29.7'},
 {'膳食纖維': '2.0'},
 {'糖質總量': ''},
 {'鈉': '220'},
 {'膽固醇': '37'},
 {'反式脂肪': ''}]


In [None]:
# Visit every collected food link and scrape its basic info and nutrition data
def visit_food():
    """Open each link stored in ``list_data`` and run both extractors on it.

    For every entry the page at ``entry['Link']`` is loaded, followed by a
    fixed pause, then ``extract_food_info()`` and ``extract_food_nutr()`` are
    called. Entries without a link are skipped. Nothing is returned.
    """
    for entry in list_data:
        link = entry['Link']
        # The stored href may be missing; there is nothing to open in that case
        if not link:
            continue

        driver.get(link)

        # Fixed pause after loading the page
        sleep(3)

        extract_food_info()
        extract_food_nutr()

In [None]:
# Quit the WebDriver session and close the browser
driver.quit()