In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
import time
import pandas as pd
import undetected_chromedriver as uc  #比較新的反制爬蟲的套件
# 

class CteeScraper:
    """Scrape the stock live-news list of ctee.com.tw through a Chrome browser.

    The browser is started in ``__init__`` and shut down at the end of
    ``get_news``, so one instance supports a single ``get_news`` call.
    """

    # User agent string sent by the browser.
    USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
    )

    def __init__(self):
        """Build the Chrome options and launch the undetected-chromedriver browser."""
        # Configure Chrome options
        self.chrome_options = webdriver.ChromeOptions()
        # self.chrome_options.add_argument('--headless')  # headless mode: no browser window
        self.chrome_options.add_argument('--disable-gpu')
        self.chrome_options.add_argument('--no-sandbox')
        self.chrome_options.add_argument('--disable-dev-shm-usage')
        # The user agent has to be passed through the --user-agent switch;
        # a bare string is not interpreted by Chrome as a user agent.
        self.chrome_options.add_argument(f'--user-agent={self.USER_AGENT}')

        # Start the browser (undetected_chromedriver instead of plain
        # webdriver.Chrome, to get past anti-bot measures).
        self.driver = uc.Chrome(options=self.chrome_options)

    @staticmethod
    def _parse_card(card):
        """Return the date, time, title and link of one news card element."""
        return {
            'date': card.find_element(By.CSS_SELECTOR, '.news-date').text,
            'time': card.find_element(By.CSS_SELECTOR, '.news-time').text,
            'title': card.find_element(By.CSS_SELECTOR, 'h3.news-title').text,
            'link': card.find_element(
                By.CSS_SELECTOR, 'h3.news-title a'
            ).get_attribute('href'),
        }

    @staticmethod
    def _add_unique(news_data, seen_keys, record):
        """Append ``record`` to ``news_data`` unless it was recorded before.

        A record is identified by its link; when the link is missing, the
        (date, time, title) triple is used instead.

        Returns:
            True if the record was appended, False if it was a duplicate.
        """
        key = record['link'] or (record['date'], record['time'], record['title'])
        if key in seen_keys:
            return False
        seen_keys.add(key)
        news_data.append(record)
        return True

    def _collect_cards(self, cards, news_data, seen_keys):
        """Parse every card element and record the ones not seen so far."""
        for card in cards:
            try:
                record = self._parse_card(card)
            except Exception as e:
                # Best effort: one malformed card must not abort the whole run.
                print(f"解析單則新聞時發生錯誤: {str(e)}")
                continue
            self._add_unique(news_data, seen_keys, record)

    def get_news(self, max_clicks=10, csv_path='ctee_news.csv'):
        """Collect the news list, save it as CSV and return it.

        The page's "load more" button is clicked up to ``max_clicks`` times.
        The cards on the page are parsed before each click and once more after
        the last one; a card that was already recorded is not recorded again.

        Args:
            max_clicks: Maximum number of "load more" clicks.
            csv_path: Path of the CSV file written with the collected rows.

        Returns:
            A DataFrame with the columns ``date``, ``time``, ``title`` and
            ``link``, or ``None`` when the crawl failed with an unexpected
            error. The browser is closed in either case.
        """
        # Imported locally because the file's import header does not provide it.
        from selenium.common.exceptions import TimeoutException

        card_locator = (By.CSS_SELECTOR, '.newslist__card')

        try:
            # Open the page
            self.driver.get('https://www.ctee.com.tw/livenews/stock')
            time.sleep(2)  # give the page time to load

            news_data = []
            seen_keys = set()

            for _ in range(max_clicks):
                # Wait for the news list, then record the cards not seen yet.
                news_elements = WebDriverWait(self.driver, 4).until(
                    EC.presence_of_all_elements_located(card_locator)
                )
                self._collect_cards(news_elements, news_data, seen_keys)

                # Scroll to the bottom of the page
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)  # wait for the scroll to settle

                # Locate the "load more" button. If it never becomes clickable,
                # stop paging but keep what has been collected so far.
                try:
                    load_more_button = WebDriverWait(self.driver, 10).until(
                        EC.element_to_be_clickable((By.ID, 'moreBtn'))
                    )
                except TimeoutException:
                    print("找不到「載入更多」按鈕，停止爬取。")
                    break

                # Bring the button to the middle of the viewport before clicking.
                self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", load_more_button)
                time.sleep(1)

                # A native click is used; ActionChains or a JavaScript click
                # are possible alternatives.
                load_more_button.click()
                time.sleep(2)  # wait for the new items to load

                # Stop when the click did not add any card.
                new_news_elements = WebDriverWait(self.driver, 6).until(
                    EC.presence_of_all_elements_located(card_locator)
                )
                if len(new_news_elements) <= len(news_elements):
                    print("沒有新資料載入，停止爬取。")
                    break
            else:
                # Every click was performed, so the cards loaded by the last
                # click have not been parsed yet.
                final_elements = WebDriverWait(self.driver, 6).until(
                    EC.presence_of_all_elements_located(card_locator)
                )
                self._collect_cards(final_elements, news_data, seen_keys)

            # Convert to a DataFrame
            df = pd.DataFrame(news_data)
            # Save as CSV (utf-8-sig writes a BOM)
            df.to_csv(csv_path, index=False, encoding='utf-8-sig')
            return df

        except Exception as e:
            print(f"爬取過程發生錯誤: {str(e)}")
            return None

        finally:
            self.driver.quit()

# Script entry point: run the scraper once and print the collected table.
if __name__ == "__main__":
    scraper = CteeScraper()
    news_df = scraper.get_news()
    # get_news returns None when the crawl failed; in that case print nothing.
    if news_df is not None:
        print("爬取成功！", news_df, sep="\n")


沒有新資料載入，停止爬取。
爬取成功！
            date   time                                   title  \
0     2024.11.30  16:45               冠軍半年獲利近5千萬！群益股王爭霸戰健康財富全到手   
1     2024.11.30  15:36              《通網股》遠傳雙軸轉型成效卓著　獲「金恆獎」金獎肯定   
2     2024.11.30  15:20  《通網股》撘載聯發科天璣9400 台灣大專案0元入手OPPO Find X8   
3     2024.11.30  14:56            《通網股》電子發票升級！中華電獲「雲端物聯網創新獎」肯定   
4     2024.11.30  14:46         《百貨股》統一超推「7-ELEVEN饗喫鍋火鍋專區」搶冒煙商機   
...          ...    ...                                     ...   
1645  2024.11.27  03:00                                技嘉 內外資相挺   
1646  2024.11.26  22:04             台指期夜盤摜破季線、半年線收腳 後市觀察外資淨空單變化   
1647  2024.11.26  20:26                美股及美債火熱 複委託成長78％ 日股成長逾2倍   
1648  2024.11.26  20:23             創新板、KY股法說12／5登場 金麗、永邑、麗豐打頭陣   
1649  2024.11.26  19:36                           11月26日一分鐘強弱勢股   

                                                   link  
0     https://www.ctee.com.tw/news/20241130700628-43...  
1     https://www.ctee.com.tw/news/20241130700632-43...  
2