In [1]:
# 必要なライブラリをインポート
!pip install selenium

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import datetime

def extract_stock_data(driver):
    """Read the Highcharts tooltip currently present in the page and return one row.

    Args:
        driver: Selenium WebDriver; its ``page_source`` is parsed with
            BeautifulSoup on every call.

    Returns:
        A list of five strings ``[date, v1, v2, v3, v4]`` where ``date`` is
        formatted as ``YYYY-MM-DD`` and ``v1..v4`` are the texts of the
        2nd-5th tooltip cells with thousands separators (``,``) removed.
        Returns ``None`` when the tooltip table is missing, has fewer than
        five cells, or its first cell is not a ``Y/M/D`` date.
    """
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    tooltip = soup.select_one("div.highcharts-tooltip table")

    # No tooltip is rendered until the pointer hovers over a data point.
    if tooltip is None:
        return None

    tds = tooltip.select("td")
    if len(tds) < 5:
        return None

    # Normalize the date cell ("Y/M/D") to zero-padded ISO form.
    try:
        y, m, d = map(int, tds[0].text.strip().split('/'))
    except ValueError:
        # Raised both for non-numeric parts and for a wrong number of parts.
        # Catching only ValueError keeps KeyboardInterrupt/SystemExit working.
        return None
    date = f"{y:04d}-{m:02d}-{d:02d}"

    # Four price fields (presumably open/high/low/close - confirm against the
    # page); strip the thousands separators but keep them as strings.
    prices = [td.text.strip().replace(',', '') for td in tds[1:5]]
    return [date] + prices


def get_stock_values(driver, url):
    """Sweep the mouse across the chart at ``url`` and collect tooltip rows.

    Loads the page, waits (up to 10 s) for the Highcharts series group to be
    present, positions the pointer at a horizontal offset of half the chart
    width (intended as the chart's right edge - assumes offsets are measured
    from the element's center, TODO confirm for the installed Selenium
    version), then steps left one pixel at a time for ``width`` steps, reading
    the tooltip after each step.

    Args:
        driver: Selenium WebDriver used for navigation and pointer actions.
        url: Address of the page containing the chart.

    Returns:
        A list of rows as produced by ``extract_stock_data``, one per distinct
        date, in the order the dates were first encountered.
    """
    driver.get(url)
    time.sleep(5)

    locator = (By.CSS_SELECTOR, 'g.highcharts-series-group')
    chart = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(locator)
    )
    pointer = ActionChains(driver)

    # Starting position for the sweep.
    width = chart.size['width']
    pointer.move_to_element_with_offset(chart, width // 2, 0).perform()
    time.sleep(0.5)

    # Keyed by date so each date is kept once; dict preserves insertion order.
    rows_by_date = {}

    for _ in range(width):
        try:
            # Step one pixel to the left and give the tooltip time to update.
            pointer.move_by_offset(-1, 0).perform()
            time.sleep(0.015)
            row = extract_stock_data(driver)
        except Exception as e:
            print(f"移動エラー: {e}")
            continue

        if row and row[0] not in rows_by_date:
            rows_by_date[row[0]] = row

    return list(rows_by_date.values())


def main():
    """Scrape the Nikkei 225 six-month chart and print the collected rows.

    Starts a headless Chrome session, runs ``get_stock_values`` against the
    chart page, prints the elapsed time (measured from before the browser is
    launched) and then each row as a comma-separated line. The browser is
    always shut down, even when scraping fails.
    """
    started_at = time.time()

    # Headless Chrome with a large window so the chart is rendered wide.
    options = webdriver.ChromeOptions()
    for flag in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        "--window-size=1920,1080",
    ):
        options.add_argument(flag)

    chrome_driver = webdriver.Chrome(options=options)

    try:
        url = 'https://www.nikkei.com/markets/worldidx/chart/nk225/?type=6month'
        rows = get_stock_values(chrome_driver, url)

        elapsed = time.time() - started_at
        print(f"スクレイピング時間: {elapsed:.2f}秒\n")

        for row in rows:
            print(", ".join(row))
    finally:
        chrome_driver.quit()

# Run the scraper only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Collecting selenium
  Downloading selenium-4.34.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.30.0 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio~=0.30.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.34.0-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m109.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.30.0-py3-none-any.whl (499 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m499.2/499.2 kB[0m [31m43.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloading outcome-1.3.0.pos