In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from datetime import datetime
import re
import time

# Chromeドライバの起動
driver = webdriver.Chrome()

# 検索対象タイトル（ここを変えれば他タイトルにも応用可）
title = "V4"

print(f"検索中：{title}")

try:
    # トップページにアクセス
    driver.get("https://www.4gamer.net/")
    WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "word")))

    # 検索実行
    search_box = driver.find_element(By.NAME, "word")
    search_box.clear()
    search_box.send_keys(title)
    search_box.send_keys(Keys.RETURN)

    # 検索結果から /games/ リンクを取得
    WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'h2 > a[href^="/games/"]'))
    )
    links = driver.find_elements(By.CSS_SELECTOR, 'h2 > a[href^="/games/"]')
    game_links = [l.get_attribute("href") for l in links if l.get_attribute("href") and "/games/" in l.get_attribute("href")]
    
    if not game_links:
        print(" → /games/ リンクが見つかりませんでした")
    else:
        print(" → 見つかったリンク一覧：")
        for url in game_links:
            print("   ", url)

        # 最初のリンクにアクセスして発売日を抽出
        url = game_links[0]
        if not url.startswith("http"):
            url = "https://www.4gamer.net" + url

        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.TAG_NAME, "th"))
        )

        th_list = driver.find_elements(By.TAG_NAME, "th")
        found = False

        for th in th_list:
            if "発売日" in th.text:
                td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                if len(td_list) >= 2:
                    date_text = td_list[1].text.strip()
                    match = re.search(r"\d{4}/\d{2}/\d{2}", date_text)
                    if match:
                        date_str = match.group()
                        parsed = datetime.strptime(date_str, "%Y/%m/%d")
                        print(f" → 発売日：{date_str}")
                        found = True
                        break

        if not found:
            print(" → 発売日が見つかりませんでした")

except TimeoutException:
    print(" → タイムアウトしました")
except Exception as e:
    print(" → 予期せぬエラーが発生しました")
    import traceback
    traceback.print_exc()
finally:
    driver.quit()


検索中：V4
 → 見つかったリンク一覧：
    https://www.4gamer.net/games/487/G048745/
    https://www.4gamer.net/games/459/G045905/
    https://www.4gamer.net/games/459/G045904/
    https://www.4gamer.net/games/369/G036978/
    https://www.4gamer.net/games/029/G002929/
    https://www.4gamer.net/games/015/G001538/
    https://www.4gamer.net/games/038/G003859/
    https://www.4gamer.net/games/766/G076679/
    https://www.4gamer.net/games/032/G003214/
 → 発売日：2020/09/24


In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from datetime import datetime
import pandas as pd
import traceback
import re

# --- データ読み込み ---
df = pd.read_csv("cleaned_unique_titles.csv")  # タイトル列: "タイトル"
df["発売日"] = ""
df["備考"] = ""
total = len(df)
not_found_titles = []

# --- Selenium起動 ---
driver = webdriver.Chrome()

# --- V4だけ処理 ---
for idx, row in df.iterrows():
    title = row["タイトル"]
    if title != "V4":
        continue

    print(f"[{idx+1}/{total}] 検索中：{title}")

    try:
        # 検索ページ操作
        driver.get("https://www.4gamer.net/")
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "word")))

        search_box = driver.find_element(By.NAME, "word")
        search_box.clear()
        search_box.send_keys(title)
        search_box.send_keys(Keys.RETURN)

        WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'h2 > a[href^="/games/"]'))
        )
        links = driver.find_elements(By.CSS_SELECTOR, 'h2 > a[href^="/games/"]')
        game_links = [l.get_attribute("href") for l in links if l.get_attribute("href") and "/games/" in l.get_attribute("href")]

        if not game_links:
            print(" → ゲーム紹介ページが見つかりません")
            not_found_titles.append(title)
            continue

        # 詳細ページをたどる
        release_dates = []

        for url in game_links:
            if not url.startswith("http"):
                url = "https://www.4gamer.net" + url

            driver.get(url)
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.TAG_NAME, "th")))
            th_list = driver.find_elements(By.TAG_NAME, "th")

            # 発売日取得（td[1] → td[0]）
            for th in th_list:
                if "発売日" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if td_list:
                        date_text = td_list[1].text.strip()
                        match = re.search(r"\d{4}/\d{2}/\d{2}", date_text)
                        if match:
                            date_str = match.group()
                            parsed = datetime.strptime(date_str, "%Y/%m/%d")
                            release_dates.append((parsed, date_str))
                            break

            # 備考取得（td[1] → td[0]）
            for th in th_list:
                if "備考" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if td_list:
                        a_tags = td_list[1].find_elements(By.TAG_NAME, "a")
                        remarks = [a.text.strip() for a in a_tags if a.text.strip()]
                        if remarks:
                            df.at[idx, "備考"] = ", ".join(remarks)
                    break

        # 発売日決定
        if release_dates:
            oldest = min(release_dates, key=lambda x: x[0])[1]
            df.at[idx, "発売日"] = oldest
            print(f" → 最も古い発売日：{oldest}")
            if df.at[idx, "備考"]:
                print(f" → 備考：{df.at[idx, '備考']}")
        else:
            print(" → 発売日が見つかりません")
            not_found_titles.append(title)

    except TimeoutException:
        print(" → 検索結果が見つかりません（タイムアウト）")
        not_found_titles.append(title)
        continue

    except Exception:
        print(" → エラーが発生しました")
        traceback.print_exc()
        not_found_titles.append(title)
        continue

# --- 結果保存 ---
df.to_csv("4gamer_output_v4_only.csv", index=False, encoding="utf-8-sig")

# --- 終了処理 ---
driver.quit()
print("\n完了しました。取得できなかったタイトル数:", len(not_found_titles))


[280/393] 検索中：V4
 → 最も古い発売日：2006/06/17
 → 備考：欧州, カジュアル, ほのぼの, ドライブ

完了しました。取得できなかったタイトル数: 0


In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from datetime import datetime
import pandas as pd
import traceback
import re

# --- データ読み込み ---
df = pd.read_csv("cleaned_unique_titles.csv")
df["発売日"] = ""
df["備考"] = ""
total = len(df)
not_found_titles = []

# --- Selenium起動 ---
driver = webdriver.Chrome()

# --- V4だけ処理 ---
for idx, row in df.iterrows():
    title = row["タイトル"]
    if title != "V4":
        continue

    print(f"[{idx+1}/{total}] 検索中：{title}")

    try:
        # 検索ページへ
        driver.get("https://www.4gamer.net/")
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "word")))

        search_box = driver.find_element(By.NAME, "word")
        search_box.clear()
        search_box.send_keys(title)
        search_box.send_keys(Keys.RETURN)

        # 検索結果のリンク取得
        WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'h2 > a[href^="/games/"]'))
        )
        links = driver.find_elements(By.CSS_SELECTOR, 'h2 > a[href^="/games/"]')
        game_links = [l.get_attribute("href") for l in links if l.get_attribute("href") and "/games/" in l.get_attribute("href")]

        if not game_links:
            print(" → ゲーム紹介ページが見つかりません")
            not_found_titles.append(title)
            continue

        # 各リンクをたどって、発売日＋備考が両方見つかったページを採用
        for url in game_links:
            if not url.startswith("http"):
                url = "https://www.4gamer.net" + url

            driver.get(url)
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.TAG_NAME, "th")))
            th_list = driver.find_elements(By.TAG_NAME, "th")

            # 発売日と備考の仮変数
            local_release_date = None
            local_remarks = None

            for th in th_list:
                if "発売日" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if len(td_list) >= 2:
                        date_text = td_list[1].text.strip()
                        match = re.search(r"\d{4}/\d{2}/\d{2}", date_text)
                        if match:
                            date_str = match.group()
                            parsed = datetime.strptime(date_str, "%Y/%m/%d")
                            local_release_date = (parsed, date_str)

                if "備考" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if td_list:
                        a_tags = td_list[0].find_elements(By.TAG_NAME, "a")
                        remarks = [a.text.strip() for a in a_tags if a.text.strip()]
                        if remarks:
                            local_remarks = ", ".join(remarks)

            # 両方そろったら採用してループ終了
            if local_release_date:
                df.at[idx, "発売日"] = local_release_date[1]
                if local_remarks:
                    df.at[idx, "備考"] = local_remarks
                print(f" → 最も古い発売日：{local_release_date[1]}")
                if local_remarks:
                    print(f" → 備考：{local_remarks}")
                break  # 最初に見つかった正しいページで打ち切り

        else:
            print(" → 発売日が見つかりません")
            not_found_titles.append(title)

    except TimeoutException:
        print(" → 検索結果が見つかりません（タイムアウト）")
        not_found_titles.append(title)
        continue

    except Exception:
        print(" → エラーが発生しました")
        traceback.print_exc()
        not_found_titles.append(title)
        continue

# --- 結果保存 ---
df.to_csv("4gamer_output_v4_corrected.csv", index=False, encoding="utf-8-sig")

# --- 終了処理 ---
driver.quit()
print("\n完了しました。取得できなかったタイトル数:", len(not_found_titles))


[280/393] 検索中：V4
 → 最も古い発売日：2020/09/24

完了しました。取得できなかったタイトル数: 0


In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from datetime import datetime
import pandas as pd
import traceback
import re

# --- データ読み込み ---
df = pd.read_csv("cleaned_unique_titles.csv")
df["発売日"] = ""
df["備考"] = ""
total = len(df)
not_found_titles = []

# --- Selenium起動 ---
driver = webdriver.Chrome()

# --- V4だけ処理 ---
for idx, row in df.iterrows():
    title = row["タイトル"]
    if title != "V4":
        continue

    print(f"[{idx+1}/{total}] 検索中：{title}")

    try:
        # 検索ページへ
        driver.get("https://www.4gamer.net/")
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "word")))

        search_box = driver.find_element(By.NAME, "word")
        search_box.clear()
        search_box.send_keys(title)
        search_box.send_keys(Keys.RETURN)

        # 検索結果リンク取得
        WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'h2 > a[href^="/games/"]'))
        )
        links = driver.find_elements(By.CSS_SELECTOR, 'h2 > a[href^="/games/"]')
        game_links = [l.get_attribute("href") for l in links if l.get_attribute("href") and "/games/" in l.get_attribute("href")]

        if not game_links:
            print(" → ゲーム紹介ページが見つかりません")
            not_found_titles.append(title)
            continue

        # 詳細ページを順に探索
        for url in game_links:
            if not url.startswith("http"):
                url = "https://www.4gamer.net" + url

            driver.get(url)
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.TAG_NAME, "th")))
            th_list = driver.find_elements(By.TAG_NAME, "th")

            local_release_date = None
            local_remarks = None

            # 発売日探索
            for th in th_list:
                if "発売日" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if len(td_list) >= 2:
                        date_text = td_list[1].text.strip()
                        match = re.search(r"\d{4}/\d{2}/\d{2}", date_text)
                        if match:
                            date_str = match.group()
                            parsed = datetime.strptime(date_str, "%Y/%m/%d")
                            local_release_date = (parsed, date_str)
                    break

            # 備考探索：①旧構造（<th>備考</th>）
            for th in th_list:
                if "備考" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if td_list:
                        a_tags = td_list[0].find_elements(By.TAG_NAME, "a")
                        if a_tags:
                            remarks = [a.text.strip() for a in a_tags if a.text.strip()]
                            if remarks:
                                local_remarks = ", ".join(remarks)
                    break

            # 備考探索：②新構造（ul.taglist > li > a）
            if not local_remarks:
                try:
                    ul_elem = driver.find_element(By.CSS_SELECTOR, "ul.taglist")
                    a_tags = ul_elem.find_elements(By.TAG_NAME, "a")
                    remarks = [a.text.strip() for a in a_tags if a.text.strip()]
                    if remarks:
                        local_remarks = ", ".join(remarks)
                except:
                    pass  # 備考が存在しないページはスキップでOK

            # 成功したら保存して break
            if local_release_date:
                df.at[idx, "発売日"] = local_release_date[1]
                if local_remarks:
                    df.at[idx, "備考"] = local_remarks
                print(f" → 最も古い発売日：{local_release_date[1]}")
                if local_remarks:
                    print(f" → 備考：{local_remarks}")
                break

        else:
            print(" → 発売日が見つかりません")
            not_found_titles.append(title)

    except TimeoutException:
        print(" → 検索結果が見つかりません（タイムアウト）")
        not_found_titles.append(title)
        continue

    except Exception:
        print(" → エラーが発生しました")
        traceback.print_exc()
        not_found_titles.append(title)
        continue

# --- 結果保存 ---
df.to_csv("4gamer_output_v4_final.csv", index=False, encoding="utf-8-sig")

# --- 終了処理 ---
driver.quit()
print("\n完了しました。取得できなかったタイトル数:", len(not_found_titles))


[280/393] 検索中：V4
 → 最も古い発売日：2020/09/24

完了しました。取得できなかったタイトル数: 0


In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from datetime import datetime
import pandas as pd
import traceback
import re

# --- データ読み込み ---
df = pd.read_csv("cleaned_unique_titles.csv")
df["発売日"] = ""
df["備考"] = ""
total = len(df)
not_found_titles = []

# --- Selenium起動 ---
driver = webdriver.Chrome()

# --- V4だけ処理 ---
for idx, row in df.iterrows():
    title = row["タイトル"]
    if title != "V4":
        continue

    print(f"[{idx+1}/{total}] 検索中：{title}")

    try:
        # 検索ページへ
        driver.get("https://www.4gamer.net/")
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "word")))

        search_box = driver.find_element(By.NAME, "word")
        search_box.clear()
        search_box.send_keys(title)
        search_box.send_keys(Keys.RETURN)

        # 検索結果リンク取得
        WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'h2 > a[href^="/games/"]'))
        )
        links = driver.find_elements(By.CSS_SELECTOR, 'h2 > a[href^="/games/"]')
        game_links = [l.get_attribute("href") for l in links if l.get_attribute("href") and "/games/" in l.get_attribute("href")]

        if not game_links:
            print(" → ゲーム紹介ページが見つかりません")
            not_found_titles.append(title)
            continue

        # 詳細ページを順に探索
        for url in game_links:
            if not url.startswith("http"):
                url = "https://www.4gamer.net" + url

            driver.get(url)
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.TAG_NAME, "th")))
            th_list = driver.find_elements(By.TAG_NAME, "th")

            local_release_date = None
            local_remarks = None

            # 発売日取得（td[1]）
            for th in th_list:
                if "発売日" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if len(td_list) >= 2:
                        date_text = td_list[1].text.strip()
                        match = re.search(r"\d{4}/\d{2}/\d{2}", date_text)
                        if match:
                            date_str = match.group()
                            parsed = datetime.strptime(date_str, "%Y/%m/%d")
                            local_release_date = (parsed, date_str)
                    break

            # 備考取得（td[1]から<a>タグを取得）←小コードと同じ
            for th in th_list:
                if "備考" in th.text:
                    td_list = th.find_elements(By.XPATH, "./following-sibling::td")
                    if len(td_list) >= 2:
                        a_tags = td_list[1].find_elements(By.TAG_NAME, "a")
                        remarks = [a.text.strip() for a in a_tags if a.text.strip()]
                        if remarks:
                            local_remarks = ", ".join(remarks)
                    break

            # 両方見つかったら採用
            if local_release_date:
                df.at[idx, "発売日"] = local_release_date[1]
                if local_remarks:
                    df.at[idx, "備考"] = local_remarks
                print(f" → 最も古い発売日：{local_release_date[1]}")
                if local_remarks:
                    print(f" → 備考：{local_remarks}")
                break

        else:
            print(" → 発売日が見つかりません")
            not_found_titles.append(title)

    except TimeoutException:
        print(" → 検索結果が見つかりません（タイムアウト）")
        not_found_titles.append(title)
        continue

    except Exception:
        print(" → エラーが発生しました")
        traceback.print_exc()
        not_found_titles.append(title)
        continue

# --- 結果保存 ---
df.to_csv("4gamer_output_v4_final.csv", index=False, encoding="utf-8-sig")

# --- 終了処理 ---
driver.quit()
print("\n完了しました。取得できなかったタイトル数:", len(not_found_titles))


[280/393] 検索中：V4
 → 最も古い発売日：2020/09/24
 → 備考：韓国, 基本プレイ無料

完了しました。取得できなかったタイトル数: 0
