In [1]:
import re
import requests
import json
from bs4 import BeautifulSoup

In [2]:
# ===== ユーティリティ関数群（共通） =====
def _text(s):
    return s.strip().replace("\xa0", " ") if s else ""

def _lower(s):
    """Return the ``_text``-normalized form of ``s`` in lower case."""
    normalized = _text(s)
    return normalized.lower()

def to_int(s):
    """Extract the first integer found in ``s``.

    ``s`` is converted with ``str()`` first, so numbers are accepted as well as
    strings. Thousands separators (``,``) inside the digit run are dropped.

    Returns:
        The parsed ``int``, or ``None`` when ``s`` is ``None`` or contains no digit.
    """
    # Test for None explicitly: a plain truthiness check would wrongly turn the
    # legitimate value 0 into None.
    if s is None:
        return None
    m = re.search(r"\d[\d,]*", str(s))
    return int(m.group(0).replace(",", "")) if m else None

def to_float_percent(s):
    """Parse the first ``<number>%`` in ``s`` and return it as a fraction.

    For example ``"80%"`` gives ``0.8``.

    Returns:
        The percentage divided by 100 as a ``float``, or ``None`` when ``s`` is
        falsy or holds no well-formed number directly followed by ``%``.
    """
    if not s:
        return None
    # Accept only a well-formed decimal (digits with at most one dot). The
    # lookbehind stops a match from starting in the middle of a longer
    # digit/dot run, so malformed text such as "...%" or "1.2.3%" yields None
    # instead of making float() raise ValueError.
    m = re.search(r"(?<![\d.])(\d+(?:\.\d*)?|\.\d+)%", str(s))
    return float(m.group(1)) / 100.0 if m else None

def safe_set(d, key, val):
    """Store ``val`` under ``key`` in ``d`` unless the key exists or ``val`` is None.

    An existing entry is never overwritten, so the first value set for a key wins.
    """
    if key in d or val is None:
        return
    d[key] = val

def _extract_headers_and_mask(table):
    """thead か tr.table-headers の th を採用。header-note を除去し、checkbox列は除外"""
    ths = table.select("thead tr th")
    if not ths:
        ths = table.select("tbody tr.table-headers th")
    headers, keep_idx = [], []
    for i, th in enumerate(ths):
        for note in th.select(".header-note"):
            note.decompose()
        cls = " ".join(th.get("class", [])).lower()
        if "checkbox" in cls:
            continue
        label_el = th.select_one("a.table-sort") or th.select_one("span") or th.select_one("a")
        label = (label_el.get_text(strip=True) if label_el else th.get_text(strip=True)) or f"col{i+1}"
        headers.append(label)
        keep_idx.append(i)
    return headers, keep_idx

def _parse_table(table):
    """Convert a ``<table>`` into ``(headers, rows)``.

    ``rows`` is a list of ``{header: value}`` dicts built from the ``tbody``
    rows, restricted to the columns kept by ``_extract_headers_and_mask``.
    Rows are skipped when they carry the ``table-headers`` class, have no
    ``td`` cells, have only empty values, or have an empty ``Name`` value.
    A row whose value count does not match the header count is keyed
    ``col1``, ``col2``, ... instead of by header.
    """
    headers, keep_idx = _extract_headers_and_mask(table)
    # Position of the "Name" column, looked up once for every row.
    name_pos = headers.index("Name") if "Name" in headers else None

    rows = []
    for tr in table.select("tbody tr"):
        if "table-headers" in (tr.get("class") or []):
            continue
        cells = tr.find_all("td")
        if not cells:
            continue

        vals = [_text(cells[j].get_text()) for j in keep_idx if j < len(cells)]
        if not any(vals):
            continue
        # A short row may not reach the Name column; such a row is kept.
        if name_pos is not None and name_pos < len(vals) and not vals[name_pos]:
            continue

        if headers and len(vals) == len(headers):
            rows.append(dict(zip(headers, vals)))
        else:
            rows.append({f"col{k+1}": v for k, v in enumerate(vals)})
    return headers, rows

def _select_fields(rows, wanted):
    return [{k: r.get(k) for k in wanted if k in r} for r in rows]

In [3]:
def _parse_weapon_card(card_div, data):
    """Copy the weapon card's label/value fields and its equip list into ``data``."""
    for cf in card_div.select("div.card-field"):
        label_el = cf.select_one(".label")
        value_el = cf.select_one(".value")
        if not label_el or not value_el:
            continue
        key = _text(label_el.get_text())
        val = _text(value_el.get_text())
        # The Type value carries an "(Icon)" marker; drop it.
        if key == "Type" and val:
            val = val.replace("(Icon)", "").strip()
        # Numeric fields are stored as numbers rather than display strings.
        if key in {"Base Power", "Price", "Value"}:
            val = to_int(val)
        elif key == "Base Accuracy":
            val = to_float_percent(val)
        # Output keys are the card labels with spaces removed ("Base Power" -> "BasePower").
        safe_set(data, key.replace(" ", ""), val)

    # Equipped By: text of each linked entry in the equipment area.
    link_texts = (_text(a.get_text()) for a in card_div.select("div.equipment a.card-link"))
    eqs = [t for t in link_texts if t]
    if eqs:
        safe_set(data, "EquippedBy", eqs)


def _parse_weapon_tables(soup, data):
    """Add the "Shops" and "Treasures" row lists found in the page's data tables."""
    # (lower-cased caption prefix, output key, columns to keep)
    table_specs = (
        ("shops: where", "Shops", ["Map", "Shop"]),
        ("treasures: containing", "Treasures", ["Treasure", "Map", "Notes", "Guarded By"]),
    )
    for block in soup.select("div.data-table"):
        cap = block.select_one(".table-caption")
        tlow = _lower(cap.get_text()) if cap else ""
        spec = next((s for s in table_specs if tlow.startswith(s[0])), None)
        if spec is None:
            # Not a table we report; skip it without parsing its rows.
            continue
        tbl = block.select_one(".data-table-container > table")
        if not tbl:
            continue
        _, rows = _parse_table(tbl)
        _, out_key, wanted = spec
        selected = _select_fields(rows, wanted)
        if selected:
            # safe_set keeps the first table seen for each output key.
            safe_set(data, out_key, selected)


def fetch_weapon_data(url, base_url):
    """Download a weapon page and return its details as a dict.

    Args:
        url: Full URL of the weapon page.
        base_url: Prefix removed from ``url`` to obtain the weapon slug, from
            which the display name is derived (hyphens become spaces, then
            title-cased).

    Returns:
        A dict that always has ``"name"``. When present on the page it also
        holds the card fields (keyed by label with spaces removed),
        ``"EquippedBy"``, ``"Shops"`` and ``"Treasures"``.

    Raises:
        requests.HTTPError: if the response has an error status
            (from ``raise_for_status``).
    """
    headers_req = {
        "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"),
        "Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
    }
    r = requests.get(url, headers=headers_req, timeout=20)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # The name comes from the URL slug, not from the page content.
    name_part = url.replace(base_url, "").strip("/")
    data = {"name": name_part.replace("-", " ").title()}

    # Weapon card section.
    card_div = soup.select_one("div.card-ffiii-weapons")
    if card_div:
        _parse_weapon_card(card_div, data)

    # Shops / Treasures tables.
    _parse_weapon_tables(soup, data)
    return data

In [4]:
# Section of the site that hosts one page per weapon; the slug is appended to it.
base_url = "https://guides.gamercorner.net/ffiii/weapons/"
weapon_name = "tonfa"  # change this slug to look up another weapon
url = f"{base_url}{weapon_name}"

# Scrape the page, then pretty-print the result with non-ASCII text left readable.
json_out = fetch_weapon_data(url, base_url)
print(json.dumps(json_out, ensure_ascii=False, indent=2))

{
  "name": "Tonfa",
  "Type": "Nunchaku",
  "BasePower": 20,
  "BaseAccuracy": 0.8,
  "Price": 500,
  "Value": 250,
  "EquippedBy": [
    "OK",
    "Wa",
    "Mo",
    "WM",
    "BM",
    "RM",
    "Ra",
    "Kn",
    "Th",
    "Sc",
    "Ge",
    "Dr",
    "Vi",
    "BB",
    "MK",
    "Ev",
    "Ba",
    "Ma",
    "De",
    "Su",
    "Sa",
    "Ni"
  ],
  "Shops": [
    {
      "Map": "Dwarven Hollow",
      "Shop": "Weapons"
    }
  ],
  "Treasures": [
    {
      "Treasure": "Tonfa",
      "Map": "Castle Sasune",
      "Notes": "Chest in hidden area in main keep 3F",
      "Guarded By": "-"
    }
  ]
}
