In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
from urllib.request import Request, urlopen
import os
import json
import re

# --- Configuration -----------------------------------------------------------
DATA_PATH = "./data"
RAW_DATA_PATH = os.path.join(DATA_PATH, "raw")
SHOWDOWN_URL = "https://raw.githubusercontent.com/smogon/pokemon-showdown/master/data/"
# Upper bound (seconds) on each blocking network operation, so a stalled
# connection fails the cell instead of hanging it indefinitely.
DOWNLOAD_TIMEOUT_S = 30

# Creating the nested directory also creates DATA_PATH.
os.makedirs(RAW_DATA_PATH, exist_ok=True)

files = ["pokedex.ts", "moves.ts", "natures.ts",
         "items.ts", "abilities.ts", "typechart.ts",
         "learnsets.ts", "conditions.ts", "tags.ts"]

# --- Download the raw TypeScript data files ----------------------------------
for filename in files:
    request = Request(SHOWDOWN_URL + filename, headers={"User-Agent": "Mozilla/5.0"})
    # Read the whole payload BEFORE opening the destination: opening with "wb"
    # truncates the file, so a failed download must not reach that point and
    # clobber a previously saved copy.
    with urlopen(request, timeout=DOWNLOAD_TIMEOUT_S) as response:
        payload = response.read()
    with open(os.path.join(RAW_DATA_PATH, filename), "wb") as f:
        f.write(payload)

In [None]:
# Load the raw TypeScript source of the Pokedex table from the raw-data
# directory (the same location the download loop writes to).
filename = "pokedex.ts"
ts_path = Path(RAW_DATA_PATH) / filename
text = ts_path.read_text(encoding="utf-8")

In [None]:





# The file exports `Pokedex` typed as `SpeciesDataTable`.
var_name = "Pokedex"
anchor = f"export const {var_name}"
idx = text.find(anchor)
if idx == -1:
    raise ValueError(f"Could not find {anchor} in {ts_path}")

# Lexical elements whose *contents* must never be read as structure:
# quoted strings (either quote style, honouring backslash escapes) and comments.
# Template literals and regex literals are not recognised.
TS_STRING = r'"(?:\\.|[^"\\])*"' + "|" + r"'(?:\\.|[^'\\])*'"
TS_COMMENT = r"/\*.*?\*/|//[^\n]*"

# Grab the object literal by matching braces.
# Start from the assignment so braces inside a type annotation
# (e.g. `: {[k: string]: T} = {...}`) are not mistaken for the value.
assign_at = text.find("=", idx + len(anchor))
start = text.find("{", assign_at) if assign_at != -1 else -1
if start == -1:
    raise ValueError("Could not find start of object literal")
depth = 0
end = None
# Strings and comments are consumed as whole tokens, so any braces inside
# them are skipped; only bare `{` / `}` tokens change the depth.
brace_scanner = re.compile(TS_STRING + "|" + TS_COMMENT + r"|[{}]", re.S)
for token in brace_scanner.finditer(text, start):
    ch = token.group()
    if ch == "{":
        depth += 1
    elif ch == "}":
        depth -= 1
        if depth == 0:
            end = token.start()
            break
if end is None:
    raise ValueError("Could not find end of object literal")
obj_text = text[start:end + 1]

# Convert TS object literal to JSON.
# 1) Drop comments, but keep string literals verbatim (a `//` or `/*` inside
#    a string is data, not a comment).
string_or_comment = re.compile("(?P<string>" + TS_STRING + ")|" + TS_COMMENT, re.S)
obj_text = string_or_comment.sub(lambda m: m.group("string") or "", obj_text)

# 2) Quote bare keys and remove trailing commas, touching only the text
#    *between* string literals. With one capture group, re.split returns
#    [code, string, code, string, ..., code]; even indices are code.
segments = re.split("(" + TS_STRING + ")", obj_text, flags=re.S)
for n in range(0, len(segments), 2):
    code = segments[n]
    # NOTE: the replacement template must contain plain quotes. `\"` is an
    # unknown escape that `re` leaves alone, which would emit a literal
    # backslash before each quote and make the result invalid JSON.
    code = re.sub(r"([{,]\s*)([A-Za-z_$][\w$]*|\d+)\s*:", r'\1"\2":', code)
    code = re.sub(r",\s*([}\]])", r"\1", code)
    segments[n] = code
obj_text = "".join(segments)

# Single-quoted strings are skipped above but not rewritten to double quotes;
# json.loads will reject them if the source ever contains any.
pokedex = json.loads(obj_text)
print(f"Loaded {len(pokedex)} species; sample keys: {list(pokedex)[:5]}")
