In [1]:
import requests
import re
import json

In [2]:
# Base URL of the raw `data/` directory on the master branch of the Pokemon Showdown GitHub repository.
# File names (e.g. 'pokedex.ts') are appended directly to this string, so the trailing slash is required.
source = 'https://raw.githubusercontent.com/smogon/pokemon-showdown/refs/heads/master/data/'

The data is pulled from the open-source [Pokemon Showdown](https://github.com/smogon/pokemon-showdown) repository, which includes the database used for the Smogon [Pokedex](https://www.smogon.com/dex/sv/pokemon/). It contains both each Pokemon's general information and its tiering data, which is all that we need for this project.

In [3]:
# import pokedex
# Download the raw Pokedex TypeScript source and save it as pokedex.ts in the working directory.
r = requests.get(f'{source}pokedex.ts', timeout=30)  # timeout: do not hang forever on a stalled connection
r.raise_for_status()  # stop here on a 4xx/5xx instead of saving an error page as pokedex.ts
with open("pokedex.ts", "w") as ts_file:
    ts_file.write(r.text)

In [4]:
# smogon tier data
# Download the raw formats-data TypeScript source and save it as tiers.ts in the working directory.
r = requests.get(f'{source}formats-data.ts', timeout=30)  # timeout: do not hang forever on a stalled connection
r.raise_for_status()  # stop here on a 4xx/5xx instead of saving an error page as tiers.ts
with open("tiers.ts", "w") as ts_file:
    ts_file.write(r.text)

Although the data is stored as TypeScript (.ts) source files, it is quite easy to convert it to JSON with regular expressions. After removing the code that wraps the data, all that's left is to put quotation marks around the unquoted keys, turn single-quoted strings into double-quoted ones, and remove comments, trailing commas, and the final semicolon.

In [5]:
# Regex fragments for the two kinds of string literal that can appear in the data.
_DQ_STRING = r'"(?:\\.|[^"\\])*"'
_SQ_STRING = r"'(?:\\.|[^'\\])*'"

# One left-to-right scan over the object literal. String literals are listed first so
# that anything inside them ("//", "word:", "'") is consumed as part of the string and
# never mistaken for a comment, a key or a nested quote.
_TS_TOKEN = re.compile(
    "|".join([
        _DQ_STRING,          # double-quoted string: already valid JSON, kept verbatim
        _SQ_STRING,          # single-quoted string: re-quoted with double quotes
        r"//[^\n]*",         # line comment: dropped
        r"/\*.*?\*/",        # block comment: dropped
        r"\b\w+(?=\s*:)",    # bare object key (identifier or number): wrapped in quotes
    ]),
    re.DOTALL,
)

# Matches either a whole double-quoted string (group 1, kept) or a comma that is
# directly followed by a closing brace/bracket (dropped).
_TRAILING_COMMA = re.compile("(" + _DQ_STRING + r")|,(?=\s*[}\]])")


def _single_to_double(token):
    """Turn a single-quoted literal into the equivalent double-quoted JSON string."""
    def fix(match):
        piece = match.group(0)
        if piece == "\\'":
            return "'"        # \' is not a legal JSON escape
        if piece == '"':
            return '\\"'      # a bare " must be escaped once the delimiters change
        return piece          # every other escape sequence is passed through unchanged
    return '"' + re.sub(r'\\.|"', fix, token[1:-1], flags=re.DOTALL) + '"'


def _convert_token(match):
    """Replacement callback for _TS_TOKEN: map one matched token to its JSON form."""
    token = match.group(0)
    lead = token[0]
    if lead == '"':
        return token
    if lead == "'":
        return _single_to_double(token)
    if lead == "/":
        return ""
    return f'"{token}"'


def reformat(path):
    """Convert ``{path}.ts`` (an ``export const X = {...};`` data file) to ``{path}.json``.

    Steps: drop the ``export const ... = `` prefix, strip comments, quote bare keys,
    convert single-quoted strings, remove the final semicolon and trailing commas,
    then parse the result and write it back out with a 4-space indent.

    Unlike a plain sequence of regex substitutions, the rewrite never touches the
    *contents* of string literals, so values such as "Type: Null" or a URL containing
    "//" survive without special-casing.

    Args:
        path: file path without extension; ``.ts`` is read and ``.json`` is written.

    Returns:
        None. The result is written to ``{path}.json``.

    Raises:
        OSError: if ``{path}.ts`` cannot be read or ``{path}.json`` cannot be written.
        json.JSONDecodeError: if the converted text is still not valid JSON.
    """
    # NOTE(review): both files are opened with the platform default encoding, which
    # matches how the download cells write them -- change all of them together if at all.
    with open(f"{path}.ts", "r") as file:
        content = file.read()

    # Lazy match up to the first "= " removes `export const Name: Type = `.
    json_text = re.sub(r"^export const .+?= ", "", content.strip(), flags=re.DOTALL)
    json_text = _TS_TOKEN.sub(_convert_token, json_text)
    # Comment removal may leave whitespace after the closing `;`, so strip it first.
    json_text = json_text.rstrip().rstrip(";")
    json_text = _TRAILING_COMMA.sub(lambda m: m.group(1) or "", json_text)

    json_data = json.loads(json_text)
    with open(f'{path}.json', "w") as json_file:
        json.dump(json_data, json_file, indent=4)

In [6]:
# Convert both downloaded TypeScript files to JSON (tiers first, then the pokedex).
for ts_name in ('tiers', 'pokedex'):
    reformat(ts_name)