In [None]:
import requests
import os

if not os.path.exists('difficulties.html'):
    print("Downloading difficulties.html...")
    webpage = requests.get('https://jtohs-joke-towers.fandom.com/wiki/Main_Difficulty_Chart')
    with open('difficulties.html', 'w', encoding='utf-8') as file:
        file.write(webpage.text)
    html = webpage.text
else:
    print("Using existing difficulties.html...")
    with open('difficulties.html', 'r', encoding='utf-8') as file:
        html = file.read()
    
html = html.split("Baseline (Start of the Chart)</b>\n</td></tr>\n")[1].strip()

for entry in html.split('<tr>\n'):
    # find href="... and title="... in the entry
    src = entry.split('data-src="')
    title = entry.split('title="')
    if len(src) < 2 or len(title) < 2:
        continue  # skip if href or title is not found
        
    src = src[1].split('"')[0]
    title = title[1].split('"')[0].replace('w:c:jtoh:', '').replace('w:c:jtohs-hardest-towers:', '').replace('w:c:etoh-misc', '')

    # check if image is already downloaded
    path = os.path.join('images', title.split("/")[0] + ".png")
    if os.path.exists(path):
        print(f"Image {title} already exists, skipping download.")
        continue

    # download the images
    response = requests.get(src)
    if response.status_code == 200:
        with open(path, 'wb') as img_file:
            img_file.write(response.content)

Using existing difficulties.html...
Image The First Difficulty already exists, skipping download.
Image The Lower Gap already exists, skipping download.
Image Negativity already exists, skipping download.
Image Unimpossible already exists, skipping download.
Image Friendliness already exists, skipping download.
Image True Ease already exists, skipping download.
Image A already exists, skipping download.
Image Felix the ДА already exists, skipping download.
Image Exist already exists, skipping download.
Image Reversed Peripherality already exists, skipping download.
Image Relax already exists, skipping download.
Image Skip already exists, skipping download.
Image Restful already exists, skipping download.
Image Ifinity already exists, skipping download.
Image Instant Win already exists, skipping download.
Image Excavation Chain already exists, skipping download.
Image Millisecondless already exists, skipping download.
Image Astronomical already exists, skipping download.
Image Win alrea

In [55]:
import json

with open('source.wikitext', 'r') as file:
    content = file.read()
    
# get rid of text before '''Baseline (Start of the Chart)'''
difficulties = dict()
for entry in content.split('|-\n|[[File:')[1:]:
    lines = entry.split('\n')
    image = lines[0].split('|')[0]
    name = lines[1][3:-2].replace('w:c:jtoh:', '').replace('w:c:jtohs-hardest-towers:', '').replace('w:c:etoh-misc', '')
    rating = lines[3][1:]
    rating_split = rating.split('</font>')
    if len(rating_split) > 1:
        rating = rating_split[0].split('>')[-1]
    rating = rating.strip()
    difficulties[name] = {
        'rating': rating   
    }
print(difficulties)

{'Randomization': {'rating': ''}, 'Blank': {'rating': ''}, 'Construct': {'rating': ''}, 'Walking': {'rating': ''}, 'Reflective': {'rating': ''}, 'Sketchy': {'rating': ''}, 'Puzzling': {'rating': ''}, '15]] (EVEN': {'rating': ''}, 'Zemblanity': {'rating': ''}, 'Spectrumplex': {'rating': '-'}, 'Gingerbread|Gingerbread': {'rating': ''}, 'Epic|Epic (EToH)': {'rating': ''}, 'korn|korn': {'rating': ''}, 'OooOOOoOOOOooo above catastrophic difficulty OoOOooOOoo|OooOOOoOOOOooo above catastrophic difficulty OoOOooOOoo': {'rating': ''}, 'Sisyphus|Sisyphus': {'rating': ''}, 'Epinephrine|Epinephrine': {'rating': ''}, 'OVERKILL|OVERKILL': {'rating': ''}, 'MSD Speedy|MSD Speedy': {'rating': ''}, 'Gui remorseless|gui remorseless': {'rating': ''}, ':Divine|Divine (EToH)': {'rating': ''}, ':Gui nil|gui nil': {'rating': ''}, ':Anti Boby|Anti Boby': {'rating': '}'}, 'The First Difficulty': {'rating': '-∞'}, 'The Lower Gap': {'rating': '-∞ to 115'}, 'Negativity': {'rating': '-(FOOT<sup>10</sup>(10<sup>100<

In [62]:
if not os.path.exists('pages'):
    os.makedirs('pages')
    
for name, entry in difficulties.items():
    name = name.split("/")[0].split("|")[0]
    vcheck = name.split(":")
    if len(vcheck) > 1:
        name = vcheck[1]
        
    boldcheck = name.split("[[")
    if len(boldcheck) > 1:
        name = boldcheck[1].split("]]")[0]
    
    path = os.path.join('pages', name + '.html')
    if os.path.exists(path):
        print(f"Page for {name} already exists, skipping download.")
        continue
    
    page = requests.get(f'https://jtohs-joke-towers.fandom.com/wiki/{name}')
    if page.status_code != 200:
        print(f"Failed to fetch page for {name}")
        continue
    with open(path, 'w', encoding='utf-8') as file:
        file.write(page.text)

Page for Randomization already exists, skipping download.
Page for Blank already exists, skipping download.
Page for Construct already exists, skipping download.
Page for Walking already exists, skipping download.
Page for Reflective already exists, skipping download.
Page for Sketchy already exists, skipping download.
Page for Puzzling already exists, skipping download.
Failed to fetch page for 15]] (EVEN
Page for Zemblanity already exists, skipping download.
Page for Spectrumplex already exists, skipping download.
Failed to fetch page for Gingerbread
Page for Epic already exists, skipping download.
Failed to fetch page for korn
Failed to fetch page for OooOOOoOOOOooo above catastrophic difficulty OoOOooOOoo
Failed to fetch page for Sisyphus
Failed to fetch page for Epinephrine
Failed to fetch page for OVERKILL
Failed to fetch page for MSD Speedy
Failed to fetch page for Gui remorseless
Page for Divine already exists, skipping download.
Failed to fetch page for Gui nil
Failed to fetch