In [None]:
import requests
import os

# Fetch the difficulty-chart page once and cache it on disk, so re-running this
# cell reuses the local copy instead of requesting the wiki again.
if not os.path.exists('difficulties.html'):
    print("Downloading difficulties.html...")
    webpage = requests.get(
        'https://jtohs-joke-towers.fandom.com/wiki/Main_Difficulty_Chart',
        timeout=30,
    )
    # Fail loudly rather than caching an HTTP error page as if it were the chart.
    webpage.raise_for_status()
    with open('difficulties.html', 'w', encoding='utf-8') as file:
        file.write(webpage.text)
    html = webpage.text
else:
    print("Using existing difficulties.html...")
    with open('difficulties.html', 'r', encoding='utf-8') as file:
        html = file.read()

# Keep only the markup that follows the "Baseline (Start of the Chart)" row.
baseline_parts = html.split("Baseline (Start of the Chart)</b>\n</td></tr>\n")
if len(baseline_parts) < 2:
    raise ValueError(
        "Baseline marker not found in difficulties.html; "
        "the page layout may have changed (delete the cached file to re-download)."
    )
html = baseline_parts[1].strip()

# open(path, 'wb') below does not create missing directories.
os.makedirs('images', exist_ok=True)

for entry in html.split('<tr>\n'):
    # find data-src="..." (image URL) and title="..." (page title) in the entry
    src = entry.split('data-src="')
    title = entry.split('title="')
    if len(src) < 2 or len(title) < 2:
        continue  # skip if data-src or title is not found

    src = src[1].split('"')[0]
    # Strip interwiki prefixes from the title.
    # NOTE(review): 'w:c:etoh-misc' has no trailing colon, unlike the other two
    # prefixes, so such titles presumably keep a leading ':' -- confirm intended.
    title = (
        title[1].split('"')[0]
        .replace('w:c:jtoh:', '')
        .replace('w:c:jtohs-hardest-towers:', '')
        .replace('w:c:etoh-misc', '')
    )

    # check if image is already downloaded (sub-page suffix after "/" is dropped)
    path = os.path.join('images', title.split("/")[0] + ".png")
    if os.path.exists(path):
        print(f"Image {title} already exists, skipping download.")
        continue

    # download the image; one failing image should not abort the whole loop
    try:
        response = requests.get(src, timeout=30)
    except requests.RequestException as e:
        print(f"Failed to download image for {title}: {e}")
        continue

    if response.status_code == 200:
        with open(path, 'wb') as img_file:
            img_file.write(response.content)
    else:
        print(f"Failed to download image for {title}: HTTP {response.status_code}")

Using existing difficulties.html...
Image The First Difficulty already exists, skipping download.
Image The Lower Gap already exists, skipping download.
Image Negativity already exists, skipping download.
Image Unimpossible already exists, skipping download.
Image Friendliness already exists, skipping download.
Image True Ease already exists, skipping download.
Image A already exists, skipping download.
Image Felix the ДА already exists, skipping download.
Image Exist already exists, skipping download.
Image Reversed Peripherality already exists, skipping download.
Image Relax already exists, skipping download.
Image Skip already exists, skipping download.
Image Restful already exists, skipping download.
Image Ifinity already exists, skipping download.
Image Instant Win already exists, skipping download.
Image Excavation Chain already exists, skipping download.
Image Millisecondless already exists, skipping download.
Image Astronomical already exists, skipping download.
Image Win alrea

In [10]:
import json

# The chart contains non-ASCII names and ratings (e.g. "Felix the ДА", "-∞"),
# so decode explicitly as UTF-8 instead of relying on the platform default.
with open('source.wikitext', 'r', encoding='utf-8') as file:
    content = file.read()

# Build {difficulty name: {'rating': rating text}} from the wikitext table rows.
# NOTE(review): rows that appear before '''Baseline (Start of the Chart)''' are
# NOT filtered out here; every row starting with an image cell is parsed.
difficulties = dict()
for entry in content.split('|-\n|[[File:')[1:]:
    lines = entry.split('\n')
    # The name is read from line 1 and the rating from line 3; skip truncated
    # or malformed rows instead of failing with IndexError.
    if len(lines) < 4:
        continue

    # Drop the 3 leading / 2 trailing markup characters, strip interwiki
    # prefixes, then keep the part before any "/" sub-page or "|" link label.
    name = (
        lines[1][3:-2]
        .replace('w:c:jtoh:', '')
        .replace('w:c:jtohs-hardest-towers:', '')
        .replace('w:c:etoh-misc', '')
        .split("/")[0]
        .split("|")[0]
    )

    # If a ":" remains, keep the segment right after the first one.
    vcheck = name.split(":")
    if len(vcheck) > 1:
        name = vcheck[1]

    # If a "[[" remains, keep the text between it and the closing "]]".
    boldcheck = name.split("[[")
    if len(boldcheck) > 1:
        name = boldcheck[1].split("]]")[0]

    # Rating cell: drop the leading cell character; when the value is wrapped
    # in <font ...>...</font>, keep only the text inside the first font tag.
    rating = lines[3][1:]
    rating_split = rating.split('</font>')
    if len(rating_split) > 1:
        rating = rating_split[0].split('>')[-1]
    rating = rating.strip()

    # A later row with the same name overwrites an earlier one.
    difficulties[name] = {
        'rating': rating
    }
print(difficulties)

{'Randomization': {'rating': ''}, 'Blank': {'rating': ''}, 'Construct': {'rating': ''}, 'Walking': {'rating': ''}, 'Reflective': {'rating': ''}, 'Sketchy': {'rating': ''}, 'Puzzling': {'rating': ''}, '15]] (EVEN': {'rating': ''}, 'Zemblanity': {'rating': ''}, 'Spectrumplex': {'rating': '-'}, 'Gingerbread': {'rating': ''}, 'Epic': {'rating': ''}, 'korn': {'rating': ''}, 'OooOOOoOOOOooo above catastrophic difficulty OoOOooOOoo': {'rating': ''}, 'Sisyphus': {'rating': ''}, 'Epinephrine': {'rating': ''}, 'OVERKILL': {'rating': ''}, 'MSD Speedy': {'rating': ''}, 'Gui remorseless': {'rating': ''}, 'Divine': {'rating': ''}, 'Gui nil': {'rating': ''}, 'Anti Boby': {'rating': '}'}, 'The First Difficulty': {'rating': '-∞'}, 'The Lower Gap': {'rating': '-∞ to 115'}, 'Negativity': {'rating': '-(FOOT<sup>10</sup>(10<sup>100</sup>))'}, 'Unimpossible': {'rating': '-Rayo(10<sup>100</sup>)'}, 'Friendliness': {'rating': '-TREE(3)'}, 'True Ease': {'rating': '-G<sub>64</sub>'}, 'Felix the ДА': {'rating': 

In [74]:
import fandom

# Load descriptions saved by a previous run so pages that were already fetched
# are not requested again. The cache file is produced by the export cell, so it
# may not exist yet on a fresh run.
try:
    with open('difficulties.json', 'r', encoding='utf-8') as file:
        cached = json.load(file)
except FileNotFoundError:
    cached = {}
# Only entries that actually hold a description count as cached; entries whose
# lookup failed earlier (description is None) are retried.
cached = {k: v for k, v in cached.items() if 'description' in v and v['description'] is not None}

fandom.set_wiki('jtohs-joke-towers')
fandom.set_lang('en')
for name, data in difficulties.items():
    if name in cached:
        print(f"Skipping {name}, already cached.")
        # Carry the cached text over: `difficulties` is what gets exported, so
        # leaving it out here would drop the description from the JSON file.
        data['description'] = cached[name]['description']
        continue

    try:
        # The first section of the page is used as the description.
        description = fandom.page(name).content['sections'][0]['content']
    except Exception as e:
        # Lookups fail in several unrelated ways (missing page, parser errors
        # inside the library), so catch broadly, report, and store None rather
        # than aborting the whole run.
        print(f"Error fetching description for {name}: {e}")
        description = None

    data['description'] = description

Skipping Randomization, already cached.
Error fetching description for Blank: 'NoneType' object has no attribute 'name'
Skipping Construct, already cached.
Error fetching description for Walking: 'NoneType' object has no attribute 'name'
Skipping Reflective, already cached.
Skipping Sketchy, already cached.
Error fetching description for Puzzling: 'NoneType' object has no attribute 'name'
Error fetching description for 15]] (EVEN: "15]] (EVEN" does not match any pages. Try another query!
Error fetching description for Zemblanity: 'NoneType' object has no attribute 'name'
Error fetching description for Spectrumplex: invalid literal for int() with base 10: 'r'
Error fetching description for Gingerbread: "Gingerbread" does not match any pages. Try another query!
Skipping Epic, already cached.
Error fetching description for korn: "korn" does not match any pages. Try another query!
Error fetching description for OooOOOoOOOOooo above catastrophic difficulty OoOOooOOoo: "OooOOOoOOOOooo above 

KeyboardInterrupt: 

In [69]:
# Persist the collected difficulty data as pretty-printed JSON (4-space indent).
with open('difficulties.json', 'w') as json_out:
    json.dump(difficulties, fp=json_out, indent=4)

In [70]:
# Debug probe: request a single page on its own and display its parsed content.
negativity_page = fandom.page('Negativity')
negativity_page.content

ValueError: invalid literal for int() with base 10: 'r'