In [1]:
import asyncio
from pyppeteer import launch
import json
from concurrent.futures import ThreadPoolExecutor

async def get_set_cost(set_num):
    url = f"https://www.brickeconomy.com/set/{set_num}-1/"
    # Launch the browser
    browser = await launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu', '--disable-software-rasterizer', '--disable-web-security', '--disable-features=IsolateOrigins,site-per-process', '--blink-settings=imagesEnabled=false', '--disable-notifications', '--disable-infobars'])
    # Create a new page
    page = await browser.newPage()
    # Disable CSS and JavaScript
    await page.setBypassCSP(True)
    await page.setJavaScriptEnabled(False)
    # Navigate to the URL
    await page.goto(url)
    # Find the element containing the retail price
    try:
        retail_price_element = await page.xpath("//div[contains(text(), 'Retail price')]")
    except:
        await browser.close()
        return -1, -1
    
    # Get the price from the next sibling element
    price_str = await retail_price_element[0].xpath("./following-sibling::div")
    price_str = await price_str[0].getProperty('textContent')
    price_str = await price_str.jsonValue()
    price = float(price_str.strip().replace('$', '').replace(',', ''))
    # Find the element containing the value
    value_element = await page.xpath("//div[contains(text(), 'Value')]")
    value_str = await value_element[0].xpath("./following-sibling::div")
    value_str = await value_str[0].getProperty('textContent')
    value_str = await value_str.jsonValue()
    value = float(value_str.strip().replace('$', '').replace(',', ''))
    # Close the browser
    await browser.close()
    
    return price, value


async def download_sets(set_ids):
    with ThreadPoolExecutor(max_workers=10) as executor:
        loop = asyncio.get_event_loop()
        tasks = [
            loop.run_in_executor(executor, get_set_cost, set_id[0])
            for set_id in set_ids
        ]
        results = await asyncio.gather(*tasks)
    return {set_id[0]: result for set_id, result in zip(set_ids, results)}


In [2]:
# read file 'set_ids'
with open('set_ids', 'r') as f:
    set_ids = f.readlines()
# split by " " into a tuple
set_ids = [set_id.split(" ") for set_id in set_ids]
# reorder as (id, name), where te first element is the id and all the rest is the name
set_ids = [(set_id[0], " ".join(set_id[1:])) for set_id in set_ids]
set_ids = [(set_id[0], set_id[1].replace("\n", "")) for set_id in set_ids]
set_ids = [set_id for set_id in set_ids if not set_id[0].isalpha()]

In [4]:

async def main():
    # Download the set data asynchronously
    results = await download_sets(set_ids)

    # Print the results to the console
    for set_id, data in results.items():
        print(f"Set ID: {set_id}, Price: {data['price']}, Value: {data['value']}")

    # Save the results to a JSON file
    with open('set_data.json', 'w') as f:
        json.dump(results, f)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main())
    finally:
        loop.close()


RuntimeError: Cannot close a running event loop

In [9]:
loop??

  return compile(source, filename, mode, flags,


[0;31mType:[0m        _UnixSelectorEventLoop
[0;31mString form:[0m <_UnixSelectorEventLoop running=True closed=False debug=False>
[0;31mFile:[0m        ~/mambaforge/envs/sage/lib/python3.10/asyncio/unix_events.py
[0;31mSource:[0m     
[0;32mclass[0m [0m_UnixSelectorEventLoop[0m[0;34m([0m[0mselector_events[0m[0;34m.[0m[0mBaseSelectorEventLoop[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"""Unix event loop.[0m
[0;34m[0m
[0;34m    Adds signal handling and UNIX Domain Socket support to SelectorEventLoop.[0m
[0;34m    """[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0m__init__[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mselector[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0msuper[0m[0;34m([0m[0;34m)[0m[0;34m.[0m[0m__init__[0m[0;34m([0m[0mselector[0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0mself[0m[0;34m.[0m[0m_signal_handlers[0m [0;34m=[0m [0;34m{[0m[0;34m