In [1]:
import json
import asyncio
import time
import httpx
import aiohttp

In [2]:
# Ask for JSON-LD through the Accept header; this same dict is sent with every later request.
headers = {'Accept': 'Application/ld+json'}
# Fetch the glossary root document (presumably the site's top-level navigation, per the variable name).
http_top_level_nav = httpx.get('https://glossary.ecoinvent.org', headers=headers)

In [3]:
# Decode the response body as JSON; later cells index into its "itemListElement" list.
top_level_nav = json.loads(http_top_level_nav.text)

In [4]:
# Display the URL stored in the list entry at index 5.
# NOTE(review): the position is hard-coded; a different section would be selected if the list order changes.
top_level_nav["itemListElement"][5]["item"]["url"]

'https://glossary.ecoinvent.org/intermediate-exchanges/'

In [5]:
# Fetch the listing behind the entry at hard-coded index 5, after stripping any "www." from its URL.
# In the recorded run the URL contained no "www.", so the replace() was a no-op there.
http_intermediate_exchange_nav = httpx.get(top_level_nav["itemListElement"][5]["item"]["url"].replace("www.", ""), headers=headers)

In [6]:
# Decode the listing; later cells read its "numberOfItems" and "itemListElement" fields.
intermediate_exchange_nav = json.loads(http_intermediate_exchange_nav.text)

In [7]:
# Display the item count reported by the listing itself.
intermediate_exchange_nav["numberOfItems"]

3837

In [9]:
# Peek at the first entry's "@id"; get_async uses this field as the address to request.
intermediate_exchange_nav["itemListElement"][0]["item"]["@id"]

'https://glossary.ecoinvent.org/ids/5543d899-1cbd-4acf-a770-befd41102943/'

In [10]:
# Shared connector and session used by every get_async call.
# ttl_dns_cache=300 sets the connector's DNS cache TTL (seconds, per the aiohttp docs).
# NOTE(review): limit=None is presumably meant to lift the connector's own connection cap, leaving
# the semaphore in gather_with_concurrency as the only throttle; aiohttp documents 0 as "no limit",
# so confirm how the installed version treats None.
conn = aiohttp.TCPConnector(limit=None, ttl_dns_cache=300)
session = aiohttp.ClientSession(connector=conn)

In [11]:
async def gather_with_concurrency(n, *tasks):
    """Await all of ``tasks`` while letting at most ``n`` of them run at a time.

    Args:
        n: Initial value of the semaphore, i.e. the number of awaitables
            allowed past the gate simultaneously.
        *tasks: Awaitables (for example coroutine objects) to run.

    Returns:
        The list produced by ``asyncio.gather``: one result per awaitable,
        in the order the awaitables were passed in.
    """
    limiter = asyncio.Semaphore(n)

    async def _bounded(awaitable):
        # Hold one semaphore slot for as long as the wrapped awaitable runs.
        async with limiter:
            outcome = await awaitable
        return outcome

    bounded = [_bounded(awaitable) for awaitable in tasks]
    return await asyncio.gather(*bounded)

In [12]:
async def get_async(url):
    """Fetch the document referenced by one listing entry and return its raw body.

    Despite the parameter name, ``url`` is a listing element (a mapping); the
    address actually requested is ``url["item"]["@id"]``. The request goes
    through the module-level ``session`` and sends the shared ``headers``.

    Returns:
        The response body exactly as returned by ``response.read()``.
    """
    target = url["item"]["@id"]
    async with session.get(target, headers=headers) as reply:
        body = await reply.read()
    return body

In [13]:
async def main():
    """Download and decode every entry listed in ``intermediate_exchange_nav``.

    Each element of ``intermediate_exchange_nav["itemListElement"]`` is fetched
    with ``get_async``, with at most 90 requests in flight at once, and every
    response body is parsed as JSON. The elapsed wall-clock time (download
    plus parsing) is printed before returning.

    Returns:
        A list of the decoded JSON documents, in listing order.
    """
    started = time.perf_counter()
    entries = intermediate_exchange_nav["itemListElement"]
    pending = [get_async(entry) for entry in entries]
    payloads = await gather_with_concurrency(90, *pending)
    parsed = []
    for payload in payloads:
        parsed.append(json.loads(payload))
    elapsed = time.perf_counter() - started
    print(f"{elapsed}s")
    return parsed

In [14]:
# Top-level await works here because IPython/Jupyter runs the cell on its event loop.
# The recorded run of this cell took roughly 56 s.
all_int_ex = await main()

56.25303234299999s


In [15]:
session.close()

<coroutine object ClientSession.close at 0x7ff836295540>

In [17]:
# Index the downloaded entries by their "slug"; if a slug repeats, the last entry seen wins.
int_ex = {}
for entry in all_int_ex:
    int_ex[entry["slug"]] = entry
    