In [22]:
import asyncio
from urllib.parse import urljoin
from playwright.async_api import async_playwright
import nest_asyncio

# JavaScript snippets evaluated in the browser against a single element handle.
_JS_CARD_TITLE = "el => el.querySelector('.titre').textContent"
_JS_CARD_TEXT = "el => el.querySelector('.texte').innerText.trim()"
_JS_CARD_HREF = "el => el.querySelector('a')?.getAttribute('href')"
_JS_EXPERIENCE_TITLE = "el => el.querySelector('.ce-bodytext>h3').textContent"
_JS_EXPERIENCE_TEXT = """
    (element) => {
        const paragraphs = element.querySelectorAll('.ce-bodytext p');
        return Array.from(paragraphs).map(p => p.innerText).join('\\n');
    }
"""
_JS_EXPERIENCE_HREF = """
    (element) => {
        const url = Array.from(element.querySelectorAll('.ce-bodytext a')).pop();
        if (url) {
            return url.getAttribute('href')
        } else {
            return "";
        }
    }
"""


async def _scrape_sub_experiences(browser, parent_title, listing_url):
    """Open *listing_url* in its own tab and return one record per linked item.

    Each record's title is prefixed with *parent_title*, and its URL is the
    last link found in the item's body text, resolved against *listing_url*.
    The tab is always closed, even when scraping fails part-way.
    """
    records = []
    sub_page = await browser.new_page()
    try:
        await sub_page.goto(listing_url)
        items = await sub_page.query_selector_all("#grid .item-grid .contenu:has(a)")
        for item in items:
            title = await sub_page.evaluate(_JS_EXPERIENCE_TITLE, item)
            description = await sub_page.evaluate(_JS_EXPERIENCE_TEXT, item)
            experience_url = await sub_page.evaluate(_JS_EXPERIENCE_HREF, item)
            records.append({
                "title": f"{parent_title} / {title}",
                "description": description,
                "type": "test",
                "url": urljoin(listing_url, experience_url),
            })
    finally:
        # The original never closed these tabs, leaking one per parent card.
        await sub_page.close()
    return records


async def get_qas_urls_async():
    """Scrape the juniors resources page and return the experiences found.

    Two kinds of cards are read from the landing page grid:

    * ``.bgJunior`` cards become one record each (title, description, url).
    * ``.bgLight`` cards are followed to their own listing page, and every
      linked item there becomes a record titled ``"<card> / <item>"`` with an
      extra ``"type": "test"`` key.

    Every record is also printed, followed by a separator line.

    Returns:
        list: the record dictionaries, landing-page cards first, then the
        sub-page items in page order.

    Raises:
        Whatever Playwright raises on navigation or evaluation failures; the
        browser is closed before the exception propagates.
    """
    base_url = "https://www.cite-sciences.fr/fr/ressources/juniors"
    web_experiences = []

    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(base_url)

            # SMART GAMES: self-contained cards on the landing page.
            for card in await page.query_selector_all("#grid .item-grid.bgJunior"):
                title = await page.evaluate(_JS_CARD_TITLE, card)
                description = await page.evaluate(_JS_CARD_TEXT, card)
                href = await page.evaluate(_JS_CARD_HREF, card)
                web_experiences.append({
                    "title": title,
                    "description": description,
                    # urljoin returns base_url unchanged when href is missing.
                    "url": urljoin(base_url, href),
                })

            # TINKERING AND MANIPULATIONS: each card links to a listing page.
            for card in await page.query_selector_all("#grid .item-grid.bgLight"):
                parent_title = await page.evaluate(_JS_CARD_TITLE, card)
                href = await page.evaluate(_JS_CARD_HREF, card)
                web_experiences.extend(
                    await _scrape_sub_experiences(
                        browser, parent_title, urljoin(base_url, href)
                    )
                )

            await page.close()
        finally:
            # Release the browser even if a scrape step raised.
            await browser.close()

    for exp in web_experiences:
        print(exp)
        print('==============')

    # Previously the collected list was dropped and callers received None.
    return web_experiences

def get_qas_urls():
    """Synchronous entry point: run ``get_qas_urls_async`` to completion.

    ``nest_asyncio`` is applied first so that ``run_until_complete`` can be
    called on the event loop returned by ``asyncio.get_event_loop()`` even if
    that loop is already running.

    Returns:
        Whatever ``get_qas_urls_async`` returns.
    """
    nest_asyncio.apply()
    event_loop = asyncio.get_event_loop()
    scrape_job = get_qas_urls_async()
    outcome = event_loop.run_until_complete(scrape_job)
    return outcome

def main() -> None:
    """Run the scraper once through the synchronous wrapper.

    The value returned by ``get_qas_urls`` is discarded.
    """
    get_qas_urls()

# Invoked unconditionally each time this cell/module is executed.
main()

{'title': 'Tranches de vie au Moyen Âge', 'description': "Remonte le temps jusqu'au Moyen Âge et découvre le travail des paysans, la nourriture des seigneurs et des paysans, la confection du drap de laine, le commerce, les voyages...", 'url': 'http://www.cite-sciences.fr/juniors/moyen-age/index.html'}
{'title': "Le froid ? C'est chaud !", 'description': "Fabrique un frigo du désert, devine des températures, classe des animaux selon qu'ils sont endothermes ou ectothermes, teste tes connaissances...", 'url': 'http://www.cite-sciences.fr/juniors/froid-chaud/index.html'}
{'title': 'Sur les pas de Darwin aux Galápagos', 'description': 'Tout en découvrant la faune et la flore des Galápagos, initie-toi à la classification des animaux, aux réseaux alimentaires, aux mécanismes de la sélection naturelle...', 'url': 'http://www.cite-sciences.fr/juniors/darwin-galapagos/index.html'}
{'title': 'Agriculture, alimentation / Guillaume, apprenti boulanger', 'description': "Suis Guillaume, jeune apprent