In [1]:
# Notebook setup: logging, event-loop patching, and the Playwright entry point.
import logging
import asyncio

# Emit INFO-and-above records; `logger` is the module-level logger used by the cells below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Patches asyncio to permit nested event loops — presumably required because the
# notebook kernel already runs a loop while later cells call asyncio.run(); confirm.
import nest_asyncio
nest_asyncio.apply()

# NOTE(review): asyncio is already imported at the top of this cell; this repeat is redundant.
import asyncio
from playwright.async_api import async_playwright

In [2]:
async def fetch_json(url, settle_seconds=2):
    """Load ``url`` in headless Chromium and return its parsed JSON body.

    A ``response`` listener captures the body of any HTTP 200 response whose
    URL equals ``url`` exactly. If the navigation was redirected, so the
    listener could not match, the response returned by ``page.goto`` is
    parsed instead.

    Args:
        url: Address to navigate to; expected to serve JSON.
        settle_seconds: Grace period, in seconds, to wait after navigation
            when nothing has been captured yet, so that a late or
            still-parsing response can be stored. Skipped when the JSON is
            already available.

    Returns:
        The decoded JSON value, or ``None`` if nothing could be captured.
        All errors (navigation timeouts, launch failures, parse failures)
        are logged rather than raised.
    """
    captured = {}

    async def store_json(response):
        # Parse failures are logged, not raised: a non-JSON body simply
        # leaves `captured` empty and the caller receives None.
        try:
            captured['data'] = await response.json()
            logger.info(f"Successfully received JSON from {url}")
        except Exception as e:
            logger.error(f"Failed to parse JSON response: {e}")

    async def handle_response(response):
        if response.url != url:
            return
        if response.status == 200:
            await store_json(response)
        else:
            # Surface blocked/failed requests; otherwise a None result is undiagnosable.
            logger.warning(f"Received HTTP {response.status} from {url}")

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                page = await browser.new_page()
                page.on("response", handle_response)

                logger.info(f"Navigating to {url}")
                main_response = await page.goto(url)

                # After a redirect the final URL differs from `url`, so the
                # listener never matched; fall back to the navigation response.
                redirected = main_response is not None and main_response.url != url
                if redirected and main_response.status == 200 and 'data' not in captured:
                    await store_json(main_response)

                # Only wait when there is still something to wait for.
                if 'data' not in captured:
                    await asyncio.sleep(settle_seconds)
            finally:
                # Close the browser even when navigation raises (e.g. a timeout).
                await browser.close()

    except Exception as e:
        logger.exception(f"Exception occurred while fetching JSON from {url}: {e}")

    # Data captured by the listener before a navigation error is still returned.
    return captured.get('data')


In [3]:
def get_country_id(country_name):
    """Return the id of the football category named ``country_name``.

    The category list is downloaded through ``fetch_json`` and scanned for
    the first entry whose ``name`` equals ``country_name`` exactly.

    Args:
        country_name: Category name to look for (exact match).

    Returns:
        The matching entry's ``id``. ``-1`` is returned when the payload is
        empty or has no ``categories`` key, when no entry matches, when the
        matching entry has no ``id``, or when any exception is raised.
    """
    endpoint = "https://www.sofascore.com/api/v1/sport/football/categories"

    try:
        payload = asyncio.run(fetch_json(endpoint))

        has_categories = bool(payload) and "categories" in payload
        if not has_categories:
            logger.error("'categories' key missing in response or empty data")
            return -1

        # Lazily scan so that only entries up to the first match are inspected.
        candidates = (
            entry for entry in payload["categories"]
            if entry.get("name") == country_name
        )
        found = next(candidates, None)
        if found is not None:
            return found.get("id", -1)

        logger.warning(f"Country '{country_name}' not found in categories")
        return -1

    except Exception as err:
        # Any failure (bad payload shape, fetch error) maps to the -1 sentinel.
        logger.exception(f"Exception occurred: {err}")
        return -1

In [4]:
def get_unique_tournament_ids(country_id):
    """Return the ids of the selected tournaments for a category.

    Downloads the category's unique-tournament listing through
    ``fetch_json`` and keeps the ids of entries whose ``name`` is in the
    selected set.

    Args:
        country_id: Category id interpolated into the request URL.

    Returns:
        A list of tournament ids in response order. An empty list is
        returned when no data comes back, when no ``uniqueTournaments``
        list is present, or when any exception is raised.
    """
    url = f"https://www.sofascore.com/api/v1/category/{country_id}/unique-tournaments"

    # NOTE(review): "Vietname Cup" and "Vietnamest Super Cup" look misspelled;
    # kept unchanged because matching is exact — verify against the API's names.
    selected_tournaments = {"V-League 1", "V-League 2", "Vietname Cup", "Vietnamest Super Cup"}

    try:
        data = asyncio.run(fetch_json(url))
        if not data:
            logger.error("No data returned from fetch_json")
            return []

        groups = data.get("groups")
        # Accept either a single group object or a list of group objects.
        if isinstance(groups, dict):
            groups = [groups]

        tournament_lists = [
            group["uniqueTournaments"]
            for group in groups or []
            if isinstance(group, dict) and "uniqueTournaments" in group
        ]
        if not tournament_lists:
            logger.error("'uniqueTournaments' key missing in response or empty 'groups'")
            return []

        # Entries without an id are skipped so callers never receive None.
        return [
            tournament["id"]
            for tournaments in tournament_lists
            for tournament in tournaments or []
            if tournament.get("name") in selected_tournaments
            and tournament.get("id") is not None
        ]

    except Exception as e:
        logger.exception(f"Exception occurred: {e}")
        return []


In [5]:
country_id = get_country_id("Vietnam")

# get_country_id reports failure with -1; skip the second request in that case
# instead of asking the API for a category that does not exist.
tournament_ids = get_unique_tournament_ids(country_id) if country_id != -1 else []

print(tournament_ids)

INFO:__main__:Navigating to https://www.sofascore.com/api/v1/sport/football/categories
ERROR:__main__:Exception occurred while fetching JSON from https://www.sofascore.com/api/v1/sport/football/categories: Page.goto: Timeout 30000ms exceeded.
Call log:
  - navigating to "https://www.sofascore.com/api/v1/sport/football/categories", waiting until "load"
Traceback (most recent call last):
  File "/tmp/ipykernel_1302/3384520698.py", line 20, in fetch_json
    await page.goto(url)
  File "/home/dottier/big_data/venv/lib/python3.11/site-packages/playwright/async_api/_generated.py", line 8975, in goto
    await self._impl_obj.goto(
  File "/home/dottier/big_data/venv/lib/python3.11/site-packages/playwright/_impl/_page.py", line 556, in goto
    return await self._main_frame.goto(**locals_to_params(locals()))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dottier/big_data/venv/lib/python3.11/site-packages/playwright/_impl/_frame.py", line 146, in goto
    aw

[]
