In [2]:
# Cell 1: Import necessary libraries
import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common import exceptions
import time


In [13]:
# Cell 2: Set up the WebDriver
options = webdriver.ChromeOptions()
options.set_capability(
    'goog:loggingPrefs', { "performance": "ALL", "browser": "ALL"}
)
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)

In [14]:
# Cell 3: Navigate to the page
driver.set_page_load_timeout(10)
try: 
    driver.get('https://www.sofascore.com/czech-republic-turkey/aUbsoUb')
except:
    pass

In [15]:
# Cell 4: Scroll the page
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(5)

In [16]:
# Cell 5: Get performance logs
logs_raw = driver.get_log("performance")

In [17]:
# Cell 6: Process logs
logs = [json.loads(lr['message'])['message'] for lr in logs_raw]

In [18]:
# Cell 7: Find the specific network request
for x in logs:
    if 'lineups' in x['params'].get('headers', {}).get(':path', ''):
        print(x['params'].get('headers', {}).get(':path', ''))
        break

/api/v1/event/11873986/lineups


In [19]:
try:
    # Extract the request ID for the lineups request
    request_id = None
    for x in logs:
        if 'lineups' in x['params'].get('headers', {}).get(':path', ''):
            request_id = x['params'].get('requestId', '')
            break

    if request_id:
        # Execute the CDP command to get the response body
        response_body = driver.execute_cdp_cmd("Network.getResponseBody", {"requestId": request_id})
        player_stats = json.loads(response_body["body"])
    else:
        raise ValueError("Request ID for 'lineups' not found in logs")

except Exception as e:
    print(f"An error occurred: {e}")


An error occurred: Message: unknown error: unhandled inspector error: {"code":-32000,"message":"No data found for resource with given identifier"}
  (Session info: chrome=126.0.6478.127)
Stacktrace:
0   chromedriver                        0x0000000104d06a80 chromedriver + 4385408
1   chromedriver                        0x0000000104cff38c chromedriver + 4354956
2   chromedriver                        0x000000010491cb0c chromedriver + 281356
3   chromedriver                        0x0000000104906af0 chromedriver + 191216
4   chromedriver                        0x00000001049051d4 chromedriver + 184788
5   chromedriver                        0x0000000104905c4c chromedriver + 187468
6   chromedriver                        0x000000010491f5b0 chromedriver + 292272
7   chromedriver                        0x0000000104997d24 chromedriver + 785700
8   chromedriver                        0x0000000104953eec chromedriver + 507628
9   chromedriver                        0x00000001049548c4 chromedrive

In [None]:
# Cell 9: Parse player statistics
def parse_player_statistics(team_data):
    players_stats = []
    for player_info in team_data['players']:
        player = player_info['player']
        stats = player_info['statistics']
        player_stats = {
            'name': player['name'],
            'id': player['id'],
            'position': player_info['position'],
            'jerseyNumber': player_info['jerseyNumber'],
            'totalPass': stats.get('totalPass', 0),
            'accuratePass': stats.get('accuratePass', 0),
            'totalLongBalls': stats.get('totalLongBalls', 0),
            'accurateLongBalls': stats.get('accurateLongBalls', 0),
            'goalAssist': stats.get('goalAssist', 0),
            'saves': stats.get('saves', 0),
            'minutesPlayed': stats.get('minutesPlayed', 0),
            'touches': stats.get('touches', 0),
            'rating': stats.get('rating', 0),
            'possessionsLostCtrl': stats.get('possessionsLostCtrl', 0)
        }
        players_stats.append(player_stats)
    return players_stats

In [None]:
# Cell 10: Process home and away team statistics
home_team_stats = parse_player_statistics(player_stats['home'])
away_team_stats = parse_player_statistics(player_stats['away'])

In [None]:
# Cell 11: Combine home and away team statistics
all_player_stats = {
    'home': home_team_stats,
    'away': away_team_stats
}

In [None]:
# Cell 12: Save to JSON file
with open("player_stats.json", "w") as f:
    json.dump(all_player_stats, f)