In [18]:
import polars as pl
import polars.selectors as cs
import soccerdata as sd
import matplotlib.pyplot as plt
import sklearn as skl
import pandas as pd
import numpy as np
import pyarrow
import seaborn as sb

In [8]:
# Build the WhoScored reader for one league and one season, then print the
# class docstring as a quick reference for the supported constructor options.
ws = sd.WhoScored(
    leagues="ENG-Premier League",
    seasons=2025,
)
print(ws.__doc__)

Provides pd.DataFrames from data available at http://whoscored.com.

    Data will be downloaded as necessary and cached locally in
    ``~/soccerdata/data/WhoScored``.

    Parameters
    ----------
    leagues : string or iterable, optional
        IDs of Leagues to include.
    seasons : string, int or list, optional
        Seasons to include. Supports multiple formats.
        Examples: '16-17'; 2016; '2016-17'; [14, 15, 16]
    proxy : 'tor' or dict or list(dict) or callable, optional
        Use a proxy to hide your IP address. Valid options are:
            - "tor": Uses the Tor network. Tor should be running in
              the background on port 9050.
            - dict: A dictionary with the proxy to use. The dict should be
              a mapping of supported protocols to proxy addresses. For example::

                  {
                      'http': 'http://10.10.1.10:3128',
                      'https': 'http://10.10.1.10:1080',
                  }

            - list

In [9]:
# Read the fixture schedule for the league/season configured on `ws`.
# Per the reader's docstring, data is downloaded as necessary and cached
# locally, and results are provided as pandas DataFrames.
epl_schedule = ws.read_schedule()

In [10]:
# Preview the first few rows to inspect the (league, season, game) index
# and the available schedule columns.
epl_schedule.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,stage_id,game_id,status,start_time,home_team_id,home_team,home_yellow_cards,home_red_cards,away_team_id,away_team,...,period,extra_result_field,home_extratime_score,away_extratime_score,home_penalty_score,away_penalty_score,started_at_utc,first_half_ended_at_utc,second_half_started_at_utc,stage
league,season,game,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
ENG-Premier League,2526,2025-08-15 Liverpool-Bournemouth,24533,1903117,6,2025-08-15T20:00:00,26,Liverpool,1,0,183,Bournemouth,...,7,,,,,,2025-08-15T19:01:03Z,2025-08-15T19:49:06Z,2025-08-15T20:05:06Z,
ENG-Premier League,2526,2025-08-16 Aston Villa-Newcastle,24533,1903118,6,2025-08-16T12:30:00,24,Aston Villa,1,1,23,Newcastle,...,7,,,,,,2025-08-16T11:31:20Z,2025-08-16T12:16:37Z,2025-08-16T12:32:16Z,
ENG-Premier League,2526,2025-08-16 Brighton-Fulham,24533,1903119,6,2025-08-16T15:00:00,211,Brighton,3,0,170,Fulham,...,7,,,,,,2025-08-16T14:01:01Z,2025-08-16T14:47:01Z,2025-08-16T15:03:12Z,
ENG-Premier League,2526,2025-08-16 Sunderland-West Ham,24533,1903121,6,2025-08-16T15:00:00,16,Sunderland,0,0,29,West Ham,...,7,,,,,,2025-08-16T14:01:37Z,2025-08-16T14:46:39Z,2025-08-16T15:02:56Z,
ENG-Premier League,2526,2025-08-16 Tottenham-Burnley,24533,1903122,6,2025-08-16T15:00:00,30,Tottenham,0,0,184,Burnley,...,7,,,,,,2025-08-16T14:01:24Z,2025-08-16T14:47:40Z,2025-08-16T15:03:17Z,


In [30]:
# Work on a DataFrame built from the schedule, then pull out every fixture
# involving Manchester United. Home and away fixtures are selected separately
# and stacked, so the result lists all home games first, then all away games.
epl = pd.DataFrame(epl_schedule)

home_manutd = epl.loc[epl["home_team"].eq("Manchester United")]
away_manutd = epl.loc[epl["away_team"].eq("Manchester United")]

manutd = pd.concat([home_manutd, away_manutd], axis=0)

manutd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,stage_id,game_id,status,start_time,home_team_id,home_team,home_yellow_cards,home_red_cards,away_team_id,away_team,...,period,extra_result_field,home_extratime_score,away_extratime_score,home_penalty_score,away_penalty_score,started_at_utc,first_half_ended_at_utc,second_half_started_at_utc,stage
league,season,game,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
ENG-Premier League,2526,2025-08-17 Manchester United-Arsenal,24533,1903125,6,2025-08-17T16:30:00,32,Manchester United,1,0,13,Arsenal,...,7,,,,,,2025-08-17T15:30:48Z,2025-08-17T16:16:54Z,2025-08-17T16:32:38Z,
ENG-Premier League,2526,2025-08-30 Manchester United-Burnley,24533,1903146,6,2025-08-30T15:00:00,32,Manchester United,1,0,184,Burnley,...,7,,,,,,2025-08-30T14:00:20Z,2025-08-30T14:53:21Z,2025-08-30T15:08:23Z,
ENG-Premier League,2526,2025-09-20 Manchester United-Chelsea,24533,1903167,6,2025-09-20T17:30:00,32,Manchester United,1,1,15,Chelsea,...,7,,,,,,2025-09-20T16:30:11Z,2025-09-20T17:25:11Z,2025-09-20T17:42:55Z,
ENG-Premier League,2526,2025-10-04 Manchester United-Sunderland,24533,1903176,6,2025-10-04T15:00:00,32,Manchester United,1,0,16,Sunderland,...,7,,,,,,2025-10-04T14:00:42Z,2025-10-04T14:52:31Z,2025-10-04T15:07:43Z,
ENG-Premier League,2526,2025-10-25 Manchester United-Brighton,24533,1903204,6,2025-10-25T17:30:00,32,Manchester United,2,0,211,Brighton,...,7,,,,,,2025-10-25T16:30:52Z,2025-10-25T17:17:05Z,2025-10-25T17:33:04Z,
ENG-Premier League,2526,2025-11-24 Manchester United-Everton,24533,1903234,6,2025-11-24T20:00:00,32,Manchester United,2,0,31,Everton,...,7,,,,,,2025-11-24T20:01:05Z,2025-11-24T20:50:06Z,2025-11-24T21:06:05Z,
ENG-Premier League,2526,2025-12-04 Manchester United-West Ham,24533,1903254,6,2025-12-04T20:00:00,32,Manchester United,2,0,29,West Ham,...,7,,,,,,2025-12-04T20:00:07Z,2025-12-04T20:46:13Z,2025-12-04T21:02:16Z,
ENG-Premier League,2526,2025-12-15 Manchester United-Bournemouth,24533,1903490,6,2025-12-15T20:00:00,32,Manchester United,2,0,183,Bournemouth,...,7,,,,,,2025-12-15T20:00:20Z,2025-12-15T20:50:53Z,2025-12-15T21:06:10Z,
ENG-Premier League,2526,2025-12-26 Manchester United-Newcastle,24533,1903319,1,2025-12-26T20:00:00,32,Manchester United,0,0,23,Newcastle,...,0,,,,,,,,,
ENG-Premier League,2526,2025-12-30 Manchester United-Wolves,24533,1903351,1,2025-12-30T20:15:00,32,Manchester United,0,0,161,Wolves,...,0,,,,,,,,,


In [33]:
# Collect the WhoScored game ids of every Manchester United fixture.
#
# The previous version did `list = print(...)`: print() always returns None,
# so the builtin `list` was rebound to None for the rest of the kernel session
# (any later `list(...)` call would fail) and no ids were actually stored.
# Keep the ids in a descriptively named plain Python list instead.
manutd_game_ids = manutd["game_id"].tolist()

# Display the ids with their (league, season, game) index as the cell output.
manutd["game_id"]

league              season  game                                          
ENG-Premier League  2526    2025-08-17 Manchester United-Arsenal              1903125
                            2025-08-30 Manchester United-Burnley              1903146
                            2025-09-20 Manchester United-Chelsea              1903167
                            2025-10-04 Manchester United-Sunderland           1903176
                            2025-10-25 Manchester United-Brighton             1903204
                            2025-11-24 Manchester United-Everton              1903234
                            2025-12-04 Manchester United-West Ham             1903254
                            2025-12-15 Manchester United-Bournemouth          1903490
                            2025-12-26 Manchester United-Newcastle            1903319
                            2025-12-30 Manchester United-Wolves               1903351
                            2026-01-17 Manchester United-Manchest