In [1]:
import pandas as pd

In [18]:
import plotly.express as px 

In [2]:
from understatapi import UnderstatClient

In [3]:
# Shared client used by the cells below for league, player, team and match lookups.
# NOTE(review): unlike the client inside get_team_data, this one is not opened in a
# `with` block -- confirm whether it needs an explicit close when the notebook is done.
understat = UnderstatClient()

In [4]:
# Premier League player data for the 2022 season: raw records, then a DataFrame of them
pl_player_2022 = (
    understat
    .league(league="EPL")
    .get_player_data(season="2022")
)
pl_22 = pd.DataFrame(data=pl_player_2022)

In [5]:
# Premier League player data for the 2023 season (raw records; tabulated in a later cell)
pl_player_2023 = (
    understat
    .league(league="EPL")
    .get_player_data(season="2023")
)

In [6]:
def get_team_data(team_name, year):
    """Return one team's match data for a single season as a DataFrame.

    A fresh ``UnderstatClient`` is opened with a ``with`` block for the
    duration of the request, so this helper does not use the notebook-level
    client.

    Args:
        team_name: Team identifier forwarded as ``team=`` to the client.
        year: Season forwarded as ``season=`` to ``get_match_data``.

    Returns:
        pandas.DataFrame built from the match data the client returns.
    """
    with UnderstatClient() as client:
        matches = client.team(team=team_name).get_match_data(season=year)

    return pd.DataFrame(matches)

In [7]:
# Fetch the record of every player in the Premier League for the 2019/20 season
league_player_data = (
    understat
    .league(league="EPL")
    .get_player_data(season="2019")
)

# Take the id and the name from the first player record
player_id = league_player_data[0]["id"]
player_name = league_player_data[0]["player_name"]

# Fetch every shot this player has taken in a league match (for all seasons)
player_shot_data = (
    understat
    .player(player=player_id)
    .get_shot_data()
)

In [8]:
# Fetch every league match involving Manchester United in 2019/20
team_match_data = (
    understat
    .team(team="Manchester_United")
    .get_match_data(season="2019")
)

# Id of the first match in the returned list
match_id = team_match_data[0]["id"]

# Fetch the rosters for both teams in that match
roster_data = (
    understat
    .match(match=match_id)
    .get_roster_data()
)

In [9]:
# Tabulate the 2023 season player records; the cleaning cells below modify this frame.
pl_player_df = pd.DataFrame(pl_player_2023)

# Data Cleaning

In [10]:
# Cast the listed metric columns to numeric dtype; values that cannot be parsed become NaN
cols_to_convert = ['xG', 'xA', 'npxG', 'xGChain', 'xGBuildup']

pl_player_df[cols_to_convert] = pl_player_df[cols_to_convert].apply(
    pd.to_numeric, errors='coerce'
)

In [11]:
# Round the listed metric columns to two decimal places
cols_to_round = ['xG', 'xA', 'npxG', 'xGChain', 'xGBuildup']

pl_player_df[cols_to_round] = pl_player_df[cols_to_round].round(2)

In [12]:
# Show the player table after the numeric conversion and rounding steps.
pl_player_df

Unnamed: 0,id,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup
0,6049,Solly March,2,165,3,2.02,0,0.11,8,3,0,0,M,Brighton,3,2.02,2.44,0.39
1,6552,Bryan Mbeumo,2,180,3,2.97,0,0.09,6,1,0,0,F,Brentford,1,1.45,1.45,0.00
2,5232,Alexander Isak,2,148,2,1.16,0,0.00,3,0,0,0,F,Newcastle United,2,1.16,1.17,0.53
3,5786,Yoane Wissa,2,164,2,1.21,0,0.07,10,2,0,0,F,Brentford,2,1.21,0.76,0.00
4,7814,Taiwo Awoniyi,2,99,2,1.06,0,0.08,2,2,0,0,F S,Nottingham Forest,2,1.06,1.14,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337,11730,Anis Ben Slimane,1,9,0,0.00,0,0.00,0,0,1,0,S,Sheffield United,0,0.00,0.00,0.00
338,11731,Andre Brooks,2,16,0,0.00,0,0.00,0,0,0,0,S,Sheffield United,0,0.00,0.00,0.00
339,11807,Ian Maatsen,1,2,0,0.00,0,0.00,0,0,0,0,S,Chelsea,0,0.00,0.00,0.00
340,11810,Mason Burstow,1,1,0,0.00,0,0.00,0,0,0,0,S,Chelsea,0,0.00,0.00,0.00


In [13]:
# Id of the first row whose player_name is exactly 'Bukayo Saka'
saka_id = pl_player_df.loc[pl_player_df['player_name'] == 'Bukayo Saka', 'id'].iloc[0]
saka_id

'7322'

In [14]:
# Fetch the shot records for the player id looked up above.
# NOTE(review): despite its name, `saka_shot_id` holds the result of get_shot_data()
# (shot records), not an id.
saka_shot_id = understat.player(player=saka_id).get_shot_data()

In [15]:
# Render the fetched shot records as a DataFrame for inspection.
pd.DataFrame(saka_shot_id)

Unnamed: 0,id,minute,result,X,Y,xG,player,h_a,player_id,situation,season,shotType,match_id,h_team,a_team,h_goals,a_goals,date,player_assisted,lastAction
0,321238,25,MissedShots,0.7759999847412109,0.4129999923706055,0.030224399641156197,Bukayo Saka,h,7322,OpenPlay,2019,RightFoot,11702,Arsenal,Aston Villa,3,2,2019-09-22 15:30:00,Nicolas Pepe,Pass
1,323714,43,SavedShot,0.855,0.6140000152587891,0.10016199946403503,Bukayo Saka,a,7322,OpenPlay,2019,LeftFoot,11712,Manchester United,Arsenal,1,1,2019-09-30 19:00:00,,
2,323721,59,BlockedShot,0.899000015258789,0.48700000762939455,0.44646498560905457,Bukayo Saka,a,7322,OpenPlay,2019,RightFoot,11712,Manchester United,Arsenal,1,1,2019-09-30 19:00:00,Calum Chambers,Pass
3,324736,22,BlockedShot,0.865999984741211,0.674000015258789,0.0682813972234726,Bukayo Saka,h,7322,OpenPlay,2019,RightFoot,11722,Arsenal,Bournemouth,1,0,2019-10-06 14:00:00,Pierre-Emerick Aubameyang,Pass
4,351606,78,SavedShot,0.8640000152587891,0.34,0.1093439981341362,Bukayo Saka,h,7322,OpenPlay,2019,RightFoot,11844,Arsenal,Manchester United,2,0,2020-01-01 20:00:00,Mesut Özil,Pass
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
262,531998,31,Goal,0.794000015258789,0.375,0.037814829498529434,Bukayo Saka,h,7322,OpenPlay,2023,LeftFoot,22276,Arsenal,Nottingham Forest,2,1,2023-08-12 11:30:00,William Saliba,Pass
263,532010,86,SavedShot,0.9119999694824219,0.5209999847412109,0.14559100568294525,Bukayo Saka,h,7322,OpenPlay,2023,LeftFoot,22276,Arsenal,Nottingham Forest,2,1,2023-08-12 11:30:00,Kai Havertz,Pass
264,534208,1,BlockedShot,0.87,0.4759999847412109,0.08257292956113815,Bukayo Saka,a,7322,OpenPlay,2023,RightFoot,22294,Crystal Palace,Arsenal,0,1,2023-08-21 19:00:00,,
265,534209,13,BlockedShot,0.915,0.30100000381469727,0.0621970035135746,Bukayo Saka,a,7322,OpenPlay,2023,RightFoot,22294,Crystal Palace,Arsenal,0,1,2023-08-21 19:00:00,Eddie Nketiah,Pass
