Data statistik disediakan oleh Understat.
Unofficial Understat Python package: https://understat.readthedocs.io/en/latest/

Visualisasi menggunakan Altair. Kenapa bukan Seaborn atau Matplotlib? Karena Altair bisa interaktif.

Expected Goals (xG): Probability that a shot will result in a goal based on the characteristics of that shot and the events leading up to it.
https://fbref.com/en/expected-goals-model-explained/



In [None]:
#@title
!pip install --upgrade understat;
!pip install altair;

Collecting understat
  Downloading https://files.pythonhosted.org/packages/7a/5f/ef36c18478c1c4c0fba1fff4dc91063f023e197fd3c60a461d9dd4606ae1/understat-0.1.3-py3-none-any.whl
Collecting codecov
  Downloading https://files.pythonhosted.org/packages/93/9f/bbea5b6231308458963cb5c067bc5643da9949689702fa5a382714b59699/codecov-2.1.11-py2.py3-none-any.whl
Collecting aiohttp
[?25l  Downloading https://files.pythonhosted.org/packages/88/c0/5890b4c8b04a79b7360e8fe4490feb0bb3ab179743f199f0e6220cebd568/aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3MB)
[K     |████████████████████████████████| 1.3MB 4.1MB/s 
Collecting pytest-cov
  Downloading https://files.pythonhosted.org/packages/ba/84/576b071aef9ac9301e5c0ff35d117e12db50b87da6f12e745e9c5f745cc2/pytest_cov-2.12.1-py2.py3-none-any.whl
Collecting pytest-mock
  Downloading https://files.pythonhosted.org/packages/fd/be/ce7e79a7bf68ff6630f662f58a8dc68e2a602d8649a1c0e05c8e6b9a2177/pytest_mock-3.6.1-py3-none-any.whl
Collecting pytest-ai

In [None]:
import asyncio
import json
import nest_asyncio
import altair as alt
nest_asyncio.apply()
 
import aiohttp 
from understat import Understat
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'


# Local import: needed to hand the JSON text to pandas as a file-like object.
from io import StringIO

# Fetch Understat player statistics for five European leagues.
league = ['epl', 'la liga', 'bundesliga', 'serie a', 'ligue 1']


async def main():
    """Return one list of player records per entry of `league`, in order.

    Every request goes through a single aiohttp session, which is closed
    when the function returns. The leagues are requested one after another.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        return [await understat.get_league_players(x, 2020) for x in league]


loop = asyncio.get_event_loop()
b = loop.run_until_complete(main())

# Flatten the per-league lists into one list of player records.
c = [player for players in b for player in players]

# Create the pandas DataFrame from the player stats.
# The records are round-tripped through JSON so that pd.read_json builds the
# frame (presumably relied on for its dtype inference -- confirm before
# replacing it with pd.DataFrame(c)). The text is wrapped in StringIO because
# passing a literal JSON string to read_json is deprecated in newer pandas.
data = json.dumps(c)
df2 = pd.read_json(StringIO(data))

# Data preparation & analysis

In [None]:
# Keep only players whose position string contains 'F' (forwards),
# then drop the ones who have not scored at all.
forwards = df2.loc[df2['position'].str.contains('F')]
fw_stats = forwards.loc[forwards['goals'].gt(0)]

In [None]:
# Add derived statistics to fw_stats:
#   goal90   - goals per 90 minutes played
#   shot90   - shots per 90 minutes played
#   goalshot - goals per shot, expressed as a percentage
#   xG90     - expected goals per 90 minutes played
# These are whole-column operations, so no per-row loop is needed.
fw_stats['goal90'] = (90 * fw_stats['goals'] / fw_stats['time']).round(3)
fw_stats['shot90'] = (90 * fw_stats['shots'] / fw_stats['time']).round(3)
fw_stats['goalshot'] = (100 * fw_stats['goals'] / fw_stats['shots']).round(3)
# Scaled by 90 (not 100) so xG90 is on the same per-90 basis as goal90.
fw_stats['xG90'] = (90 * fw_stats['xG'] / fw_stats['time']).round(3)


In [None]:
# Build a slimmer per-player frame containing only the columns used for plotting.
headers = ["player_name", "team_title", "goals", "shots", "shot90", "goalshot", "goal90", "xG90"]
data = [fw_stats[column] for column in headers]

df3 = pd.concat(data, axis=1, keys=headers)

In [None]:
# Sanity check: look up one known player in both frames.
# Only the last expression of a notebook cell is displayed.
df3[df3['player_name'].eq('Jamie Vardy')]
fw_stats[fw_stats['player_name'].eq('Jamie Vardy')]

Unnamed: 0,id,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup,goal90,shot90,goalshot,xG90
6,755,Jamie Vardy,34,2848,15,19.942946,9,5.087882,82,28,1,0,F S,Leicester,7,13.092427,18.227907,2.412588,0.474,2.591,18.293,0.7


# Visualisasi

In [None]:
def _quadrant_label(x, y, label):
    """Return a text layer that draws `label` at data coordinates (x, y)."""
    return alt.Chart({'values': [{'x': x, 'y': y}]}).mark_text(
        text=label, color='white', size=12
    ).encode(x='x:Q', y='y:Q')


def _rule_axis(tick_values):
    """Return the axis styling shared by both average lines, with the given ticks."""
    return alt.Axis(grid=False, titleFontSize=16, labelFontSize=12,
                    titlePadding=8, values=tick_values)


# Scatter plot: conversion rate (x) against shots per 90 minutes (y).
# Circle size encodes goals; circle colour encodes total shots.
scat = alt.Chart(df3).mark_circle().encode(
    y='shot90:Q',
    x='goalshot:Q',
    size=alt.Size('goals:Q', scale=alt.Scale(range=[50, 400]),
                  legend=alt.Legend(symbolFillColor='white')),
    color=alt.Color("shots:Q", scale=alt.Scale(scheme='blues', reverse=True)),
    tooltip=[alt.Tooltip('player_name', title='name'),
             alt.Tooltip('team_title', title='team'),
             alt.Tooltip('goals', title='goals'),
             alt.Tooltip('shots', title='shots'),
             alt.Tooltip('shot90', title='shot per 90 min'),
             alt.Tooltip('goalshot', title='goals per shot')],
).properties(
    width=800,
    height=500
)

alt.themes.enable("dark")

# Average lines: mean shots per 90 (horizontal) and mean conversion rate (vertical).
# These layers also carry the axis titles and tick values of the combined chart.
rule1 = alt.Chart(df3).mark_rule(color='red', opacity=0.5).encode(
    y=alt.Y('mean(shot90):Q', title='Shots per 90 min',
            axis=_rule_axis(list(range(0, 6, 1))))
)
rule2 = alt.Chart(df3).mark_rule(color='red', opacity=0.5).encode(
    x=alt.X('mean(goalshot):Q', title='Conversion rate (%)',
            axis=_rule_axis(list(range(0, 50, 10))))
)

# Quadrant captions placed near the four corners of the plot.
text1 = _quadrant_label(4, 5.8, 'aggressive, wasteful')
text2 = _quadrant_label(4, 0.2, 'passive, wasteful')
text3 = _quadrant_label(46, 5.8, 'aggressive, clinical')
text4 = _quadrant_label(46, 0.2, 'passive, clinical')

(scat + rule1 + rule2 + text1 + text2 + text3 + text4).properties(width=600, title="Attacking Efficiency", padding=20).configure_title(fontSize=20)

In [None]:
# Keep only forwards with at least 10 goals.
# The mask comes from df3's own 'goals' column, so the filter does not rely on
# df3 and fw_stats sharing an identical index.
df4 = df3[df3['goals'] >= 10]
df4

Unnamed: 0,player_name,team_title,goals,shots,shot90,goalshot,goal90,xG90
0,Harry Kane,Tottenham,23,138,4.010,16.667,0.668,0.716
1,Mohamed Salah,Liverpool,22,126,3.676,17.460,0.642,0.656
3,Son Heung-Min,Tottenham,17,68,1.950,25.000,0.487,0.351
4,Patrick Bamford,Leeds,17,107,3.122,15.888,0.496,0.596
5,Dominic Calvert-Lewin,Everton,16,83,2.596,19.277,0.500,0.633
...,...,...,...,...,...,...,...,...
2200,Renaud Ripart,Nimes,11,68,1.994,16.176,0.323,0.336
2201,Ludovic Blas,Nantes,10,71,2.263,14.085,0.319,0.279
2202,Sehrou Guirassy,Rennes,10,52,2.650,19.231,0.510,0.461
2203,Yoane Wissa,Lorient,10,59,1.972,16.949,0.334,0.497


In [None]:
# Same efficiency scatter, restricted to df4 and zoomed in via fixed axis domains.
# Each point is coloured by its goal tally (treated as a category, no legend).
tooltip_fields = [
    ('player_name', 'name'),
    ('team_title', 'team'),
    ('goals', 'goals'),
    ('shots', 'shots'),
    ('shot90', 'shot per 90 min'),
    ('goalshot', 'goals per shot'),
]

scat = alt.Chart(df4).mark_circle().encode(
    x=alt.X('goalshot:Q', scale=alt.Scale(domain=[10, 40])),
    y=alt.Y('shot90:Q', scale=alt.Scale(domain=[1.4, 5.6])),
    color=alt.Color('goals:N', legend=None),
    tooltip=[alt.Tooltip(field, title=title) for field, title in tooltip_fields],
).properties(width=800, height=500)

# Player-name labels, drawn just to the right of each point.
text = scat.mark_text(align='left', baseline='middle', dx=7).encode(text='player_name')

# rule1 / rule2 are the average lines created in the earlier chart cell (built on df3).
layered = scat + rule1 + rule2 + text
layered.properties(width=600, title="Attacking Efficiency", padding=20).interactive().configure_title(fontSize=20)

In [None]:
# Goals per 90 (y) against xG per 90 (x) for the players in df4.
tooltip_fields = [
    ('player_name', 'name'),
    ('team_title', 'team'),
    ('goals', 'goals'),
    ('shots', 'shots'),
    ('shot90', 'Shot per 90 min'),
    ('goalshot', 'Goals per shot'),
]

scat = alt.Chart(df4).mark_circle().encode(
    x=alt.X('xG90:Q'),
    y=alt.Y('goal90:Q'),
    color=alt.Color('goals:N', legend=None),
    tooltip=[alt.Tooltip(field, title=title) for field, title in tooltip_fields],
).properties(width=800, height=500)

# Diagonal reference line from (0, 0) to (1.5, 1.5), i.e. goals per 90 == xG per 90.
# This layer also supplies the axis titles of the combined chart.
line = pd.DataFrame({'Length': [0, 1.5], 'Width': [0, 1.5]})
axis_style = dict(grid=False, titleFontSize=16, labelFontSize=12, titlePadding=8)

line_plot = alt.Chart(line).mark_line(color='red', opacity=0.5).encode(
    x=alt.X('Length', title='xG per 90', axis=alt.Axis(**axis_style)),
    y=alt.Y('Width', title='goals per 90', axis=alt.Axis(**axis_style)),
)

# Player-name labels, drawn just to the right of each point.
text = scat.mark_text(align='left', baseline='middle', dx=7).encode(text='player_name')

layered = scat + line_plot + text
layered.properties(width=600, title="xG vs Goals", padding=20).interactive().configure_title(fontSize=20).configure_axis(grid=False, domain=False)

 - Hedi K, Juli 2021

