In [None]:
!pip install pandas
!pip install plotly==5.22.0

In [297]:
import pandas as pd
import numpy as np
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
import kaleido
import chart_studio.plotly as py
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise plotly's offline notebook rendering.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode — NOTE(review): confirm this is still needed, cufflinks is not used in the visible cells.
cf.go_offline()

In [5]:
# Load the player statistics CSV from output_files/ into stats_df (path is relative to the notebook's working directory).
stats_df = pd.read_csv("output_files/correctly_grouped_data.csv")

In [16]:
# Remove the 'Unnamed: 0' column (presumably the index written out by an earlier to_csv — verify).
# errors='ignore' keeps this cell idempotent: re-running it after the column
# is already gone no longer raises KeyError.
stats_df = stats_df.drop(columns=['Unnamed: 0'], errors='ignore')

In [34]:
# Rows flagged poy_winner == 1 whose goal tally (Gls) exceeds 15.
highest_scoring = stats_df.loc[
    stats_df["Gls"].gt(15) & stats_df["poy_winner"].eq(1)
]

In [37]:
# Order the rows by their season label, ascending.
highest_scoring = highest_scoring.sort_values('season', ascending=True)

In [38]:
# Display the season-sorted highest_scoring frame for inspection.
highest_scoring

Unnamed: 0,Player,Pos,Squad,Age,MP,Starts,Min,90s,Gls,Ast,...,year,season,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,poy_winner
4923,Eric Cantona,FW,Manchester Utd,27,34,34,2987,33.2,18,12,...,1993,1993/1994,0.0,0.0,0.0,0.0,0,0,0,1
412,Alan Shearer,FW,Blackburn,23,42,42,3770,41.9,34,13,...,1994,1994/1995,0.0,0.0,0.0,0.0,0,0,0,1
11457,Michael Owen,FW,Liverpool,17,36,34,3003,33.4,18,10,...,1997,1997/1998,0.0,0.0,0.0,0.0,0,0,0,1
4589,Dwight Yorke,"FW,MF",Manchester Utd,26,32,32,2781,30.9,18,11,...,1998,1998/1999,1.0,0.0,0.0,0.0,0,0,0,1
9209,Kevin Phillips,FW,Sunderland,26,36,36,3198,35.5,30,4,...,1999,1999/2000,0.0,0.0,0.0,0.0,0,0,0,1
14464,Ruud Van Nistelrooy,FW,Manchester Utd,26,34,33,2907,32.3,25,4,...,2002,2002/2003,0.0,0.0,0.0,0.0,0,0,0,1
16276,Thierry Henry,FW,Arsenal,25,37,37,3330,37.0,30,6,...,2003,2003/2004,0.0,0.0,0.0,0.0,0,0,0,1
16278,Thierry Henry,FW,Arsenal,27,32,30,2672,29.7,27,8,...,2005,2005/2006,0.0,0.0,0.0,0.0,0,0,0,1
3026,Cristiano Ronaldo,"FW,MF",Manchester Utd,22,34,31,2747,30.5,31,6,...,2007,2007/2008,0.0,0.0,0.0,0.0,0,0,0,1
3027,Cristiano Ronaldo,"FW,MF",Manchester Utd,23,33,31,2742,30.5,18,6,...,2008,2008/2009,0.0,0.0,0.0,0.0,0,0,0,1


In [299]:
# Bubble chart of goals (Gls) per season for the rows in highest_scoring,
# coloured by player and sized by goal count.
# The Plotly Express figure is bound to `fig` so that later cells using `fig`
# (e.g. fig.write_image) operate on this chart rather than on an empty
# go.Figure().
fig = px.scatter(highest_scoring, x='season', y='Gls', color='Player', size='Gls', hover_data='Player')
# Last expression of the cell: renders the figure in the notebook.
fig


In [336]:
# Bar chart of goals per season for the rows in highest_scoring, one colour per player.
px.bar(
    highest_scoring,
    x='season',
    y='Gls',
    color='Player',
    hover_data='Player',
    title='Number of Goals scored by the winner of the Player of the Season Award',
)

In [289]:
# Write whichever figure is currently bound to `fig` to a PNG under output_files/.
# Static image export uses the "kaleido" engine and raises ValueError if the kaleido package is not installed.
fig.write_image("output_files/fig1.png")

ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido


In [44]:
# Line chart of goals (Gls) per season for the rows in highest_scoring.
# `labels` is keyed by DataFrame column name; keys such as 'x'/'y' match no
# column and are silently ignored, so the axis titles are keyed on
# 'season' and 'Gls' here.
# NOTE(review): highest_scoring only holds poy_winner rows with Gls > 15, so
# the title may overstate what is plotted — confirm the intended wording.
px.line(
    highest_scoring,
    x='season',
    y='Gls',
    title='Highest number of recorded goals each season',
    labels={'season': 'Season', 'Gls': 'Goals'},
)

Determining the age groups of players in the EPL over the past 2 seasons

In [48]:
# Split stats_df into four age bands: <=20, 21-24, 25-29 and >=30.
# NOTE(review): these use every season in stats_df, not just the last two — confirm intent.
under_20 = stats_df.loc[stats_df['Age'].le(20)]
age20_24 = stats_df.loc[stats_df['Age'].gt(20) & stats_df['Age'].le(24)]
age25_29 = stats_df.loc[stats_df['Age'].ge(25) & stats_df['Age'].le(29)]
above_30 = stats_df.loc[stats_df['Age'].ge(30)]

Determining which clubs have the highest number of young players in the past 2 seasons

In [63]:
# Restrict the data to the two most recent seasons in scope.
last_two_seasons = ['2022/2023', '2023/2024']
age_df = stats_df.loc[stats_df['season'].isin(last_two_seasons)]


In [64]:
# "Young" here means aged 20 or under.
young_players = age_df.loc[age_df['Age'].le(20)]

In [65]:
# Count young_players rows per club. This is a row count, so a player who
# appears in more than one row for the same squad is counted once per row.
club_youngsters = (
    young_players
    .groupby('Squad')
    .size()
    .reset_index(name='Number of young players')
)

In [67]:
# Rank clubs from most to fewest young-player rows (display only; club_youngsters itself is not reordered).
club_youngsters.sort_values(
    'Number of young players',
    ascending=False,
)

Unnamed: 0,Squad,Number of young players
4,Brighton,22
6,Chelsea,18
12,Liverpool,15
19,Southampton,12
15,Manchester Utd,12
16,Newcastle Utd,11
22,Wolves,10
20,Tottenham,10
18,Sheffield Utd,10
8,Everton,9


In [68]:
# Pie chart: each club's share of the young-player rows counted in club_youngsters.
px.pie(
    club_youngsters,
    values='Number of young players',
    names='Squad',
    title='Distribution of young players in EPL in the last two seasons',
)

Determining the most progressive outfield players over the past 3 seasons

First, determine the players with the highest numbers of progressive passes, carries and receives

In [247]:
# Seasons under study, and the mean PrgP across all stats_df rows in each one
# (avg_prgP_list is aligned position-by-position with three_seasons).
three_seasons = ['2021/2022', '2022/2023', '2023/2024']
avg_prgP_list = [
    stats_df.loc[stats_df['season'] == season_label, 'PrgP'].mean()
    for season_label in three_seasons
]

In [248]:
# Lookup table: one row per season with that season's mean PrgP.
avg_prgP_df = pd.DataFrame(
    {
        'season': three_seasons,
        'Avg_PrgP': avg_prgP_list,
    }
)

# Inner join on season: rows from seasons outside three_seasons are dropped,
# and every remaining row gains its season's Avg_PrgP.
prgP_df = stats_df.merge(avg_prgP_df, on='season')
# Keep rows whose PrgP is strictly above their season's average.
prgP_players = prgP_df.loc[prgP_df['PrgP'].gt(prgP_df['Avg_PrgP'])]

In [106]:
# Largest PrgP value among the above-average rows; used to eyeball a sensible cut-off.
prgP_players['PrgP'].max()

376

In [249]:
# Narrow the shortlist to rows with at least 100 PrgP.
prgP_players = prgP_players.loc[prgP_players['PrgP'].ge(100)]

In [250]:
# Display prgP_players for inspection.
prgP_players

Unnamed: 0,Player,Pos,Squad,Age,MP,Starts,Min,90s,Gls,Ast,...,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,poy_winner,Total_Prg,is_top_player,Avg_PrgP
1,Aaron Cresswell,DF,West Ham,31,31,31,2726,30.3,2,3,...,1.1,3.2,4.2,54,186,99,0,339,False,52.327839
2,Aaron Cresswell,DF,West Ham,32,28,24,2235,24.8,0,1,...,0.3,3.7,4.0,35,145,82,0,262,False,50.432337
14,Abdoulaye Doucouré,MF,Everton,28,30,29,2537,28.2,2,4,...,2.2,2.0,4.2,33,113,54,0,200,False,52.327839
28,Adam Webster,DF,Brighton,26,22,16,1434,15.9,2,0,...,0.9,0.0,0.9,19,109,7,0,135,False,52.327839
29,Adam Webster,DF,Brighton,27,27,23,1985,22.1,0,0,...,1.8,0.2,1.9,19,119,9,0,147,False,50.432337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1675,Youri Tielemans,MF,Leicester City,25,31,27,2344,26.0,3,2,...,0.9,4.7,5.6,35,181,56,0,272,False,50.432337
1676,Youri Tielemans,"MF,FW",Aston Villa,26,32,17,1622,18.0,2,6,...,1.3,2.6,3.9,23,128,46,0,197,False,50.584483
1680,Yves Bissouma,MF,Tottenham,26,28,26,2068,23.0,0,0,...,1.4,0.4,1.8,43,169,13,0,225,False,50.584483
1690,İlkay Gündoğan,MF,Manchester City,30,27,20,1857,20.6,8,4,...,9.0,4.9,13.9,60,126,107,0,293,False,52.327839


In [159]:
# Mean PrgC across all stats_df rows for each season in three_seasons
# (aligned position-by-position with three_seasons).
avg_prgC_list = [
    stats_df.loc[stats_df['season'] == season_label, 'PrgC'].mean()
    for season_label in three_seasons
]

In [160]:
# Lookup table: one row per season with that season's mean PrgC.
avg_prgC_df = pd.DataFrame(
    {
        'season': three_seasons,
        'Avg_PrgC': avg_prgC_list,
    }
)

# Inner join on season, then keep rows strictly above their season's average PrgC.
prgC_df = stats_df.merge(avg_prgC_df, on='season')
prgC_players = prgC_df.loc[prgC_df['PrgC'].gt(prgC_df['Avg_PrgC'])]

In [161]:
# Largest PrgC value among the above-average rows; used to eyeball a sensible cut-off.
prgC_players['PrgC'].max()

218

In [251]:
# Narrow the shortlist to rows with at least 100 PrgC.
prgC_players = prgC_players.loc[prgC_players['PrgC'].ge(100)]

In [163]:
# Display prgC_players for inspection.
prgC_players

Unnamed: 0,Player,Pos,Squad,Age,MP,Starts,Min,90s,Gls,Ast,...,season,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,poy_winner,Avg_PrgC
44,Alejandro Garnacho,FW,Manchester Utd,19,36,30,2565,28.5,7,4,...,2023/2024,8.4,8.3,5.1,13.4,178,62,281,0,24.651724
51,Alex Iwobi,"FW,MF",Fulham,27,30,25,2192,24.4,5,2,...,2023/2024,5.3,5.3,4.7,10.0,109,147,175,0,24.651724
74,Allan Saint-Maximin,"FW,MF",Newcastle Utd,24,35,31,2804,31.2,5,5,...,2021/2022,5.2,5.2,4.3,9.5,180,87,167,0,24.836996
117,Anthony Gordon,FW,Newcastle Utd,22,35,34,2890,32.1,11,10,...,2023/2024,10.2,9.4,8.0,17.5,138,101,232,0,24.651724
123,Antonee Robinson,DF,Fulham,24,35,35,3088,34.3,0,1,...,2022/2023,0.1,0.1,1.3,1.3,113,129,153,0,22.676626
189,Bernardo Silva,"MF,FW",Manchester City,26,35,33,2857,31.7,8,4,...,2021/2022,6.7,6.7,6.5,13.2,146,170,277,0,24.836996
191,Bernardo Silva,"MF,FW",Manchester City,28,33,29,2578,28.6,6,9,...,2023/2024,3.7,3.7,7.6,11.3,140,177,260,0,24.651724
230,Bukayo Saka,"FW,MF",Arsenal,19,38,36,2978,33.1,11,7,...,2021/2022,10.3,8.7,7.5,16.3,140,101,388,0,24.836996
231,Bukayo Saka,FW,Arsenal,20,38,37,3181,35.3,14,11,...,2022/2023,11.2,9.1,8.5,17.6,179,109,520,0,22.676626
232,Bukayo Saka,FW,Arsenal,21,35,35,2919,32.4,16,9,...,2023/2024,15.5,10.8,10.5,21.2,155,126,508,0,24.651724


In [164]:
# Mean PrgR across all stats_df rows for each season in three_seasons
# (aligned position-by-position with three_seasons).
avg_prgR_list = [
    stats_df.loc[stats_df['season'] == season_label, 'PrgR'].mean()
    for season_label in three_seasons
]

In [165]:
# Lookup table: one row per season with that season's mean PrgR.
avg_prgR_df = pd.DataFrame(
    {
        'season': three_seasons,
        'Avg_PrgR': avg_prgR_list,
    }
)

# Inner join on season, then keep rows strictly above their season's average PrgR.
prgR_df = stats_df.merge(avg_prgR_df, on='season')
prgR_players = prgR_df.loc[prgR_df['PrgR'].gt(prgR_df['Avg_PrgR'])]

In [255]:
# Largest PrgR value in prgR_players; used to eyeball a sensible cut-off.
prgR_players['PrgR'].max()

520

In [253]:
# Narrow the shortlist to rows with at least 200 PrgR.
prgR_players = prgR_players.loc[prgR_players['PrgR'].ge(200)]

In [254]:
# Display prgR_players for inspection.
prgR_players

Unnamed: 0,Player,Pos,Squad,Age,MP,Starts,Min,90s,Gls,Ast,...,season,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,poy_winner,Avg_PrgR
44,Alejandro Garnacho,FW,Manchester Utd,19,36,30,2565,28.5,7,4,...,2023/2024,8.4,8.3,5.1,13.4,178,62,281,0,50.062069
189,Bernardo Silva,"MF,FW",Manchester City,26,35,33,2857,31.7,8,4,...,2021/2022,6.7,6.7,6.5,13.2,146,170,277,0,51.842491
191,Bernardo Silva,"MF,FW",Manchester City,28,33,29,2578,28.6,6,9,...,2023/2024,3.7,3.7,7.6,11.3,140,177,260,0,50.062069
216,Brennan Johnson,FW,Tottenham,22,32,23,2085,23.2,5,10,...,2023/2024,10.3,10.3,8.1,18.3,97,72,293,0,50.062069
227,Bryan Mbeumo,FW,Brentford,21,35,34,2905,32.3,4,7,...,2021/2022,9.3,8.5,3.7,12.2,81,56,273,0,51.842491
230,Bukayo Saka,"FW,MF",Arsenal,19,38,36,2978,33.1,11,7,...,2021/2022,10.3,8.7,7.5,16.3,140,101,388,0,51.842491
231,Bukayo Saka,FW,Arsenal,20,38,37,3181,35.3,14,11,...,2022/2023,11.2,9.1,8.5,17.6,179,109,520,0,49.949033
232,Bukayo Saka,FW,Arsenal,21,35,35,2919,32.4,16,9,...,2023/2024,15.5,10.8,10.5,21.2,155,126,508,0,50.062069
413,Dejan Kulusevski,"FW,MF",Tottenham,23,36,31,2762,30.7,8,3,...,2023/2024,4.7,4.7,7.7,12.4,151,144,388,0,50.062069
570,Gabriel Martinelli,FW,Arsenal,21,36,34,2789,31.0,15,5,...,2022/2023,9.1,9.1,9.1,18.2,124,94,350,0,49.949033


In [256]:
# Carry shortlist used for the final merge: rows with PrgC of 100 or more.
prgC_players_filtered = prgC_players.loc[prgC_players['PrgC'].ge(100)]


In [257]:
# Pass shortlist used for the final merge: rows with PrgP of 100 or more.
prgP_players_filtered = prgP_players.loc[prgP_players['PrgP'].ge(100)]


In [238]:
# Display the PrgP shortlist for inspection.
prgP_players_filtered

Unnamed: 0,Player,Pos,Squad,Age,MP,Starts,Min,90s,Gls,Ast,...,season,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,poy_winner,Avg_PrgP
1,Aaron Cresswell,DF,West Ham,31,31,31,2726,30.3,2,3,...,2021/2022,1.1,1.1,3.2,4.2,54,186,99,0,52.327839
49,Alex Iwobi,"MF,FW",Everton,26,38,38,3378,37.5,2,7,...,2022/2023,2.5,2.5,6.1,8.5,90,194,145,0,50.432337
66,Alexis Mac Allister,MF,Brighton,23,35,31,2886,32.1,10,2,...,2022/2023,12.1,7.3,4.3,11.6,69,174,77,0,50.432337
67,Alexis Mac Allister,MF,Liverpool,24,33,31,2599,28.9,5,5,...,2023/2024,3.7,2.9,3.6,6.4,44,209,48,0,50.584483
93,Andrew Robertson,DF,Liverpool,27,29,29,2537,28.2,3,10,...,2021/2022,1.4,1.4,5.6,7.0,96,186,239,0,52.327839
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1654,Willian,FW,Fulham,34,31,24,2053,22.8,4,2,...,2023/2024,5.1,3.6,3.9,7.5,92,153,181,0,50.584483
1674,Youri Tielemans,MF,Leicester City,24,32,29,2631,29.2,6,4,...,2021/2022,4.4,2.8,4.2,7.0,31,202,69,0,52.327839
1675,Youri Tielemans,MF,Leicester City,25,31,27,2344,26.0,3,2,...,2022/2023,2.5,0.9,4.7,5.6,35,181,56,0,50.432337
1680,Yves Bissouma,MF,Tottenham,26,28,26,2068,23.0,0,0,...,2023/2024,1.4,1.4,0.4,1.8,43,169,13,0,50.584483


In [258]:
# Receive shortlist used for the final merge: rows with PrgR of 250 or more.
prgR_players_filtered = prgR_players.loc[prgR_players['PrgR'].ge(250)]

In [240]:
# Display the PrgR shortlist for inspection.
prgR_players_filtered

Unnamed: 0,Player,Pos,Squad,Age,MP,Starts,Min,90s,Gls,Ast,...,season,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,poy_winner,Avg_PrgR
44,Alejandro Garnacho,FW,Manchester Utd,19,36,30,2565,28.5,7,4,...,2023/2024,8.4,8.3,5.1,13.4,178,62,281,0,50.062069
189,Bernardo Silva,"MF,FW",Manchester City,26,35,33,2857,31.7,8,4,...,2021/2022,6.7,6.7,6.5,13.2,146,170,277,0,51.842491
191,Bernardo Silva,"MF,FW",Manchester City,28,33,29,2578,28.6,6,9,...,2023/2024,3.7,3.7,7.6,11.3,140,177,260,0,50.062069
216,Brennan Johnson,FW,Tottenham,22,32,23,2085,23.2,5,10,...,2023/2024,10.3,10.3,8.1,18.3,97,72,293,0,50.062069
227,Bryan Mbeumo,FW,Brentford,21,35,34,2905,32.3,4,7,...,2021/2022,9.3,8.5,3.7,12.2,81,56,273,0,51.842491
230,Bukayo Saka,"FW,MF",Arsenal,19,38,36,2978,33.1,11,7,...,2021/2022,10.3,8.7,7.5,16.3,140,101,388,0,51.842491
231,Bukayo Saka,FW,Arsenal,20,38,37,3181,35.3,14,11,...,2022/2023,11.2,9.1,8.5,17.6,179,109,520,0,49.949033
232,Bukayo Saka,FW,Arsenal,21,35,35,2919,32.4,16,9,...,2023/2024,15.5,10.8,10.5,21.2,155,126,508,0,50.062069
413,Dejan Kulusevski,"FW,MF",Tottenham,23,36,31,2762,30.7,8,3,...,2023/2024,4.7,4.7,7.7,12.4,151,144,388,0,50.062069
570,Gabriel Martinelli,FW,Arsenal,21,36,34,2789,31.0,15,5,...,2022/2023,9.1,9.1,9.1,18.2,124,94,350,0,49.949033


In [264]:
# Keep only the (Player, season) pairs present in all three shortlists:
# inner-join carries with passes, then join the result with receives.
merged_players = pd.merge(
    pd.merge(
        prgC_players_filtered.loc[:, ['Player', 'season', 'PrgC']],
        prgP_players_filtered.loc[:, ['Player', 'season', 'PrgP']],
        on=['Player', 'season'],
    ),
    prgR_players_filtered.loc[:, ['Player', 'season', 'PrgR']],
    on=['Player', 'season'],
)

In [267]:
# Remove fully identical rows from the merged shortlist.
# Reassigning (rather than inplace=True) leaves the input frame untouched and
# keeps the cell safe to re-run.
merged_players = merged_players.drop_duplicates()


In [317]:
# Display the de-duplicated shortlist of player-seasons.
merged_players

Unnamed: 0,Player,season,PrgC,PrgP,PrgR
0,Bernardo Silva,2021/2022,146,170,277
1,Bernardo Silva,2023/2024,140,177,260
2,Bukayo Saka,2021/2022,140,101,388
3,Bukayo Saka,2022/2023,179,109,520
4,Bukayo Saka,2023/2024,155,126,508
5,Dejan Kulusevski,2023/2024,151,144,388
6,Jack Grealish,2021/2022,153,100,267
7,Jack Grealish,2022/2023,143,100,310
8,Jack Harrison,2022/2023,103,103,255
9,João Cancelo,2021/2022,125,357,270


In [327]:
# Render merged_players as a plotly table.
# Header and cell values are both derived from merged_players.columns so the
# two can never drift out of sync.
fig = go.Figure(data=[go.Table(
    header=dict(values=list(merged_players.columns),
                fill_color='paleturquoise',
                align='left'),
    cells=dict(values=[merged_players[col] for col in merged_players.columns],
               fill_color='lavender',
               align='left'))
])
# The title has to be set on the figure's own layout; constructing a separate
# go.Layout object and discarding it has no effect on `fig`.
fig.update_layout(
    width=1000,
    height=800,
    title='Players with the most progressive carries, receives and passes in 2021/2022, 2022/2023 & 2023/2024 season',
)
fig.show()

Most penalised

In [307]:
# Remove the 'is_top_player' helper column if it is present.
# errors='ignore' makes this a no-op when the column does not exist, so the
# cell no longer raises KeyError on a fresh kernel or when re-run.
stats_df = stats_df.drop(columns=['is_top_player'], errors='ignore')

In [None]:
import pandas as pd
import plotly.express as px




# Total cards per stats_df row: yellow (CrdY) plus red (CrdR).
stats_df['Total_Cards'] = stats_df['CrdY'] + stats_df['CrdR']

# Sum the cards for every (Squad, season) pair.
agg_df = stats_df.groupby(['Squad', 'season'])['Total_Cards'].sum().reset_index()

# Treat season as an ordered categorical and sort by it, so rows follow the
# category order of the season labels.
agg_df['season'] = pd.Categorical(agg_df['season'], ordered=True)
agg_df = agg_df.sort_values('season')

# For each season keep the single row with the highest card total.
# observed=True is passed explicitly because `season` is categorical: it
# avoids relying on the deprecated implicit default and only groups seasons
# that actually occur in the data.
max_cards_df = agg_df.loc[
    agg_df.groupby('season', observed=True)['Total_Cards'].idxmax()
]

# One bubble per season: the squad with the most cards, sized by its total.
fig = px.scatter(max_cards_df, x='season', y='Total_Cards', color='Squad', size='Total_Cards',
                 hover_data=['Squad', 'Total_Cards'],
                 title='Teams with Highest Total Red and Yellow Cards per Season')

# Colouring by Squad splits the data into one trace per squad; force the
# x axis back into ascending season order.
fig.update_layout(xaxis={'categoryorder':'category ascending'})

fig.show()
