In [1]:
import pandas as pd
import numpy as np
import glob
from functools import reduce
import ipywidgets 
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly
from PIL import Image

def clean_cols(df):
    """Blank out placeholder ``Unnamed...`` column labels of ``df`` in place.

    Every string label that contains ``'Unnamed'`` is renamed to ``''`` on
    every column level.  These are the labels this notebook gets for empty
    header cells when a CSV is read with several header rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are cleaned.  Both ``MultiIndex`` columns
        and a flat ``Index`` are accepted.

    Returns
    -------
    None
        ``df`` is modified in place; nothing is returned, so calling this as
        the last statement of a cell displays no output.
    """
    columns = df.columns
    is_multi = isinstance(columns, pd.MultiIndex)
    # A flat Index has no ``.levels``; treat it as a single level so the
    # helper works for single-header frames too.
    levels = columns.levels if is_multi else [columns]

    for level_no, labels in enumerate(levels):
        # Only string labels can be placeholders.  Checking the type
        # explicitly keeps numeric levels from raising and prevents
        # non-string labels in a mixed level from being blanked by accident.
        blanks = {
            label: ''
            for label in labels
            if isinstance(label, str) and 'Unnamed' in label
        }
        if not blanks:
            continue
        if is_multi:
            df.rename(columns=blanks, level=level_no, inplace=True)
        else:
            df.rename(columns=blanks, inplace=True)


# Base folder of the local checkout; the CSV, logo and HTML output paths in
# the cells below are built from it.
# NOTE(review): `dir` shadows the Python builtin of the same name, and both
# values are absolute, machine-specific Windows paths. Renaming or making them
# configurable has to be done together with every cell that uses them.
dir = "C:\\Users\\Corey\\"
# Folder that receives a second copy of the exported HTML chart.
doc_dir = "C:\\Users\\Corey\\coreyrastello\\docs\\"

In [2]:
# Load the player goal/shot-creation stats CSV: it has two header rows and
# its first four columns form the row index.
df = pd.read_csv(
    rf"{dir}coreyrastello\data\premier-league\AllSeasons\NonMatchData\epl_player_season_stats_goal_shot_creation_AllSeasons_Season.csv",
    index_col=[0, 1, 2, 3],
    header=[0, 1],
)

# Load the schedule CSV, indexed by its game_id column.
sch = pd.read_csv(
    rf"{dir}coreyrastello\data\premier-league\AllSeasons\NonMatchData\epl_schedule_AllSeasons_Season.csv",
    index_col='game_id',
)

# Blank the placeholder "Unnamed" header labels of df in place.
clean_cols(df)


In [3]:
# Preview the first rows to check that the two header rows and the index
# columns were parsed as intended.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,nation,pos,age,born,90s,SCA,SCA,SCA Types,SCA Types,SCA Types,SCA Types,SCA Types,SCA Types,GCA,GCA,GCA Types,GCA Types,GCA Types,GCA Types,GCA Types,GCA Types
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,SCA,SCA90,PassLive,PassDead,TO,...,Fld,Def,GCA,GCA90,PassLive,PassDead,TO,Sh,Fld,Def
league,season,team,player,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
ENG-Premier League,1819,Arsenal,Aaron Ramsey,WAL,MF,27,1990,14.8,48,3.25,41,0,3,...,0,1,10,0.68,9,0,1,0,0,0
ENG-Premier League,1819,Arsenal,Ainsley Maitland-Niles,ENG,DF,20,1997,11.0,19,1.73,16,1,1,...,0,0,2,0.18,2,0,0,0,0,0
ENG-Premier League,1819,Arsenal,Alex Iwobi,NGA,"FW,MF",22,1996,21.9,84,3.84,77,1,3,...,0,0,11,0.5,11,0,0,0,0,0
ENG-Premier League,1819,Arsenal,Alexandre Lacazette,FRA,FW,27,1991,27.8,84,3.02,55,0,16,...,5,2,15,0.54,10,0,3,1,1,0
ENG-Premier League,1819,Arsenal,Bernd Leno,GER,GK,26,1992,31.5,0,0.0,0,0,0,...,0,0,0,0.0,0,0,0,0,0,0


In [4]:
# Turn the index levels loaded via index_col into ordinary columns so the
# later cells can select them by name (df['season'], df['team'], df['player']).
df = df.reset_index()

In [5]:
# Keep only rows with more than ten 90s played, a forward/midfielder position
# label, and one of the listed season codes. The three conditions are combined
# into a single boolean mask.
df = df.loc[
    (df['90s'] > 10)
    & df['pos'].isin(['FW', 'MF', 'MF,FW', 'FW,MF'])
    & df['season'].isin([1920, 2021, 2122, 2223, 2324, 2425])
]

# Sanity check: display the position labels that survived the filter.
df.pos.unique()


array(['FW', 'MF', 'FW,MF', 'MF,FW'], dtype=object)

In [7]:
# Animated scatter of SCA90 (x) against GCA90 (y): one animation frame per
# season, points coloured by team and tracked across frames by player.
fig = px.scatter(
    x=df['SCA']['SCA90'],
    y=df['GCA']['GCA90'],
    color=df['team'],
    text=df['player'],
    hover_data=[df['player'], df['nation']],
    animation_frame=df['season'],
    animation_group=df['player'],
    height=720,
    width=1280,
    title="SCA & GCA - Attacking Efficiency (90s > 10)",
    # The inputs are passed as Series rather than column names, so the label
    # keys below refer to the px argument names.
    labels={
        "x": "<b>SCA90</b>",
        "y": "<b>GCA90</b>",
        "color": "Team",
        "hover_data_0": "Player",
        "hover_data_1": "Nation",
    },
    # Fixed axis ranges keep the view stable while the animation plays.
    range_x=[-0.2, 10],
    range_y=[-0.2, 2],
)

fig.update_layout(
    showlegend=True,
    # League logo anchored just above the top-right corner of the plot area.
    images=[dict(
        source=Image.open(rf"{dir}coreyrastello\data\premier-league\logos\EPL.png"),
        xref="paper", yref="paper",
        x=1, y=1.03,
        sizex=0.2, sizey=0.2,
        xanchor="right", yanchor="bottom",
    )],
    # Extra top margin leaves room for the title and the logo.
    margin=dict(l=20, r=20, t=120, b=20),
    paper_bgcolor='lavender',
    title={
        'font': {
            'family': 'Arial',
            'size': 25,
            'color': '#1C2C5b',
        }
    },
)

# Put each player label underneath its marker.
fig.update_traces(textposition='bottom center')

# Axis styling. The axis-title font is set through `title_font` (title.font);
# the older `titlefont_*` spelling is deprecated and rejected by newer plotly
# releases.
fig.update_yaxes(
    showgrid=True,
    tickfont_family='Arial Black',
    tickfont_size=15,
    tickfont_color='#1C2C5b',
    title_font=dict(size=20, color='#1C2C5b'),
)
fig.update_xaxes(
    showgrid=True,
    tickfont_family='Arial Black',
    tickfont_size=15,
    tickfont_color='#1C2C5b',
    title_font=dict(size=20, color='#1C2C5b'),
)

# Export the chart twice: once next to the analysis code and once into the
# docs folder.
fig.write_html(rf"{dir}coreyrastello\visualization\python\premierleague_SCA_GCA_animated.html")
fig.write_html(rf"{doc_dir}premierleague_SCA_GCA_animated.html")
fig.show()