In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

directory = "C:/Users/ybena/Documents/FM_Stat_Analysis"

def imp_fmt_data(dir):
    """
    This function imports the attributes and statistics
    datasets and formats it adequately to work on it
    """
    os.chdir(dir) # change directory
    # import attributes dataset
    attributes = pd.read_html(dir + "/squad_attributes.html")[0]
    # import stats dataset
    stats = pd.read_html(dir + "/squad_stats.html")[0]
    # convert unavailable data ('-') to NaN
    for df in (attributes,stats):
        df.replace('-',np.nan,inplace=True)
    # remove units ('km', '%') to treat as numeric data
    for cols in ['Distance','Dist/90']: # remove 'km'
        stats[cols] = stats[cols].str.replace('km','').astype('float')
    rm_pct = ['Gwin','Pens Saved Ratio','Pen/R','Cr C/A','Hdr %',
              'Pas %','Shot %','Tck R']
    for cols in rm_pct: #remove '%' and convert to decimal
        stats[cols] = stats[cols].str.replace('%','').astype('float')
        stats[cols] = stats[cols]/100
    # format App to get number of starts + number of substitute entries
    apps = stats['Apps']
    stats['Apps'] = [int(x[0:x.find(' (')])+int(x[(x.find(' (')+2):len(x)-1]) \
         if x.find(' (')!=-1 else x for x in apps]
    # format data to correct type
    ch_to_int = ['Age','AT Apps','AT Gls','AT Lge Apps','AT Lge Gls','Apps',
                 'Ast','Clean sheets','FA','Fls','D','Lost','G. Mis','Won',
                 'Gls','Conc','Int Ast','Int Conc','Mins','Last C','Last Gl',
                 'Pens','Pens Faced','Pens Saved','Pens S','PoM','Red',
                 'Starts','Tgls','Tcon','Yel','Hdrs A','ChC','Cr A','Cr C',
                 'Drb','Hdrs','Itc','K Hdrs','K Pas','K Tck','Gl Mst',
                 'Mstks','Off','Pas A','Ps C','Svh','Svp','Svt','Shots',
                 'ShT','Tck A','Tck W']
    for cols in stats.columns[2:len(stats.columns)]:
        stats[cols] = stats[cols].astype('float16')
    for cols in ch_to_int:
        stats[cols] = stats[cols].astype('Int16')
    # return attributes dataset and statistics dataset
    return [attributes,stats]

# import attributes and stats datasets
[squad_attr,squad_stat] = imp_fmt_data(directory)

In [3]:
squad_stat.head(5)

Unnamed: 0,Name,Position,Age,AT Apps,AT Gls,AT Lge Apps,AT Lge Gls,Apps,Ast,Mins/Gl,...,Svt,Shots,ShT,ShT/90,Shot %,Shot/90,Tck A,Tck R,Tck W,Tck
0,Riccardo Galli,GK,26,120,,114,,6,0,,...,9.0,0,,,,,0,,,
1,Stefano Minelli,GK,26,179,,177,,32,0,,...,22.0,0,,,,,1,1.0,1.0,0.029999
2,Daniele Capelli,D (RC),33,326,10.0,323,9.0,24,1,1986.0,...,,11,9.0,0.409912,0.819824,0.5,23,0.910156,21.0,0.950195
3,Carlo Pelagatti,D (RC),31,280,9.0,278,8.0,25,0,2224.0,...,,8,5.0,0.199951,0.629883,0.320068,28,1.0,28.0,1.129883
4,Matteo Lovato,D (C),20,9,1.0,6,1.0,9,0,655.0,...,,3,2.0,0.27002,0.669922,0.409912,4,1.0,4.0,0.549805


In [76]:
testdf=squad_stat[['Name','Position','Starts']+['Ps A/90','Ps C/90']]
testdf.head(5)

Unnamed: 0,Name,Position,Starts,Ps A/90,Ps C/90
0,Riccardo Galli,GK,6,35.84375,22.671875
1,Stefano Minelli,GK,32,28.46875,20.46875
2,Daniele Capelli,D (RC),22,49.125,44.71875
3,Carlo Pelagatti,D (RC),25,47.03125,41.53125
4,Matteo Lovato,D (C),7,40.25,32.15625


In [81]:
thisdict = {
  '1v1 def': "Ford",
  "model": "Mustang",
  "year": 1964
}
print(thisdict)

{'1v1 def': 'Ford', 'model': 'Mustang', 'year': 1964}


In [67]:
def pointsize(starts):
    return [0.02 if x<10 else 0.04 if x>25 else 0.08 for x in starts]
pointsize(testdf['Starts'])
xx = np.linspace(0,100,1000)
import plotly.express as px
import plotly.graph_objects as go
fig=go.Figure()
fig.add_trace(go.Scatter(x=testdf['Ps A/90'],y=testdf['Ps C/90'],mode='markers',text=[testdf['Name']+" "+testdf['Position']]))
fig.add_trace(go.Scatter(x=xx,y=xx,text=None))
fig.add_trace(go.Scatter(x=xx,y=0.9*xx))
fig.show()

In [82]:
def scatter_intplot(df,kpi_x,kpi_y,tg_ratio):
    """
    This function actually produces the interactive plots, based on two KPIs to compare and a target completion ratio.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        # KPIs
        x=df[kpi_x], y=df[kpi_y],
        mode='markers',
        # label the text boxes with useful info as Name, Position, Club, Division, Apps, Starts
        text=df['Name']
    ))
    # Add 100% and target completion ratios
    cp = np.linspace(0,100,1000)
    fig.add_trace(go.Scatter(
        x=cp, y=cp,
        name="100% completion ratio"
    ))
    fig.add_trace(go.Scatter(
        x=cp, y=tg_ratio*cp,
        name=str(int(tg_ratio*100)) + "% completion ratio"
    ))
    fig.update_layout(title='Passing completion',
                     xaxis_title=kpi_x, yaxis_title=kpi_y)
    fig.show()

scatter_intplot(testdf,'Ps A/90','Ps C/90',0.9)