In [1]:
import numpy as np
import pandas as pd
import streamlit as st
import scipy.stats as stat

In [2]:
# Load the league export. read_html returns every table on the page; the first
# one is used as the player table.
df = pd.read_html("Bulgarian First League - end of 32-33.html", encoding="utf-8", thousands=",")[0]

# Text columns -> numbers. Each keeps only the leading numeric token of the text.
# Height: everything before the first space.
df["Height"] = pd.to_numeric(df["Height"].str.split(' ').str[0])
# Wage: what follows '€', cut at the next space, thousands separators removed.
df["Wage"] = pd.to_numeric(
    df["Wage"].str.split('€').str[1].str.split(' ').str[0].str.replace(',', '')
)
df["Expires"] = pd.to_datetime(df["Expires"], dayfirst=True)
# Distance: everything before the first 'k' (presumably a "km" suffix - verify).
df["Distance"] = pd.to_numeric(df["Distance"].str.split('k').str[0])

# Columns exported as "NN%" text; converted to fractions in [0, 1].
pct_cols = ["Hdr %", "Tck R", "Pas %", "Cr C/A", "Conv %"]

# "-" is the export's placeholder for "no value"; treat it as zero.
df = df.replace("-", 0)
# Only touch percentage columns that are actually present, as before.
for column in [name for name in pct_cols if name in df.columns]:
    # Non-string cells (the 0 placeholders) become NaN here and are zero-filled below.
    df[column] = df[column].str.rstrip('%').astype('float') / 100.0
df = df.fillna(0)

# Every column from position 11 onwards is converted to a numeric dtype.
# The frame is rebuilt instead of assigning through `.iloc[:, 11:] = ...`:
# that form writes into the existing columns and can leave text-parsed
# columns with dtype `object` even though the values are numbers.
df = pd.concat(
    [df.iloc[:, :11], df.iloc[:, 11:].apply(pd.to_numeric)],
    axis="columns",
)

In [3]:
# Role lookup table: the CSV's first column is the index, and the "Pos Grp"
# column is turned into a plain {index value: "Pos Grp" value} dict.
df0 = pd.read_csv("role_pos.csv", index_col=0)
role_pos = df0["Pos Grp"].to_dict()

In [4]:
# Player profile frame: the first 12 columns of df, with "Best Pos" set from the
# role lookup (roles missing from role_pos map to NaN) and the helper columns removed.
df1 = (
    df.iloc[:, :12]
    .assign(**{"Best Pos": lambda frame: frame["Best Role"].map(role_pos)})
    .drop(columns=["Inf", "Best Role", "Best Duty"])
)
df1

Unnamed: 0,Name,Best Pos,Club,Height,Age,Mins,Wage,Expires,Av Rat
0,Stanislav Rabotov,CB,Arda,179,30,2985,1700,2035-06-30,6.76
1,Hristo Granchov,DM,Arda,173,26,2718,2400,2036-06-30,6.71
2,Ivan Ivanov,WF,Arda,179,25,2595,1200,2035-06-30,6.92
3,Thiago Llantén,WF,Arda,168,22,2448,4300,2035-06-30,6.71
4,Vladimir Lyubenov,CB,Arda,184,26,2430,1300,2035-06-30,6.88
...,...,...,...,...,...,...,...,...,...
360,Petar Ivanov,GK,Yantra Gabrovo,185,33,649,750,2033-06-30,6.60
361,Ivan Palikrushev,DM,Yantra Gabrovo,169,18,533,180,2037-06-30,6.76
362,Mariyan Tonev,CF,Yantra Gabrovo,185,34,331,180,2034-06-30,6.95
363,Ivan Borisov,CF,Yantra Gabrovo,197,25,325,180,2035-06-30,6.63


In [5]:
def _safe_div(numer, denom):
    """Element-wise numer / denom as floats; 0 wherever denom is 0 or missing.

    Masking the divisor first means no inf/NaN is ever produced by a zero
    denominator. A single inf in a metric column would otherwise make the
    per-position mean/std non-finite and wipe that metric for the whole group.
    """
    numer = pd.to_numeric(numer).astype(float)
    denom = pd.to_numeric(denom).astype(float)
    usable = denom.notna() & (denom != 0)
    return (numer / denom.where(usable)).where(usable, 0.0)


# Shots faced = goals conceded plus the three save columns of the export.
df["Shots Faced"] = (df["Conc"] + df["Svh"] + df["Svt"] + df["Svp"]).astype(float)
faced = df["Shots Faced"]

# 29 derived metrics are appended to df1 below; their order is kept because
# later code selects them positionally with iloc[:, -29:].
df1["Save%"] = np.where(faced != 0, 1 - _safe_div(df["Conc"], faced), 0)
df1["xGP/90"] = 90 * _safe_div(df["xGP"], df["Mins"])
# The divisor is Conc, so the guard must be on Conc (not on Shots Faced):
# shots faced with zero conceded used to yield inf here. Such rows now get 0,
# the same convention every other guarded ratio in this cell uses.
df1["Shots/Conc"] = _safe_div(faced, df["Conc"])
df1["GKDA"] = df["Poss Won/90"]
df1["Dist/90"] = 90 * _safe_div(df["Distance"], df["Mins"])
df1["Spr/90"] = df["Sprints/90"]
df1["Prs C/90"] = df["Pres C/90"]
df1["Aer%"] = df["Hdr %"]
df1["Key DDAs"] = df["Shts Blckd/90"] + df["K Hdrs/90"]
df1["DDAs"] = df["Clr/90"] + df["Blk/90"] + df["Hdrs W/90"]
df1["Tck%"] = df["Tck R"]
# FoulPrev = (Tck/90 / Tck R) / (fouls per 90). Both divisors are guarded:
# previously only Tck R was checked, so a player with zero fouls produced inf.
tck_over_ratio = _safe_div(df["Tck/90"], df["Tck R"])
fouls_per_90 = 90 * _safe_div(df["Fls"], df["Mins"])
df1["FoulPrev"] = _safe_div(tck_over_ratio, fouls_per_90)
df1["HDAs"] = df["Tck/90"] + df["Int/90"]
df1["Pas%"] = df["Pas %"]
df1["PsC/90"] = df["Ps C/90"]
df1["PrPas/90"] = df["Pr passes/90"]
df1["Drbs/90"] = df["Drb/90"]
df1["CrsQL"] = df["Cr C/A"]
df1["ChQL"] = _safe_div(df["xA/90"], df["OP-KP/90"])
df1["xAst/90"] = df["xA/90"]
df1["Crs/90"] = df["Cr C/90"]
df1["ChC/90"] = df["Ch C/90"]
df1["KP/90"] = df["OP-KP/90"]
df1["Hdr/90"] = df["Hdrs W/90"]
df1["npxG/90"] = df["NP-xG/90"]
# Goals per 90 minus expected goals per 90.
df1["xGOP/90"] = df["Gls/90"] - df["xG/90"]
df1["Conv%"] = df["Conv %"]
df1["xG/Shot"] = _safe_div(df["xG/90"], df["Shot/90"])
df1["S/90"] = df["Shot/90"]

# Make sure the 29 metric columns carry a numeric dtype. Assigning by label
# replaces the columns, so the converted dtype is actually kept.
metric_cols = df1.columns[-29:]
df1[metric_cols] = df1[metric_cols].apply(pd.to_numeric)

In [6]:
def zscore(x):
    """Standardise one column within its group: (x - mean) / std (pandas' default std)."""
    return (x - x.mean()) / x.std()


# The last 29 columns of df1 are the derived metrics.
data = df1.iloc[:, -29:].astype(float).copy()

# Z-score every metric against players sharing the same "Best Pos".
arr_std = data.groupby(df1["Best Pos"]).transform(zscore)

# Map z-scores through the standard normal CDF and scale to 0-100.
arr_norm = stat.norm.cdf(arr_std) * 100

# norm.cdf returns a bare array, so the row index is re-attached explicitly.
# Without it the frame gets a fresh 0..n-1 index and only lines up with df1
# when df1 happens to have that same default index.
# Undefined z-scores (NaN, e.g. zero spread within a group) become 0.
data_norm = pd.DataFrame(arr_norm, columns=data.columns, index=arr_std.index).fillna(0)

  sqr = _ensure_numeric((avg - values) ** 2)


In [7]:
# Metric column names per position group.
# For each group the "_q" list is the one averaged into "Rating" by the
# shortlist code, and the "_t" list is shown next to it without being averaged
# (presumably q = quality, t = tendency - confirm with the author).

# Goalkeepers (no "_t" list)
GK_q = [
    "Save%", "xGP/90", "Shots/Conc", "GKDA",
    "Dist/90", "Pas%", "PsC/90",
]

# Centre-backs
CB_q = ["Aer%", "Key DDAs", "Tck%", "FoulPrev", "Pas%"]
CB_t = ["DDAs", "HDAs", "PrPas/90", "Drbs/90"]

# Full-backs
FB_q = ["Dist/90", "Spr/90", "Tck%", "FoulPrev", "CrsQL", "Aer%"]
FB_t = [
    "Prs C/90", "HDAs", "PrPas/90", "Drbs/90",
    "Crs/90", "KP/90", "Hdr/90",
]

# Defensive midfielders
DM_q = [
    "Dist/90", "Spr/90", "Tck%", "FoulPrev",
    "ChQL", "xAst/90", "Aer%",
]
DM_t = [
    "Prs C/90", "HDAs", "PrPas/90", "Drbs/90",
    "ChC/90", "KP/90", "Hdr/90",
]

# Attacking midfielders
AM_q = [
    "Dist/90", "Spr/90", "CrsQL", "ChQL",
    "xAst/90", "npxG/90", "xGOP/90", "Conv%",
]
AM_t = [
    "Prs C/90", "PrPas/90", "Drbs/90", "Crs/90",
    "ChC/90", "KP/90", "xG/Shot", "S/90",
]

# Wide forwards (same metrics as AM, kept as separate lists)
WF_q = [
    "Dist/90", "Spr/90", "CrsQL", "ChQL",
    "xAst/90", "npxG/90", "xGOP/90", "Conv%",
]
WF_t = [
    "Prs C/90", "PrPas/90", "Drbs/90", "Crs/90",
    "ChC/90", "KP/90", "xG/Shot", "S/90",
]

# Centre-forwards
CF_q = [
    "Dist/90", "Spr/90", "ChQL", "xAst/90",
    "Aer%", "npxG/90", "xGOP/90", "Conv%",
]
CF_t = ["Prs C/90", "ChC/90", "KP/90", "Hdr/90", "xG/Shot", "S/90"]

# Position group -> [q list] or [q list, t list].
pos_vars = dict(
    GK=[GK_q],
    CB=[CB_q, CB_t],
    FB=[FB_q, FB_t],
    DM=[DM_q, DM_t],
    AM=[AM_q, AM_t],
    WF=[WF_q, WF_t],
    CF=[CF_q, CF_t],
)

In [8]:
pos_need = "CF"   # position group to shortlist; must be a key of pos_vars
MAX_AGE = 25      # only players strictly younger than this are kept

# pos_vars[pos][0] holds the metrics averaged into "Rating"; pos_vars[pos][1],
# when present, holds metrics that are shown but not averaged. "GK" only has
# the first list, so indexing [1] unconditionally raised IndexError for it.
var_groups = pos_vars[pos_need]
rated_cols = var_groups[0]
extra_cols = var_groups[1] if len(var_groups) > 1 else []

# NOTE(review): the name `display` shadows IPython's display() helper for the
# rest of the session. It is kept because other cells may refer to it.
# Column order: 9 profile columns, display-only metrics, rated metrics, Rating.
display = pd.concat(
    [df1.iloc[:, :9], data_norm[extra_cols], data_norm[rated_cols]],
    axis="columns",
)
# Rating = plain mean of the rated percentile columns for each player.
display["Rating"] = data_norm[rated_cols].mean(axis=1)

wanted = (display["Best Pos"] == pos_need) & (display["Age"] < MAX_AGE)
display = display[wanted]
display.sort_values("Rating", ascending=False)

Unnamed: 0,Name,Best Pos,Club,Height,Age,Mins,Wage,Expires,Av Rat,Prs C/90,...,S/90,Dist/90,Spr/90,ChQL,xAst/90,Aer%,npxG/90,xGOP/90,Conv%,Rating
174,Luis Flórez,CF,Litex,186,21,1515,725,2034-06-30,7.1,94.3931,...,74.120062,67.894648,68.736119,78.235352,74.022101,81.713696,58.712005,90.400164,65.322998,73.129636
164,Francis Duailibi,CF,Litex,183,23,3035,1700,2036-06-30,7.21,70.812205,...,66.012295,38.090393,34.646179,92.166596,94.05977,19.264534,58.712005,81.218236,74.264653,61.552796
341,Emiliyan Dimitrov,CF,Spartak Varna,185,24,257,425,2035-06-30,6.63,92.34319,...,16.766462,68.325836,69.940372,52.605857,98.990946,43.679095,95.197176,0.083245,10.035998,54.857316
180,Gabríel Snær Vattnes,CF,Litex,180,23,813,525,2033-06-30,6.8,75.553465,...,10.991709,78.124596,94.742128,36.825942,89.517883,23.38049,12.470421,51.934557,45.189084,54.023137
125,Amit Abramov,CF,CSKA-Sofia,196,23,2663,16000,2036-06-30,6.8,19.021087,...,54.194787,28.456034,24.621243,33.989748,9.009478,79.774682,23.621025,85.152355,55.432058,42.507078
63,Brian Knudsen Pedersen,CF,Botev Plovdiv,184,20,839,750,2034-06-30,6.62,17.331,...,81.874307,22.869112,26.13165,74.768542,23.280239,35.500819,94.431587,20.009394,40.144029,42.141921
250,Marco Aurélio,CF,Ludogorets,184,22,473,10000,2036-06-30,6.72,16.522977,...,87.686945,56.895192,72.2837,11.228011,18.88128,54.954868,74.324563,7.823611,20.357213,39.593555
15,Julle Mbayo,CF,Arda,189,23,888,2800,2037-06-30,6.71,41.268376,...,31.032479,8.203497,13.986423,80.523804,23.280239,75.546361,38.770518,24.690464,30.604535,36.95073
288,Nikolay Petkov,CF,Septemvri Sofia,180,18,598,350,2035-06-30,6.63,6.2565,...,4.499274,9.18803,17.156231,56.88096,68.842689,27.942256,3.580031,56.087702,42.651522,35.291178
264,Wataru Tanaka,CF,Montana,178,24,1001,2300,2034-06-30,6.49,11.558506,...,26.065162,34.099184,39.502011,1.281957,4.994028,8.619811,82.695781,15.906484,69.944008,32.130408
