In [1]:
import pandas as pd
import numpy as np
from sklearn.covariance import EmpiricalCovariance

In [2]:
# player stats CSV, read directly from its GitHub raw URL
DATA_URL = 'https://raw.githubusercontent.com/mancunian1792/2019_2020_football_analysis/master/data/big5_full_stats.csv'
df = pd.read_csv(DATA_URL)

In [3]:
# Convert playing_minutes from strings to numbers by stripping ',' characters
# (presumably thousands separators) before parsing.
# The dtype guard keeps this cell re-runnable: once the column is numeric the
# `.str` accessor would raise AttributeError, so the conversion is skipped.
# regex=False makes the ',' replacement an explicit literal match.
if not pd.api.types.is_numeric_dtype(df['playing_minutes']):
    df['playing_minutes'] = pd.to_numeric(
        df['playing_minutes'].str.replace(',', '', regex=False)
    )

In [4]:
# remove the object-dtype columns so they do not reach the feature matrix
df = df.drop(columns=['per90_matches', 'xg_team_success_matches'])

In [5]:
# keep young outfield players: aged 24 or under, with at least 900 minutes
# played, and goalkeepers ('GK') excluded
is_young = df['age'] <= 24
has_enough_minutes = df['playing_minutes'] >= 900
is_not_goalkeeper = df['position'] != 'GK'
df = df[is_young & has_enough_minutes & is_not_goalkeeper].copy()

In [6]:
# Feature matrix for the covariance model: every column from position 7 onwards.
# NOTE(review): the offset 7 is positional; it presumably skips the leading
# identifier/descriptive columns — confirm against the CSV layout.
# 'dist' is excluded so that re-running this cell after the distance column has
# been appended to df does not feed the previous result back in as a feature.
feature_columns = [col for col in df.columns[7:] if col != 'dist']
player_values_array = df[feature_columns].values

In [7]:
# Fit an empirical covariance model on the player feature matrix and score each
# player against it. Note: scikit-learn's `mahalanobis` returns the *squared*
# Mahalanobis distance, so 'dist' holds squared values (the ranking is the same).
cov = EmpiricalCovariance()
cov.fit(player_values_array)
dist = cov.mahalanobis(player_values_array)

# attach the scores, put the largest at the top and renumber the rows from 0
df = (
    df.assign(dist=dist)
      .sort_values(by='dist', ascending=False)
      .reset_index(drop=True)
)

In [10]:
# display the first 40 rows of the frame
df.head(n=40)

Unnamed: 0,player,nationality,position,squad,comp_level,age,birth_year,games,playing_games_starts,playing_minutes,...,carries_carries,carries_carry_distance,carries_carry_progressive_distance,receiving_pass_targets,receiving_passes_received,receiving_passes_received_pct,miscontrols,dispossessed,minutes_90,dist
0,Raphael Dias Belloli,BRA,"MF,FW",Rennes,Ligue 1,23.0,1996.0,22,20,1690,...,705.0,5193.0,3295.0,845.0,636.0,75.3,36.0,43.0,18.8,351.332151
1,Trent Alexander-Arnold,ENG,DF,Liverpool,Premier League,20.0,1998.0,38,35,3175,...,2108.0,9334.0,5698.0,2362.0,2230.0,94.4,36.0,22.0,35.3,341.878463
2,Raheem Sterling,ENG,FW,Manchester City,Premier League,24.0,1994.0,33,30,2651,...,1328.0,8796.0,5842.0,1688.0,1343.0,79.6,51.0,71.0,29.5,303.532727
3,Jack Grealish,ENG,"MF,FW",Aston Villa,Premier League,23.0,1995.0,36,36,3233,...,1582.0,15460.0,9105.0,1776.0,1467.0,82.6,62.0,78.0,35.9,290.627399
4,Marcus Rashford,ENG,FW,Manchester Utd,Premier League,21.0,1997.0,31,31,2643,...,1019.0,7483.0,5231.0,1557.0,1103.0,70.8,71.0,48.0,29.4,289.456896
5,Timo Werner,GER,FW,RB Leipzig,Bundesliga,23.0,1996.0,34,33,2795,...,1097.0,7326.0,4622.0,1663.0,1159.0,69.7,79.0,50.0,31.1,280.826221
6,Bruno Fernandes,POR,MF,Manchester Utd,Premier League,24.0,1994.0,14,14,1186,...,656.0,3376.0,2015.0,849.0,737.0,86.8,23.0,16.0,13.2,269.819576
7,James Ward-Prowse,ENG,"DF,MF",Southampton,Premier League,24.0,1994.0,38,38,3420,...,1114.0,4224.0,1700.0,1362.0,1196.0,87.8,24.0,16.0,38.0,263.22925
8,Dwight McNeil,ENG,MF,Burnley,Premier League,19.0,1999.0,38,38,3338,...,1276.0,11287.0,5734.0,1236.0,1037.0,83.9,39.0,81.0,37.1,253.017347
9,Andrea Petagna,ITA,FW,SPAL,Serie A,24.0,1995.0,35,32,2942,...,995.0,7545.0,3172.0,1541.0,1025.0,66.5,79.0,82.0,32.7,252.727351
