# Moneyball Project: UEFA Euro 2020 Fantasy Football
Passion project to leverage data-driven decision making for team selection in [UEFA Euro 2020 Fantasy Football](https://gaming.uefa.com/en/uefaeuro2020fantasyfootball/overview)

## Data Preparation and Cleansing
-----------------------------
### Purpose
Initial exploration on available dataset, aggregating and merging to dataframe for further exploration.

### Author
[Christian Wibisono](https://github.com/christianwbsn)



## 1. Import Library

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import json
import re

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_row',50) 

In [2]:
import difflib
from tqdm import tqdm
from nltk import everygrams

In [3]:
DATA_DIR = "../data"

## 2. Common Function

In [4]:
def camel_to_snake(name):
    name = re.sub(" ", "", name)
    name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()

In [5]:
def extract_date(date):
    return pd.Series([date.year, date.month, date.day])

In [6]:
def euro_fantasy_score(df):
    # Not covered by dataset
    # Common - Goal from outside the box 2 points
    # Common - Winning a penalty  2 points
    # Common - Conceding a penalty -1 points
    # Common - Own Goal -2 points
   
    # common
    score = 1 
    if df["min"] >= 60:
        score += 1
    if df["assists"] > 0:
        score += (df["assists"] * 3)
    if df["penalty_kick_miss"] > 0:
        score -= (df["penalty_kick_miss"] * 2)
    if df["yellow_cards"] > 0:
        score -= 1
    if df["red_cards"] > 0:
        score -= 3
        
    # position specific    
    if df["position"] == "F":
        score += (df["goals"] * 4)
    if df["position"] == "M":
        score += (df["goals"] * 5)
        if df["min"] >= 60 and df["clean_sheet"] > 0:
            score += 1
    if df["position"] == "D":
        score += (df["goals"] * 6)
        if df["min"] >= 60 and df["clean_sheet"] > 0:
            score += 4
        score -= (df['goals_allowed'] // 2)
    if df["position"] == "GK":
        score += (df["goals"] * 6)
        score += (df["penalty_kick_saved"] * 5)
        if df["min"] >= 60 and df["clean_sheet"] > 0:
            score += 4
        score += (df["saves"] // 3)
        score -= (df["goals_allowed"] // 2)
    return score

## 3. Dataset Exploration

### 3.1 Main Dataset

In [7]:
## Using this dataset as SSOT for player name and team name
player  = pd.read_excel("{}/raw/fantasy_data.xlsx".format(DATA_DIR), sheet_name=0)
goalie  = pd.read_excel("{}/raw/fantasy_data.xlsx".format(DATA_DIR), sheet_name=1)

In [8]:
player.head()

Unnamed: 0,Player,Date,LeagueName,TeamName,OpponentName,FantasyPoints,Min,Position,Goals,Assists,Shots,Shots On Goal,Crosses,Fouls Drawn,Fouls Committed,Tackles Won,Interceptions,Yellow Cards,Red Cards,Penalty Kick Miss,Clean Sheet,Goals Allowed,Accurate Passes,Shots Assisted,Shootout Goals,Shootout Misses,Game Started
0,Artem Dzyuba,2019-06-08,European Championship Qualifiers,Russia,San Marino,73.66,90,F,4,1,16,7,1,1,1,0,0,0,0,1,1,0,23,3,0,0,1
1,Cristiano Ronaldo,2019-09-10,European Championship Qualifiers,Portugal,Lithuania,56.7,79,F,4,0,8,5,0,0,0,0,0,0,0,0,0,1,35,3,0,0,1
2,Memphis Depay,2019-03-21,European Championship Qualifiers,Netherlands,Belarus,54.0,90,F,2,2,6,4,6,3,0,1,0,0,0,0,1,0,40,3,0,0,1
3,Denis Cheryshev,2019-10-13,European Championship Qualifiers,Russia,Cyprus,53.32,90,M,2,2,4,3,13,0,3,1,0,0,0,0,1,0,36,5,0,0,1
4,Cristiano Ronaldo,2019-11-14,European Championship Qualifiers,Portugal,Lithuania,51.8,83,F,3,0,13,5,0,1,0,0,0,0,0,0,1,0,40,2,0,0,1


In [9]:
player.rename(camel_to_snake, axis=1, inplace=True)
goalie.rename(camel_to_snake, axis=1, inplace=True)

In [10]:
player["player"]  = player["player"].apply(lambda x: x.strip())
goalie["player"]  = goalie["player"].apply(lambda x: x.strip())

In [11]:
player["team_name"]  = player["team_name"].apply(lambda x: x.strip())
goalie["team_name"]  = goalie["team_name"].apply(lambda x: x.strip())

In [12]:
player["date"] = pd.to_datetime(player["date"])
goalie["date"] = pd.to_datetime(goalie["date"])

In [13]:
player[["year", "month", "day"]] = player["date"].apply(extract_date)
goalie[["year", "month", "day"]] = goalie["date"].apply(extract_date)

In [14]:
player.shape

(6489, 30)

In [15]:
goalie.shape

(558, 34)

In [16]:
main_df = pd.concat([player, goalie])

In [17]:
main_df.shape

(7047, 34)

In [18]:
# if players have multiple position choose the most common position
position = main_df.groupby("player").agg(position=('position',
                                                   lambda x: x.value_counts().sort_index().sort_values(ascending=False).index[0]))

In [19]:
main_df["position"] = main_df['player'].apply(lambda x: position.loc[x]["position"])

In [20]:
# hardcoded some players with non-fixed (M/F) position
map_position = {
    "Ferran Torres" : "M",
    "Serge Gnabry": "M", 
    "Raheem Sterling": "M",
    "Diogo Jota": "M",
    "Steven Berghuis": "M",
    "Admir Mehmedi" : "F",
    "Marko Arnautovic": "F",
    "Kingsley Coman": "M",
    "Dries Mertens": "F",
    "Bernardo Silva": "M",
    "Lorenzo Insigne": "M",
    "Emil Forsberg": "M", 
    "Leroy Sané": "M",
    "Nikola Vlasic": "M",
    "Domenico Berardi": "M",
    "Gonçalo Guedes": "M", 
    "João Félix": "M",
    "Mikel Oyarzabal": "F",
    "Mislav Orsic": "M",
    "Adama Traoré": "M",
    "Emil Forsberg": "M",
    "Steven Berghuis":"M",
    "Mislav Orsic": "M",
    "Mikel Oyarzabal": "F",
    "David Alaba": "M"
}

main_df["position"] = main_df.apply(lambda x: map_position[x["player"]] if x["player"] in map_position.keys() else x["position"], axis=1)

In [21]:
main_df["position"].value_counts()

M     2926
D     2307
F     1256
GK     558
Name: position, dtype: int64

In [22]:
main_df["points"] = main_df.apply(euro_fantasy_score, axis=1)

In [23]:
pd.pivot_table(main_df, index=["position"], values=["points"], aggfunc="mean")

Unnamed: 0_level_0,points
position,Unnamed: 1_level_1
D,3.293888
F,3.226115
GK,3.763441
M,2.914901


### 3.2 Euro 2020 Dataset

#### 3.2.1 Players

#### 3.2.1.1 Appending last matchday data

In [24]:
with open('{}/raw/euro-2020/players_1.json'.format(DATA_DIR))as f:
    data    = json.load(f)
    players = data["data"]["value"]["playerList"]

In [25]:
players_df = pd.json_normalize(players)

In [26]:
players_df.rename(camel_to_snake, axis=1, inplace=True)

In [27]:
players_df = players_df[players_df["trained"]!='']

In [28]:
players_df.head()

Unnamed: 0,id,p_d_name,p_f_name,latin_name,t_name,t_id,team_played,c_code,skill,value,is_active,sel_per,md_id,tot_pts,g_s,assist,c_s,g_c,y_c,r_c,o_g,p_s,p_c,p_e,saves,p_m,b_r,g_ob,m_om,m_om_pts,p_status,match_atd,trained,is_played,sel_in_per,sel_out_per,upcoming_matches_list,current_matches_list,avg_player_pts,avg_player_value,last_gd_points,category1,category2,category3,category4,category5,category6,category7,category8,category9,category10,category11,category12,category13,category14,category15
0,63706,C. Ronaldo,Cristiano Ronaldo,Cristiano Ronaldo,Portugal,110,1,POR,4,12.0,1,30.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '110', 'tSCode': 'Portug...","[{'mdId': '1', 'tId': '110', 'tSCode': 'Portug...",10.0,0.8,10.0,1.0,0.0,17.0,1.5,3.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,250076574,K. Mbappé,Kylian Mbappé,Kylian Mbappe,France,43,1,FRA,4,12.0,1,35.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '43', 'tSCode': 'France'...","[{'mdId': '1', 'tId': '43', 'tSCode': 'France'...",2.0,0.2,2.0,1.0,0.0,9.0,0.9,2.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,250016833,H. Kane,Harry Kane,Harry Kane,England,39,1,ENG,4,11.5,1,36.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '39', 'tSCode': 'England...","[{'mdId': '1', 'tId': '39', 'tSCode': 'England...",2.0,0.2,2.0,1.0,0.0,11.0,1.4,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,250002096,R. Lewandowski,Robert Lewandowski,Robert Lewandowski,Poland,109,1,POL,4,11.5,1,15.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '109', 'tSCode': 'Poland...","[{'mdId': '1', 'tId': '109', 'tSCode': 'Poland...",2.0,0.2,2.0,1.0,0.0,5.0,0.7,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,250010802,R. Lukaku,Romelu Lukaku,Romelu Lukaku,Belgium,13,1,BEL,4,11.0,1,49.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '13', 'tSCode': 'Belgium...","[{'mdId': '1', 'tId': '13', 'tSCode': 'Belgium...",10.0,0.9,10.0,1.0,0.0,22.0,2.2,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [29]:
players_df["date"] = players_df["current_matches_list"].apply(lambda x: x[0]["matchDate"])
players_df["opponent_name"] = players_df["current_matches_list"].apply(lambda x: x[0]["vsTSCode"])

In [30]:
all_players_name = main_df["player"].unique()
def get_closest_match(name):
    # return closest match for join operation
    return ''.join(list(difflib.get_close_matches(name, all_players_name, n=1, cutoff=0.7)))

In [31]:
players_df["closest_match"] = players_df["p_f_name"].apply(get_closest_match)
players_df["player"] = players_df.apply(lambda x: x["closest_match"] if x["closest_match"] != "" else x["p_f_name"], axis=1)

In [32]:
players_df["date"] = pd.to_datetime(players_df["date"])
players_df[["year", "month", "day"]] = players_df["date"].apply(extract_date)

In [33]:
main_df.head()

Unnamed: 0,player,date,league_name,team_name,opponent_name,fantasy_points,min,position,goals,assists,shots,shots_on_goal,crosses,fouls_drawn,fouls_committed,tackles_won,interceptions,yellow_cards,red_cards,penalty_kick_miss,clean_sheet,goals_allowed,accurate_passes,shots_assisted,shootout_goals,shootout_misses,game_started,year,month,day,saves,wins,penalty_kick_saved,shootout_saves,points
0,Artem Dzyuba,2019-06-08,European Championship Qualifiers,Russia,San Marino,73.66,90,F,4,1,16,7,1,1,1,0,0,0,0,1,1,0,23,3,0,0,1,2019,6,8,,,,,19.0
1,Cristiano Ronaldo,2019-09-10,European Championship Qualifiers,Portugal,Lithuania,56.7,79,F,4,0,8,5,0,0,0,0,0,0,0,0,0,1,35,3,0,0,1,2019,9,10,,,,,18.0
2,Memphis Depay,2019-03-21,European Championship Qualifiers,Netherlands,Belarus,54.0,90,F,2,2,6,4,6,3,0,1,0,0,0,0,1,0,40,3,0,0,1,2019,3,21,,,,,16.0
3,Denis Cheryshev,2019-10-13,European Championship Qualifiers,Russia,Cyprus,53.32,90,M,2,2,4,3,13,0,3,1,0,0,0,0,1,0,36,5,0,0,1,2019,10,13,,,,,19.0
4,Cristiano Ronaldo,2019-11-14,European Championship Qualifiers,Portugal,Lithuania,51.8,83,F,3,0,13,5,0,1,0,0,0,0,0,0,1,0,40,2,0,0,1,2019,11,14,,,,,14.0


In [34]:
players_df.head()

Unnamed: 0,id,p_d_name,p_f_name,latin_name,t_name,t_id,team_played,c_code,skill,value,is_active,sel_per,md_id,tot_pts,g_s,assist,c_s,g_c,y_c,r_c,o_g,p_s,p_c,p_e,saves,p_m,b_r,g_ob,m_om,m_om_pts,p_status,match_atd,trained,is_played,sel_in_per,sel_out_per,upcoming_matches_list,current_matches_list,avg_player_pts,avg_player_value,last_gd_points,category1,category2,category3,category4,category5,category6,category7,category8,category9,category10,category11,category12,category13,category14,category15,date,opponent_name,closest_match,player,year,month,day
0,63706,C. Ronaldo,Cristiano Ronaldo,Cristiano Ronaldo,Portugal,110,1,POR,4,12.0,1,30.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '110', 'tSCode': 'Portug...","[{'mdId': '1', 'tId': '110', 'tSCode': 'Portug...",10.0,0.8,10.0,1.0,0.0,17.0,1.5,3.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-15 18:00:00,Hungary,Cristiano Ronaldo,Cristiano Ronaldo,2021,6,15
1,250076574,K. Mbappé,Kylian Mbappé,Kylian Mbappe,France,43,1,FRA,4,12.0,1,35.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '43', 'tSCode': 'France'...","[{'mdId': '1', 'tId': '43', 'tSCode': 'France'...",2.0,0.2,2.0,1.0,0.0,9.0,0.9,2.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-15 21:00:00,Germany,Kylian Mbappé,Kylian Mbappé,2021,6,15
2,250016833,H. Kane,Harry Kane,Harry Kane,England,39,1,ENG,4,11.5,1,36.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '39', 'tSCode': 'England...","[{'mdId': '1', 'tId': '39', 'tSCode': 'England...",2.0,0.2,2.0,1.0,0.0,11.0,1.4,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-13 15:00:00,Croatia,Harry Kane,Harry Kane,2021,6,13
3,250002096,R. Lewandowski,Robert Lewandowski,Robert Lewandowski,Poland,109,1,POL,4,11.5,1,15.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '109', 'tSCode': 'Poland...","[{'mdId': '1', 'tId': '109', 'tSCode': 'Poland...",2.0,0.2,2.0,1.0,0.0,5.0,0.7,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-14 18:00:00,Slovakia,Robert Lewandowski,Robert Lewandowski,2021,6,14
4,250010802,R. Lukaku,Romelu Lukaku,Romelu Lukaku,Belgium,13,1,BEL,4,11.0,1,49.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '13', 'tSCode': 'Belgium...","[{'mdId': '1', 'tId': '13', 'tSCode': 'Belgium...",10.0,0.9,10.0,1.0,0.0,22.0,2.2,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-12 21:00:00,Russia,Romelu Lukaku,Romelu Lukaku,2021,6,12


In [35]:
with open('{}/raw/euro-2020/fixtures.json'.format(DATA_DIR))as f:
    data    = json.load(f)
    fixtures = data["data"]["value"][0]["match"]

In [36]:
fixtures_df = pd.json_normalize(fixtures)
fixtures_df["atName"] = fixtures_df["atName"].apply(lambda x: x.strip())
fixtures_df["htName"] = fixtures_df["htName"].apply(lambda x: x.strip())

In [37]:
def heuristic_minutes_played(df):
    if (df["g_s"] == 0) and (df["assist"] == 0) and (df["y_c"] == 0) and (df["r_c"] == 0) and (df["last_gd_points"] == 2):
        return 90
    elif df["last_gd_points"] >= 2:
        return 90
    elif df["last_gd_points"] == 0:
        return 0
    else:
        return 59

In [38]:
players_df["min"] = players_df.apply(heuristic_minutes_played, axis=1)
players_df.rename(columns={"t_name": "team_name", "g_s": "goals", "assist": "assists", 
                           "y_c": "yellow_cards", "r_c" : "red_cards", "last_gd_points": "points",
                           "p_m": "penalty_kick_miss"}, inplace=True)
players_df["league_name"] = "European Championship 2020"

In [39]:
fixtures_df.head()

Unnamed: 0,mId,gmIsCurrent,gmIsLocked,gdId,isFeedLive,isLive,dateTime,dateTimeLock,htId,htName,htShortName,htCCode,atId,atName,atShortName,atCCode,gameNo,mdName,htScore,atScore,htAggScore,atAggScore,aggDescription,aggFlag,matchStatus,teamSc,teamScStartDate,teamScEndDate,lineupAnnounced,isMatchPostponed,groupId,groupName,stadiumId,stadiumName,stadiumThumb,venueId,venueName,venueCountryCode
0,2024447,2,1,1,2,2,06/11/2021 21:00:00,06/11/2021 21:00:00,135,Turkey,Turkey,TUR,66,Italy,Italy,ITA,33673,Match 1,0,3,0,0,,0,2,,,,1,0,2006438,Group A,57775,Olimpico in Rome,https://img.uefa.com/imgml/stadium/matchinfo/w...,2637,Rome,ITA
1,2024448,2,1,2,2,2,06/12/2021 15:00:00,06/12/2021 15:00:00,144,Wales,Wales,WAL,128,Switzerland,Switzerland,SUI,33673,Match 4,1,1,0,0,,0,2,,,,1,0,2006438,Group A,250002745,Bakı Olimpiya Stadionu,https://img.uefa.com/imgml/stadium/matchinfo/w...,1162,Baku,AZE
2,2024449,2,1,2,2,2,06/12/2021 18:00:00,06/12/2021 18:00:00,35,Denmark,Denmark,DEN,42,Finland,Finland,FIN,33673,Match 3,0,1,0,0,,0,2,,,,1,0,2006439,Group B,63462,Parken,https://img.uefa.com/imgml/stadium/matchinfo/w...,1449,Copenhagen,DEN
3,2024450,2,1,2,2,2,06/12/2021 21:00:00,06/12/2021 21:00:00,13,Belgium,Belgium,BEL,57451,Russia,Russia,RUS,33673,Match 2,3,0,0,0,,0,2,,,,1,0,2006439,Group B,250003363,Gazprom Arena,https://img.uefa.com/imgml/stadium/matchinfo/w...,2850,St Petersburg,RUS
4,2024451,2,1,3,2,2,06/13/2021 15:00:00,06/13/2021 15:00:00,39,England,England,ENG,56370,Croatia,Croatia,CRO,33673,Match 5,1,0,0,0,,0,2,,,,1,0,2006441,Group D,1100043,Wembley Stadium,https://img.uefa.com/imgml/stadium/matchinfo/w...,2116,London,ENG


In [40]:
fixtures_df[fixtures_df["htName"] == "Belgium"]["htScore"]

3    3
Name: htScore, dtype: object

In [41]:
def goals_allowed(df):
    if df["team_name"] in fixtures_df["htName"].values:
        return int(fixtures_df[fixtures_df["htName"] == df["team_name"]].reset_index()["atScore"])
    else:
        return int(fixtures_df[fixtures_df["atName"] == df["team_name"]].reset_index()["htScore"])

In [42]:
players_df["goals_allowed"] = players_df.apply(goals_allowed, axis=1)
players_df["clean_sheet"] = players_df["goals_allowed"].apply(lambda x: 1 if x == 0 else 0)
players_df["game_started"] = players_df["min"].apply(lambda x: 1 if x >= 60 else 0)

In [43]:
players_df.shape

(620, 68)

In [44]:
# if players have multiple position choose the most common position
position = main_df.groupby("player").agg(position=('position',
                                                   lambda x: x.value_counts().sort_index().sort_values(ascending=False).index[0])).to_dict()["position"]

In [45]:
players_df["position"] = players_df["player"].apply(lambda x: position[x]
                                                    if x in position.keys() else "")

In [46]:
players_df.shape

(620, 69)

In [47]:
players_df

Unnamed: 0,id,p_d_name,p_f_name,latin_name,team_name,t_id,team_played,c_code,skill,value,is_active,sel_per,md_id,tot_pts,goals,assists,c_s,g_c,yellow_cards,red_cards,o_g,p_s,p_c,p_e,saves,penalty_kick_miss,b_r,g_ob,m_om,m_om_pts,p_status,match_atd,trained,is_played,sel_in_per,sel_out_per,upcoming_matches_list,current_matches_list,avg_player_pts,avg_player_value,points,category1,category2,category3,category4,category5,category6,category7,category8,category9,category10,category11,category12,category13,category14,category15,date,opponent_name,closest_match,player,year,month,day,min,league_name,goals_allowed,clean_sheet,game_started,position
0,63706,C. Ronaldo,Cristiano Ronaldo,Cristiano Ronaldo,Portugal,110,1,POR,4,12.0,1,30.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '110', 'tSCode': 'Portug...","[{'mdId': '1', 'tId': '110', 'tSCode': 'Portug...",10.0,0.8,10.0,1.0,0.0,17.0,1.5,3.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-15 18:00:00,Hungary,Cristiano Ronaldo,Cristiano Ronaldo,2021,6,15,90,European Championship 2020,0,1,1,F
1,250076574,K. Mbappé,Kylian Mbappé,Kylian Mbappe,France,43,1,FRA,4,12.0,1,35.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '43', 'tSCode': 'France'...","[{'mdId': '1', 'tId': '43', 'tSCode': 'France'...",2.0,0.2,2.0,1.0,0.0,9.0,0.9,2.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-15 21:00:00,Germany,Kylian Mbappé,Kylian Mbappé,2021,6,15,90,European Championship 2020,0,1,1,F
2,250016833,H. Kane,Harry Kane,Harry Kane,England,39,1,ENG,4,11.5,1,36.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '39', 'tSCode': 'England...","[{'mdId': '1', 'tId': '39', 'tSCode': 'England...",2.0,0.2,2.0,1.0,0.0,11.0,1.4,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-13 15:00:00,Croatia,Harry Kane,Harry Kane,2021,6,13,90,European Championship 2020,0,1,1,F
3,250002096,R. Lewandowski,Robert Lewandowski,Robert Lewandowski,Poland,109,1,POL,4,11.5,1,15.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '109', 'tSCode': 'Poland...","[{'mdId': '1', 'tId': '109', 'tSCode': 'Poland...",2.0,0.2,2.0,1.0,0.0,5.0,0.7,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-14 18:00:00,Slovakia,Robert Lewandowski,Robert Lewandowski,2021,6,14,90,European Championship 2020,2,0,1,F
4,250010802,R. Lukaku,Romelu Lukaku,Romelu Lukaku,Belgium,13,1,BEL,4,11.0,1,49.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '13', 'tSCode': 'Belgium...","[{'mdId': '1', 'tId': '13', 'tSCode': 'Belgium...",10.0,0.9,10.0,1.0,0.0,22.0,2.2,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-12 21:00:00,Russia,Romelu Lukaku,Romelu Lukaku,2021,6,12,90,European Championship 2020,0,1,1,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1043,250117658,R. Colwill,Rubin Colwill,Rubin Colwill,Wales,144,1,WAL,3,4.0,1,10.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,0.0,"[{'mdId': '2', 'tId': '144', 'tSCode': 'Wales'...","[{'mdId': '1', 'tId': '144', 'tSCode': 'Wales'...",0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-06-12 15:00:00,Switzerland,,Rubin Colwill,2021,6,12,0,European Championship 2020,1,0,0,
1046,72525,J. Toivio,Joona Toivio,Joona Toivio,Finland,42,1,FIN,2,4.0,1,1.0,1,6,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,0.0,0.0,"[{'mdId': '2', 'tId': '42', 'tSCode': 'Finland...","[{'mdId': '1', 'tId': '42', 'tSCode': 'Finland...",6.0,1.5,6.0,0.0,0.0,0.0,0.1,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-06-12 18:00:00,Denmark,,Joona Toivio,2021,6,12,90,European Championship 2020,0,1,1,
1048,1908198,L. Négo,Loïc Négo,Loic Nego,Hungary,57,1,HUN,3,4.0,1,34.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,2.0,"[{'mdId': '2', 'tId': '57', 'tSCode': 'Hungary...","[{'mdId': '1', 'tId': '57', 'tSCode': 'Hungary...",0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-06-15 18:00:00,Portugal,Loic Nego,Loic Nego,2021,6,15,0,European Championship 2020,3,0,0,D
1050,250006714,M. Bizot,Marco Bizot,Marco Bizot,Netherlands,95,1,NED,1,4.0,1,1.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,0.0,0.0,"[{'mdId': '2', 'tId': '95', 'tSCode': 'Netherl...","[{'mdId': '1', 'tId': '95', 'tSCode': 'Netherl...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-06-13 21:00:00,Ukraine,Marco Bizot,Marco Bizot,2021,6,13,0,European Championship 2020,2,0,0,GK


In [48]:
def update_data(df):
    main_df_columns = ["player", "date", "league_name", "game_started",
                       "team_name", "opponent_name", "position", "goals_allowed", "clean_sheet",
                       "year", "month", "day", "min", "goals", "assists", 
                       "penalty_kick_miss","yellow_cards", "red_cards", "saves", "points"]
    return df[main_df_columns]

In [49]:
new_train = update_data(players_df)

In [50]:
main_df = pd.concat([main_df, new_train])

In [51]:
main_df = main_df.fillna(0)

In [52]:
main_df.to_csv("{}/interim/md_1_df.csv".format(DATA_DIR), index=False)

#### 3.2.1.2 Generating test data

In [53]:
with open('{}/raw/euro-2020/players_2.json'.format(DATA_DIR))as f:
    data    = json.load(f)
    players = data["data"]["value"]["playerList"]

In [54]:
players_df = pd.json_normalize(players)

In [55]:
players_df.rename(camel_to_snake, axis=1, inplace=True)

In [56]:
players_df = players_df[players_df["trained"]!='']

In [57]:
players_df.head()

Unnamed: 0,id,p_d_name,p_f_name,latin_name,t_name,t_id,team_played,c_code,skill,value,is_active,sel_per,md_id,tot_pts,g_s,assist,c_s,g_c,y_c,r_c,o_g,p_s,p_c,p_e,saves,p_m,b_r,g_ob,m_om,m_om_pts,p_status,match_atd,trained,is_played,sel_in_per,sel_out_per,upcoming_matches_list,current_matches_list,avg_player_pts,avg_player_value,last_gd_points,category1,category2,category3,category4,category5,category6,category7,category8,category9,category10,category11,category12,category13,category14,category15
0,63706,C. Ronaldo,Cristiano Ronaldo,Cristiano Ronaldo,Portugal,110,1,POR,4,12.0,1,30.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '110', 'tSCode': 'Portug...","[{'mdId': '1', 'tId': '110', 'tSCode': 'Portug...",10.0,0.8,10.0,1.0,0.0,17.0,1.5,3.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,250076574,K. Mbappé,Kylian Mbappé,Kylian Mbappe,France,43,1,FRA,4,12.0,1,35.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '43', 'tSCode': 'France'...","[{'mdId': '1', 'tId': '43', 'tSCode': 'France'...",2.0,0.2,2.0,1.0,0.0,9.0,0.9,2.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,250016833,H. Kane,Harry Kane,Harry Kane,England,39,1,ENG,4,11.5,1,36.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '39', 'tSCode': 'England...","[{'mdId': '1', 'tId': '39', 'tSCode': 'England...",2.0,0.2,2.0,1.0,0.0,11.0,1.4,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,250002096,R. Lewandowski,Robert Lewandowski,Robert Lewandowski,Poland,109,1,POL,4,11.5,1,15.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '109', 'tSCode': 'Poland...","[{'mdId': '1', 'tId': '109', 'tSCode': 'Poland...",2.0,0.2,2.0,1.0,0.0,5.0,0.7,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,250010802,R. Lukaku,Romelu Lukaku,Romelu Lukaku,Belgium,13,1,BEL,4,11.0,1,49.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '13', 'tSCode': 'Belgium...","[{'mdId': '1', 'tId': '13', 'tSCode': 'Belgium...",10.0,0.9,10.0,1.0,0.0,22.0,2.2,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [58]:
players_df["date"] = players_df["upcoming_matches_list"].apply(lambda x: x[0]["matchDate"])
players_df["opponent_name"] = players_df["upcoming_matches_list"].apply(lambda x: x[0]["vsTSCode"])

In [59]:
all_players_name = main_df["player"].unique()
def get_closest_match(name):
    # return closest match for join operation
    return ''.join(list(difflib.get_close_matches(name, all_players_name, n=1, cutoff=0.7)))

In [60]:
players_df["closest_match"] = players_df["p_f_name"].apply(get_closest_match)
players_df["player"] = players_df.apply(lambda x: x["closest_match"] if x["closest_match"] != "" else x["p_f_name"], axis=1)

In [61]:
players_df["date"] = pd.to_datetime(players_df["date"])
players_df[["year", "month", "day"]] = players_df["date"].apply(extract_date)

In [62]:
main_df.head()

Unnamed: 0,player,date,league_name,team_name,opponent_name,fantasy_points,min,position,goals,assists,shots,shots_on_goal,crosses,fouls_drawn,fouls_committed,tackles_won,interceptions,yellow_cards,red_cards,penalty_kick_miss,clean_sheet,goals_allowed,accurate_passes,shots_assisted,shootout_goals,shootout_misses,game_started,year,month,day,saves,wins,penalty_kick_saved,shootout_saves,points
0,Artem Dzyuba,2019-06-08,European Championship Qualifiers,Russia,San Marino,73.66,90,F,4,1,16.0,7.0,1.0,1.0,1.0,0.0,0.0,0,0,1,1,0,23.0,3.0,0.0,0.0,1,2019,6,8,0.0,0.0,0.0,0.0,19.0
1,Cristiano Ronaldo,2019-09-10,European Championship Qualifiers,Portugal,Lithuania,56.7,79,F,4,0,8.0,5.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,35.0,3.0,0.0,0.0,1,2019,9,10,0.0,0.0,0.0,0.0,18.0
2,Memphis Depay,2019-03-21,European Championship Qualifiers,Netherlands,Belarus,54.0,90,F,2,2,6.0,4.0,6.0,3.0,0.0,1.0,0.0,0,0,0,1,0,40.0,3.0,0.0,0.0,1,2019,3,21,0.0,0.0,0.0,0.0,16.0
3,Denis Cheryshev,2019-10-13,European Championship Qualifiers,Russia,Cyprus,53.32,90,M,2,2,4.0,3.0,13.0,0.0,3.0,1.0,0.0,0,0,0,1,0,36.0,5.0,0.0,0.0,1,2019,10,13,0.0,0.0,0.0,0.0,19.0
4,Cristiano Ronaldo,2019-11-14,European Championship Qualifiers,Portugal,Lithuania,51.8,83,F,3,0,13.0,5.0,0.0,1.0,0.0,0.0,0.0,0,0,0,1,0,40.0,2.0,0.0,0.0,1,2019,11,14,0.0,0.0,0.0,0.0,14.0


In [63]:
players_df.head()

Unnamed: 0,id,p_d_name,p_f_name,latin_name,t_name,t_id,team_played,c_code,skill,value,is_active,sel_per,md_id,tot_pts,g_s,assist,c_s,g_c,y_c,r_c,o_g,p_s,p_c,p_e,saves,p_m,b_r,g_ob,m_om,m_om_pts,p_status,match_atd,trained,is_played,sel_in_per,sel_out_per,upcoming_matches_list,current_matches_list,avg_player_pts,avg_player_value,last_gd_points,category1,category2,category3,category4,category5,category6,category7,category8,category9,category10,category11,category12,category13,category14,category15,date,opponent_name,closest_match,player,year,month,day
0,63706,C. Ronaldo,Cristiano Ronaldo,Cristiano Ronaldo,Portugal,110,1,POR,4,12.0,1,30.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '110', 'tSCode': 'Portug...","[{'mdId': '1', 'tId': '110', 'tSCode': 'Portug...",10.0,0.8,10.0,1.0,0.0,17.0,1.5,3.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-19 18:00:00,Germany,Cristiano Ronaldo,Cristiano Ronaldo,2021,6,19
1,250076574,K. Mbappé,Kylian Mbappé,Kylian Mbappe,France,43,1,FRA,4,12.0,1,35.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '43', 'tSCode': 'France'...","[{'mdId': '1', 'tId': '43', 'tSCode': 'France'...",2.0,0.2,2.0,1.0,0.0,9.0,0.9,2.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-19 15:00:00,Hungary,Kylian Mbappé,Kylian Mbappé,2021,6,19
2,250016833,H. Kane,Harry Kane,Harry Kane,England,39,1,ENG,4,11.5,1,36.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '39', 'tSCode': 'England...","[{'mdId': '1', 'tId': '39', 'tSCode': 'England...",2.0,0.2,2.0,1.0,0.0,11.0,1.4,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-18 21:00:00,Scotland,Harry Kane,Harry Kane,2021,6,18
3,250002096,R. Lewandowski,Robert Lewandowski,Robert Lewandowski,Poland,109,1,POL,4,11.5,1,15.0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,1.0,1.0,"[{'mdId': '2', 'tId': '109', 'tSCode': 'Poland...","[{'mdId': '1', 'tId': '109', 'tSCode': 'Poland...",2.0,0.2,2.0,1.0,0.0,5.0,0.7,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-19 21:00:00,Spain,Robert Lewandowski,Robert Lewandowski,2021,6,19
4,250010802,R. Lukaku,Romelu Lukaku,Romelu Lukaku,Belgium,13,1,BEL,4,11.0,1,49.0,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,In contention to start next game,0,2.0,1.0,"[{'mdId': '2', 'tId': '13', 'tSCode': 'Belgium...","[{'mdId': '1', 'tId': '13', 'tSCode': 'Belgium...",10.0,0.9,10.0,1.0,0.0,22.0,2.2,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2021-06-17 18:00:00,Denmark,Romelu Lukaku,Romelu Lukaku,2021,6,17


In [64]:
players_df.rename(columns={"t_name": "team_name"}, inplace=True)
players_df["league_name"] = "European Championship 2020"

In [65]:
players_df.shape

(620, 64)

In [66]:
# if players have multiple position choose the most common position
position = main_df.groupby("player").agg(position=('position',
                                                   lambda x: x.value_counts().sort_index().sort_values(ascending=False).index[0])).to_dict()["position"]

In [67]:
players_df["position"] = players_df["player"].apply(lambda x: position[x]
                                                    if x in position.keys() else "")

In [68]:
players_df.shape

(620, 65)

In [69]:
def generate_test_data(df):
    main_df_columns = ["player", "date", "league_name", 
                       "team_name", "opponent_name", "position",
                       "year", "month", "day"]
    return df[main_df_columns]

In [70]:
test = generate_test_data(players_df)

In [71]:
main_df = pd.concat([main_df, test])

In [72]:
main_df = pd.merge(main_df, players_df[["player", "value", "skill"]], on=["player"], how="left")

In [73]:
def get_agg_before(df):
    merged_df = df.copy()
    merged_df = pd.merge(merged_df, df, on=["player", "team_name"])
    merged_df = merged_df[merged_df['date_y'] < merged_df["date_x"]]
    merged_df["is_scoring"] = merged_df["goals_y"].apply(lambda x: 1 if x > 0 else 0)
    merged_df["is_assisting"] = merged_df["assists_y"].apply(lambda x: 1 if x > 0 else 0)
    merged_df_1 = merged_df.groupby(["player", "team_name", "date_x"]).agg(
                                                                        prev_mean_points=("points_y", "mean"),
                                                                        prev_mean_goals=("goals_y", "median"),
                                                                        prev_mean_assists=("assists_y", "mean"),
                                                                        prev_max_points=("points_y", "max"),
                                                                        prev_std_points=("points_y", "std"),
                                                                        prev_std_goals=("goals_y", "std"),
                                                                        prev_std_assists=("assists_y", "std"),
                                                                        prev_median_min=("min_y", "median"),
                                                                        prev_ratio_starter=("game_started_y", "mean"),
                                                                        count_played=("date_y","nunique"),
                                                                        goal_consistency=("is_scoring", "mean"),
                                                                        assist_consistency=("is_assisting", "mean"),
                                                                        clean_sheet_consistency=("clean_sheet_y", "mean")
                                                                       )
    
    merged_df_1 = merged_df_1.reset_index()
    merged_df_1.rename(columns={"date_x": "date"}, inplace=True)
    
    merged_df_2 = merged_df.groupby(["team_name", "date_x"]).agg(count_team_played=("date_y", "nunique"))
    merged_df_2 = merged_df_2.reset_index()
    merged_df_2.rename(columns={"date_x": "date"}, inplace=True)
    
    
    merged_df_3 = merged_df[merged_df["opponent_name_x"] == merged_df["opponent_name_y"]]
    merged_df_3 = merged_df_3.groupby(["player", "team_name", "date_x"]).agg(prev_max_goal_to_specific_opp=("goals_y", "max"),
                                                                           prev_max_points_to_specific_opp=("points_y", "max"),
                                                                           prev_mean_points_to_specific_opp=("points_y", "mean"))
    merged_df_3 = merged_df_3.reset_index()
    merged_df_3.rename(columns={"date_x": "date", "opponent_name_y": "opponent_name"}, inplace=True)
    
    merged_df = pd.merge(merged_df_1, merged_df_2, on=["team_name", "date"], how="left")
    merged_df = pd.merge(merged_df, merged_df_3, on=["player", "team_name", "date"], how="left")
    merged_df["prev_ratio_played"] = merged_df["count_played"] / merged_df["count_team_played"]
    return merged_df


In [74]:
agg = get_agg_before(main_df)

In [75]:
main_df.head()

Unnamed: 0,player,date,league_name,team_name,opponent_name,fantasy_points,min,position,goals,assists,shots,shots_on_goal,crosses,fouls_drawn,fouls_committed,tackles_won,interceptions,yellow_cards,red_cards,penalty_kick_miss,clean_sheet,goals_allowed,accurate_passes,shots_assisted,shootout_goals,shootout_misses,game_started,year,month,day,saves,wins,penalty_kick_saved,shootout_saves,points,value,skill
0,Artem Dzyuba,2019-06-08,European Championship Qualifiers,Russia,San Marino,73.66,90.0,F,4.0,1.0,16.0,7.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,23.0,3.0,0.0,0.0,1.0,2019,6,8,0.0,0.0,0.0,0.0,19.0,8.5,4.0
1,Cristiano Ronaldo,2019-09-10,European Championship Qualifiers,Portugal,Lithuania,56.7,79.0,F,4.0,0.0,8.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,35.0,3.0,0.0,0.0,1.0,2019,9,10,0.0,0.0,0.0,0.0,18.0,12.0,4.0
2,Memphis Depay,2019-03-21,European Championship Qualifiers,Netherlands,Belarus,54.0,90.0,F,2.0,2.0,6.0,4.0,6.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,40.0,3.0,0.0,0.0,1.0,2019,3,21,0.0,0.0,0.0,0.0,16.0,10.0,4.0
3,Denis Cheryshev,2019-10-13,European Championship Qualifiers,Russia,Cyprus,53.32,90.0,M,2.0,2.0,4.0,3.0,13.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,36.0,5.0,0.0,0.0,1.0,2019,10,13,0.0,0.0,0.0,0.0,19.0,8.0,3.0
4,Cristiano Ronaldo,2019-11-14,European Championship Qualifiers,Portugal,Lithuania,51.8,83.0,F,3.0,0.0,13.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,40.0,2.0,0.0,0.0,1.0,2019,11,14,0.0,0.0,0.0,0.0,14.0,12.0,4.0


In [76]:
main_df = main_df.sort_values(["player", "date"])

In [77]:
main_df['last_md_points'] = main_df.groupby("player")["points"].shift()
main_df['last_md_goals'] = main_df.groupby("player")["goals"].shift()
main_df['last_md_assists'] = main_df.groupby("player")["assists"].shift()

In [78]:
main_df = main_df.drop(["goals", "assists", "shots", "shots_on_goal", "crosses", "fouls_drawn", 
                        "fouls_committed", "tackles_won", "interceptions", "yellow_cards", "red_cards",
                        "penalty_kick_miss", "clean_sheet", "goals_allowed", "accurate_passes",
                        "shots_assisted", "shootout_goals", "shootout_misses", "game_started", "saves", "wins", 
                        "penalty_kick_saved", "shootout_saves"], axis=1)
main_df = pd.merge(main_df, agg, how="left", on=["player", "team_name", "date"])

In [79]:
main_df.columns

Index(['player', 'date', 'league_name', 'team_name', 'opponent_name',
       'fantasy_points', 'min', 'position', 'year', 'month', 'day', 'points',
       'value', 'skill', 'last_md_points', 'last_md_goals', 'last_md_assists',
       'prev_mean_points', 'prev_mean_goals', 'prev_mean_assists',
       'prev_max_points', 'prev_std_points', 'prev_std_goals',
       'prev_std_assists', 'prev_median_min', 'prev_ratio_starter',
       'count_played', 'goal_consistency', 'assist_consistency',
       'clean_sheet_consistency', 'count_team_played',
       'prev_max_goal_to_specific_opp', 'prev_max_points_to_specific_opp',
       'prev_mean_points_to_specific_opp', 'prev_ratio_played'],
      dtype='object')

In [80]:
players_df.to_csv("{}/interim/fantasy_euro.csv".format(DATA_DIR), index=False)

In [81]:
main_df

Unnamed: 0,player,date,league_name,team_name,opponent_name,fantasy_points,min,position,year,month,day,points,value,skill,last_md_points,last_md_goals,last_md_assists,prev_mean_points,prev_mean_goals,prev_mean_assists,prev_max_points,prev_std_points,prev_std_goals,prev_std_assists,prev_median_min,prev_ratio_starter,count_played,goal_consistency,assist_consistency,clean_sheet_consistency,count_team_played,prev_max_goal_to_specific_opp,prev_max_points_to_specific_opp,prev_mean_points_to_specific_opp,prev_ratio_played
0,Aaron Ramsey,2018-09-06 00:00:00,UEFA Nations League,Wales,Republic of Ireland,21.00,90.0,M,2018,9,6,7.0,8.5,3.0,,,,,,,,,,,,,,,,,,,,,
1,Aaron Ramsey,2018-09-06 00:00:00,UEFA Nations League,Wales,Republic of Ireland,21.00,90.0,M,2018,9,6,7.0,4.0,1.0,7.0,1.0,0.0,,,,,,,,,,,,,,,,,,
2,Aaron Ramsey,2018-09-09 00:00:00,UEFA Nations League,Wales,Denmark,13.50,90.0,M,2018,9,9,2.0,8.5,3.0,7.0,1.0,0.0,7.000000,1.0,0.0,7.0,0.000000,0.000000,0.0,90.0,1.000000,1.0,1.000000,0.0,0.000000,1.0,,,,1.000000
3,Aaron Ramsey,2018-09-09 00:00:00,UEFA Nations League,Wales,Denmark,13.50,90.0,M,2018,9,9,2.0,4.0,1.0,2.0,0.0,0.0,7.000000,1.0,0.0,7.0,0.000000,0.000000,0.0,90.0,1.000000,1.0,1.000000,0.0,0.000000,1.0,,,,1.000000
4,Aaron Ramsey,2018-10-11 00:00:00,International Friendlies,Wales,Spain,1.42,90.0,M,2018,10,11,2.0,8.5,3.0,2.0,0.0,0.0,4.500000,0.5,0.0,7.0,2.672612,0.534522,0.0,90.0,1.000000,2.0,0.500000,0.0,0.000000,2.0,,,,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8431,Çaglar Söyüncü,2021-03-24 00:00:00,European World Cup Qualifiers,Turkey,Netherlands,4.98,90.0,D,2021,3,24,1.0,5.0,2.0,6.0,0.0,0.0,3.357143,0.0,0.0,6.0,2.273836,0.000000,0.0,90.0,0.928571,14.0,0.000000,0.0,0.428571,26.0,,,,0.538462
8432,Çaglar Söyüncü,2021-03-27 00:00:00,European World Cup Qualifiers,Turkey,Norway,16.72,90.0,D,2021,3,27,12.0,5.0,2.0,1.0,0.0,0.0,3.200000,0.0,0.0,6.0,2.274078,0.000000,0.0,90.0,0.933333,15.0,0.000000,0.0,0.400000,27.0,,,,0.555556
8433,Çaglar Söyüncü,2021-03-30 00:00:00,European World Cup Qualifiers,Turkey,Latvia,3.54,90.0,D,2021,3,30,0.0,5.0,2.0,12.0,1.0,0.0,3.750000,0.0,0.0,12.0,3.109126,0.250000,0.0,90.0,0.937500,16.0,0.062500,0.0,0.437500,28.0,,,,0.571429
8434,Çaglar Söyüncü,2021-06-11 21:00:00,European Championship 2020,Turkey,Italy,0.00,0.0,D,2021,6,11,0.0,5.0,2.0,0.0,0.0,0.0,3.529412,0.0,0.0,12.0,3.144790,0.242536,0.0,90.0,0.941176,17.0,0.058824,0.0,0.411765,29.0,,,,0.586207


## 3.3 National Team FIFA Rank Dataset

In [82]:
main_df["date"].describe()

  main_df["date"].describe()


count                    8436
unique                    106
top       2020-10-14 00:00:00
freq                      251
first     2018-09-05 00:00:00
last      2021-06-19 21:00:00
Name: date, dtype: object

In [83]:
fifa_rank = pd.read_csv("{}/raw/historical-match-and-rank/fifa_ranking-2021-05-27.csv".format(DATA_DIR))

In [84]:
CUTOFF_DATE = "2018-01-01" 
fifa_rank = fifa_rank[fifa_rank["rank_date"] > CUTOFF_DATE]
fifa_rank = fifa_rank[["country_full", "rank", "total_points", "rank_date"]]
fifa_rank["rank_date"] = pd.to_datetime(fifa_rank["rank_date"])
fifa_rank = fifa_rank.sort_values(by=["country_full", "rank_date"])

In [85]:
# get fifa rank closest to the match date
df_with_rank = pd.merge(main_df[["team_name", "date"]], fifa_rank, how="left", left_on="team_name", right_on="country_full")
df_with_rank["time_diff"] = df_with_rank.apply(lambda x: (x['date']-x['rank_date']).total_seconds(), axis=1)
df_with_rank = df_with_rank[df_with_rank["time_diff"] > 0] # filter out rank after match
df_with_rank = df_with_rank.sort_values(by=["team_name", "time_diff"], ascending=False)
df_with_rank = df_with_rank.groupby(["team_name", "date"]).agg(prev_team_highest_rank=("rank", "min"),
                                                               team_rank=("rank", "last"),
                                                               team_total_points=("total_points", "last")).reset_index()

In [86]:
main_df = pd.merge(main_df, df_with_rank, how="left", on=["team_name", "date"])

In [87]:
# get fifa rank closest to the match date
df_with_rank = pd.merge(main_df[["opponent_name", "date"]], fifa_rank, how="left", left_on="opponent_name", right_on="country_full")
df_with_rank["time_diff"] = df_with_rank.apply(lambda x: (x['date']-x['rank_date']).total_seconds(), axis=1)
df_with_rank = df_with_rank[df_with_rank["time_diff"] > 0] # filter out rank after match
df_with_rank = df_with_rank.sort_values(by=["opponent_name", "time_diff"], ascending=False)
df_with_rank = df_with_rank.groupby(["opponent_name", "date"]).agg(prev_opponent_highest_rank=("rank", "min"),
                                                                   opponent_rank=("rank", "last"),
                                                                   opponent_total_points=("total_points", "last")).reset_index()

In [88]:
main_df = pd.merge(main_df, df_with_rank, how="left", on=["opponent_name", "date"])

In [89]:
main_df.head()

Unnamed: 0,player,date,league_name,team_name,opponent_name,fantasy_points,min,position,year,month,day,points,value,skill,last_md_points,last_md_goals,last_md_assists,prev_mean_points,prev_mean_goals,prev_mean_assists,prev_max_points,prev_std_points,prev_std_goals,prev_std_assists,prev_median_min,prev_ratio_starter,count_played,goal_consistency,assist_consistency,clean_sheet_consistency,count_team_played,prev_max_goal_to_specific_opp,prev_max_points_to_specific_opp,prev_mean_points_to_specific_opp,prev_ratio_played,prev_team_highest_rank,team_rank,team_total_points,prev_opponent_highest_rank,opponent_rank,opponent_total_points
0,Aaron Ramsey,2018-09-06,UEFA Nations League,Wales,Republic of Ireland,21.0,90.0,M,2018,9,6,7.0,8.5,3.0,,,,,,,,,,,,,,,,,,,,,,18,19,1536,26,29,1484
1,Aaron Ramsey,2018-09-06,UEFA Nations League,Wales,Republic of Ireland,21.0,90.0,M,2018,9,6,7.0,4.0,1.0,7.0,1.0,0.0,,,,,,,,,,,,,,,,,,,18,19,1536,26,29,1484
2,Aaron Ramsey,2018-09-09,UEFA Nations League,Wales,Denmark,13.5,90.0,M,2018,9,9,2.0,8.5,3.0,7.0,1.0,0.0,7.0,1.0,0.0,7.0,0.0,0.0,0.0,90.0,1.0,1.0,1.0,0.0,0.0,1.0,,,,1.0,18,19,1536,9,9,1580
3,Aaron Ramsey,2018-09-09,UEFA Nations League,Wales,Denmark,13.5,90.0,M,2018,9,9,2.0,4.0,1.0,2.0,0.0,0.0,7.0,1.0,0.0,7.0,0.0,0.0,0.0,90.0,1.0,1.0,1.0,0.0,0.0,1.0,,,,1.0,18,19,1536,9,9,1580
4,Aaron Ramsey,2018-10-11,International Friendlies,Wales,Spain,1.42,90.0,M,2018,10,11,2.0,8.5,3.0,2.0,0.0,0.0,4.5,0.5,0.0,7.0,2.672612,0.534522,0.0,90.0,1.0,2.0,0.5,0.0,0.0,2.0,,,,1.0,18,19,1536,6,9,1597


In [90]:
main_df.to_csv("{}/interim/main.csv".format(DATA_DIR), index=False)

In [91]:
historical_matches = pd.read_csv("{}/raw/historical-match-and-rank/international-footbal-match.csv".format(DATA_DIR))

In [92]:
historical_matches["date"] = pd.to_datetime(historical_matches["date"])

In [93]:
historical_matches  = historical_matches[historical_matches["date"] > "2010-01-01"]

In [94]:
historical_matches["match"] = historical_matches["home_team"] + ',' + historical_matches['away_team']

In [95]:
historical_matches["match"] = historical_matches["match"].apply(lambda x: ' '.join(sorted(x.split(","))))

In [96]:
def get_match_result(df):
    if df["home_score"] > df["away_score"]:
        return df["home_team"]
    elif df["away_score"] > df["home_score"]:
        return df["away_team"]
    else:
        return "Draw"

In [97]:
historical_matches["result"] = historical_matches.apply(get_match_result, axis=1)

In [98]:
historical_matches["margin"] = historical_matches.apply(lambda x: abs(x["home_score"] - x["away_score"]), axis=1)

In [99]:
historical_matches.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,match,result,margin
31892,2010-01-02,Iran,North Korea,1,0,Friendly,Doha,Qatar,True,Iran North Korea,Iran,1
31893,2010-01-02,Qatar,Mali,0,0,Friendly,Doha,Qatar,False,Mali Qatar,Draw,0
31894,2010-01-02,Syria,Zimbabwe,6,0,Friendly,Kuala Lumpur,Malaysia,True,Syria Zimbabwe,Syria,6
31895,2010-01-02,Yemen,Tajikistan,0,1,Friendly,Sana'a,Yemen,False,Tajikistan Yemen,Tajikistan,1
31896,2010-01-03,Angola,Gambia,1,1,Friendly,Vila Real de Santo António,Portugal,True,Angola Gambia,Draw,0


In [100]:
def get_all_historical_matches(df, team, opp, date):
    name_tuple = ' '.join(sorted([team, opp]))
    hist = df[(df['match'] == name_tuple) & (df["date"] < date)]
    hth = hist["result"].value_counts()
    team_win, opp_win, draw = 0, 0, 0
    if "Draw" in hth.keys():
        draw = hth["Draw"]
    if team in hth.keys():
        team_win = hth[team]
    if opp in hth.keys():
        opp_win = hth[opp]
    max_margin = hist["margin"].max()
    team_score = hist[hist['home_team'] == team]["home_score"].sum() + hist[hist['away_team'] == team]["away_score"].sum()
    opp_score = hist[hist['home_team'] == opp]["home_score"].sum() + hist[hist['away_team'] == opp]["away_score"].sum()
    return pd.Series([team_win, opp_win, draw, team_score, opp_score, max_margin])

In [101]:
main_df[["hth_team_win", "hth_opp_win", "hth_draw", "hth_team_score", "hth_opp_score", "htt_max_margin"]] = main_df.apply(lambda x: get_all_historical_matches(historical_matches, x["team_name"], x["opponent_name"], x["date"]), axis=1)

## 3.4 Transfermarkt Dataset

### 3.4.1 National Team Level

In [102]:
euro = pd.read_excel("{}/raw/transfermarkt/transfermarkt-market-value.xlsx".format(DATA_DIR), sheet_name=0)
nations_league = pd.read_excel("{}/raw/transfermarkt/transfermarkt-market-value.xlsx".format(DATA_DIR), sheet_name=1)
euro_qual = pd.read_excel("{}/raw/transfermarkt/transfermarkt-market-value.xlsx".format(DATA_DIR), sheet_name=2)
wc_euro_qual = pd.read_excel("{}/raw/transfermarkt/transfermarkt-market-value.xlsx".format(DATA_DIR), sheet_name=3)

In [103]:
nations_league["league_name"] = "UEFA Nations League"
euro_qual["league_name"] = "European Championship Qualifiers"
wc_euro_qual["league_name"] = "European World Cup Qualifiers"
euro["league_name"] = "European Championship 2020"

In [104]:
euro = euro.drop(["EURO participations"], axis=1)
euro.rename(columns={"Average Age": "Age"}, inplace=True)

In [105]:
def preprocess_market_value(text):
    match = re.sub("€", "", text)
    match = re.search("(\d+(?:\.\d+)?)", text)
    val = float(match.group())
    num = text[match.end():]
    if num == "bn":
        val *= 10e9
    elif num == "m":
        val *= 10e6
    elif num == "Th.":
        val *= 10e3
    return val

In [106]:
mv_df = pd.concat([nations_league, euro_qual, wc_euro_qual, euro])

In [107]:
mv_df["market_value"] = mv_df["Market Value"].apply(preprocess_market_value)
mv_df["mean_market_value"] = mv_df["Average Market Value"].apply(preprocess_market_value)

In [108]:
mv_df = mv_df.drop_duplicates(subset=["Club", "league_name"], keep="first")
mv_df = mv_df[["Club", "league_name", "Age", "market_value", "mean_market_value"]]
mv_df.rename(columns={"Club" : "team_name", "Age": "mean_squad_age"}, inplace=True)

In [109]:
main_df = pd.merge(main_df, mv_df, how="left", on=["team_name", "league_name"])

In [110]:
main_df.rename(columns={"mean_squad_age" : "team_mean_squad_age",
                        "mean_market_value": "team_mean_market_value",
                        "market_value" : "team_market_value"
                       }, inplace=True)

In [111]:
mv_df.rename(columns={"team_name" : "opponent_name"}, inplace=True)

In [112]:
main_df = pd.merge(main_df, mv_df, how="left", on=["opponent_name", "league_name"])

In [113]:
main_df.rename(columns={"mean_squad_age" : "opponent_mean_squad_age",
                        "mean_market_value": "opponent_mean_market_value",
                        "market_value" : "opponent_market_value"
                       }, inplace=True)

### 3.4.2 Player Level

In [114]:
data_1718 = pd.read_csv("{}/raw/transfermarkt/transfermarkt_fbref_201718.csv".format(DATA_DIR), delimiter=";")
data_1819 = pd.read_csv("{}/raw/transfermarkt/transfermarkt_fbref_201819.csv".format(DATA_DIR), delimiter=";")
data_1920 = pd.read_csv("{}/raw/transfermarkt/transfermarkt_fbref_201920.csv".format(DATA_DIR), delimiter=";")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [115]:
all_season = pd.concat([data_1718, data_1819, data_1920])

In [116]:
def get_year(season):
    if season == "201718#":
        return 2019
    elif season == "201819#":
        return 2020
    elif season == "201920#":
        return 2021
    else:
        return 0

In [117]:
all_season['year'] = all_season["Season"].apply(get_year)

In [118]:
all_season

Unnamed: 0.1,Unnamed: 0,player,nationality,position,squad,age,birth_year,value,height,position2,foot,league,games,games_starts,minutes,goals,assists,pens_made,pens_att,cards_yellow,cards_red,goals_per90,assists_per90,goals_assists_per90,goals_pens_per90,goals_assists_pens_per90,xg,npxg,xa,xg_per90,xa_per90,xg_xa_per90,npxg_per90,npxg_xa_per90,minutes_90s,shots_total,shots_on_target,shots_free_kicks,shots_on_target_pct,shots_total_per90,shots_on_target_per90,goals_per_shot,goals_per_shot_on_target,npxg_per_shot,xg_net,npxg_net,passes_completed,passes,passes_pct,passes_total_distance,passes_progressive_distance,passes_completed_short,passes_short,passes_pct_short,passes_completed_medium,passes_medium,passes_pct_medium,passes_completed_long,passes_long,passes_pct_long,xa_net,assisted_shots,passes_into_final_third,passes_into_penalty_area,crosses_into_penalty_area,progressive_passes,passes_live,passes_dead,passes_free_kicks,through_balls,passes_pressure,passes_switches,crosses,corner_kicks,corner_kicks_in,corner_kicks_out,corner_kicks_straight,passes_ground,passes_low,passes_high,passes_left_foot,passes_right_foot,passes_head,throw_ins,passes_other_body,passes_offsides,passes_oob,passes_intercepted,passes_blocked,sca,sca_per90,sca_passes_live,sca_passes_dead,sca_dribbles,sca_shots,sca_fouled,gca,gca_per90,gca_passes_live,gca_passes_dead,gca_dribbles,gca_shots,gca_fouled,gca_og_for,tackles,tackles_won,tackles_def_3rd,tackles_mid_3rd,tackles_att_3rd,dribble_tackles,dribbles_vs,dribble_tackles_pct,dribbled_past,pressures,pressure_regains,pressure_regain_pct,pressures_def_3rd,pressures_mid_3rd,pressures_att_3rd,blocks,blocked_shots,blocked_shots_saves,blocked_passes,interceptions,clearances,errors,touches,touches_def_pen_area,touches_def_3rd,touches_mid_3rd,touches_att_3rd,touches_att_pen_area,touches_live_ball,dribbles_completed,dribbles,dribbles_completed_pct,players_dribbled_past,nutmegs,carries,carry_distance,carry_progressive_distance,pass_targets,passes_received,passes_received_pct,miscontrols,dispossessed,cards_yellow_red,fouls,fouled,offsides,pens_won,pens_conceded,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct,games_gk,games_starts_gk,minutes_gk,goals_against_gk,goals_against_per90_gk,shots_on_target_against,saves,save_pct,wins_gk,draws_gk,losses_gk,clean_sheets,clean_sheets_pct,pens_att_gk,pens_allowed,pens_saved,pens_missed_gk,minutes_90s_gk,free_kick_goals_against_gk,corner_kick_goals_against_gk,own_goals_against_gk,psxg_gk,psnpxg_per_shot_on_target_against,psxg_net_gk,psxg_net_per90_gk,passes_completed_launched_gk,passes_launched_gk,passes_pct_launched_gk,passes_gk,passes_throws_gk,pct_passes_launched_gk,passes_length_avg_gk,goal_kicks,pct_goal_kicks_launched,goal_kick_length_avg,crosses_gk,crosses_stopped_gk,crosses_stopped_pct_gk,def_actions_outside_pen_area_gk,def_actions_outside_pen_area_per90_gk,avg_distance_def_actions_gk,goalsm,assistsm,pens_madem,pens_attm,cards_yellowm,cards_redm,goals_per90m,assists_per90m,goals_assists_per90m,goals_pens_per90m,goals_assists_pens_per90m,xgm,npxgm,xam,xg_per90m,xa_per90m,xg_xa_per90m,npxg_per90m,npxg_xa_per90m,minutes_90sm,shots_totalm,shots_on_targetm,shots_free_kicksm,shots_on_target_pctm,shots_total_per90m,shots_on_target_per90m,goals_per_shotm,goals_per_shot_on_targetm,npxg_per_shotm,xg_netm,npxg_netm,passes_completedm,passesm,passes_pctm,passes_total_distancem,passes_progressive_distancem,passes_completed_shortm,passes_shortm,passes_pct_shortm,passes_completed_mediumm,passes_mediumm,passes_pct_mediumm,passes_completed_longm,passes_longm,passes_pct_longm,xa_netm,assisted_shotsm,passes_into_final_thirdm,passes_into_penalty_aream,crosses_into_penalty_aream,progressive_passesm,passes_livem,passes_deadm,passes_free_kicksm,through_ballsm,passes_pressurem,passes_switchesm,crossesm,corner_kicksm,corner_kicks_inm,corner_kicks_outm,corner_kicks_straightm,passes_groundm,passes_lowm,passes_highm,passes_left_footm,passes_right_footm,passes_headm,throw_insm,passes_other_bodym,passes_offsidesm,passes_oobm,passes_interceptedm,passes_blockedm,scam,sca_per90m,sca_passes_livem,sca_passes_deadm,sca_dribblesm,sca_shotsm,sca_fouledm,gcam,gca_per90m,gca_passes_livem,gca_passes_deadm,gca_dribblesm,gca_shotsm,gca_fouledm,gca_og_form,tacklesm,tackles_wonm,tackles_def_3rdm,tackles_mid_3rdm,tackles_att_3rdm,dribble_tacklesm,dribbles_vsm,dribble_tackles_pctm,dribbled_pastm,pressuresm,pressure_regainsm,pressure_regain_pctm,pressures_def_3rdm,pressures_mid_3rdm,pressures_att_3rdm,blocksm,blocked_shotsm,blocked_shots_savesm,blocked_passesm,interceptionsm,clearancesm,errorsm,touchesm,touches_def_pen_aream,touches_def_3rdm,touches_mid_3rdm,touches_att_3rdm,touches_att_pen_aream,touches_live_ballm,dribbles_completedm,dribblesm,dribbles_completed_pctm,players_dribbled_pastm,nutmegsm,carriesm,carry_distancem,carry_progressive_distancem,pass_targetsm,passes_receivedm,passes_received_pctm,miscontrolsm,dispossessedm,cards_yellow_redm,foulsm,fouledm,offsidesm,pens_wonm,pens_concededm,own_goalsm,ball_recoveriesm,aerials_wonm,aerials_lostm,aerials_won_pctm,games_gkm,games_starts_gkm,minutes_gkm,goals_against_gkm,goals_against_per90_gkm,shots_on_target_againstm,savesm,save_pctm,wins_gkm,draws_gkm,losses_gkm,clean_sheetsm,clean_sheets_pctm,pens_att_gkm,pens_allowedm,pens_savedm,pens_missed_gkm,minutes_90s_gkm,free_kick_goals_against_gkm,corner_kick_goals_against_gkm,own_goals_against_gkm,psxg_gkm,psnpxg_per_shot_on_target_againstm,psxg_net_gkm,psxg_net_per90_gkm,passes_completed_launched_gkm,passes_launched_gkm,passes_pct_launched_gkm,passes_gkm,passes_throws_gkm,pct_passes_launched_gkm,passes_length_avg_gkm,goal_kicksm,pct_goal_kicks_launchedm,goal_kick_length_avgm,crosses_gkm,crosses_stopped_gkm,crosses_stopped_pct_gkm,def_actions_outside_pen_area_gkm,def_actions_outside_pen_area_per90_gkm,avg_distance_def_actions_gkm,LgRk,MP,W,D,L,GF,GA,GDiff,Pts,Pts/G,xG,xGA,xGDiff,xGDiff/90,Attendance,CL,WinCL,CLBestScorer,Season,Column1,year
0,379.0,Burgui,es ESP,"FW,MF",Alavés,23.0,1993.0,1800000.0,186.0,attack - Left Winger,right,La Liga,23.0,12.0,1037.0,1.0,1.0,0.0,0.0,1.0,0.0,0.09,0.09,0.17,0.09,0.17,0.9,0.9,1.7,0.08,0.15,0.23,0.08,0.23,11.5,14.0,4.0,1.0,28.6,1.22,0.35,0.07,0.25,0.06,0.1,0.1,264.0,354.0,74.6,4623.0,1578.0,127.0,154.0,82.5,107.0,143.0,74.8,27.0,41.0,65.9,-0.7,17.0,28.0,15.0,1.0,47.0,336.0,18.0,3.0,2.0,73.0,7.0,18.0,3.0,0.0,0.0,0.0,280.0,35.0,39.0,42.0,286.0,16.0,9.0,0.0,1.0,8.0,14.0,22.0,42.0,3.66,33.0,0.0,2.0,2.0,4.0,3.0,0.26,3.0,0.0,0.0,0.0,0.0,0.0,12.0,7.0,4.0,3.0,5.0,3.0,7.0,42.9,4.0,199.0,47.0,23.6,32.0,97.0,70.0,9.0,0.0,0.0,9.0,4.0,1.0,0.0,463.0,6.0,56.0,217.0,223.0,29.0,445.0,24.0,31.0,77.4,25.0,2.0,388.0,2670.0,1520.0,468.0,345.0,73.7,24.0,20.0,0.0,10.0,19.0,5.0,1.0,0.0,0.0,56.0,1.0,11.0,8.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000699,0.000000,0.0,0.0,0.005590,0.000699,0.000042,0.000000,0.000042,0.000042,0.000042,0.000280,0.000280,0.000280,0.000014,0.000014,0.000035,0.000014,0.000035,0.011111,0.002096,0.000699,0.000000,0.023270,0.000133,0.000042,0.000231,0.000699,0.000084,0.000419,0.000419,0.313068,0.382250,0.057233,5.339623,1.338924,0.143256,0.162124,0.061775,0.127883,0.158630,0.056324,0.030748,0.045423,0.047310,-0.000280,0.004193,0.019567,0.001398,0.000000,0.024458,0.368274,0.013976,0.009085,0.000000,0.060797,0.002096,0.000699,0.000000,0.0,0.00000,0.0,0.227813,0.077568,0.076869,0.018169,0.292103,0.051013,0.004193,0.007687,0.000000,0.010482,0.004892,0.006988,0.010482,0.000657,0.008386,0.000000,0.000000,0.000000,0.000699,0.000699,0.000042,0.000699,0.0,0.000000,0.00000,0.000000,0.00000,0.029350,0.020964,0.013976,0.010482,0.004892,0.012579,0.028651,0.030678,0.016073,0.206150,0.071279,0.024179,0.081761,0.103424,0.020964,0.016073,0.003494,0.0,0.012579,0.015374,0.037736,0.000000,0.477987,0.049616,0.166317,0.276031,0.050314,0.005590,0.464710,0.000000,0.000000,0.000000,0.000000,0.000000,0.232704,1.046122,0.422082,0.240391,0.213836,0.062194,0.000699,0.001398,0.00000,0.016771,0.014675,0.000000,0.00000,0.000699,0.0,0.146052,0.011880,0.011880,0.034941,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,14.0,38.0,15.0,2.0,21.0,40.0,50.0,-10.0,47.0,1.24,39.0,53.2,-14.2,-0.37,16819,0.0,0.0,0.0,201718#,,2019
1,2530.0,Raphaël Varane,fr FRA,DF,Real Madrid,24.0,1993.0,70000000.0,191.0,Defender - Centre-Back,right,La Liga,27.0,27.0,2320.0,0.0,1.0,0.0,0.0,3.0,0.0,0.00,0.04,0.04,0.00,0.04,0.9,0.9,0.9,0.04,0.04,0.07,0.04,0.07,25.8,10.0,2.0,0.0,20.0,0.39,0.08,0.00,0.00,0.09,-0.9,-0.9,1169.0,1323.0,88.4,25454.0,8106.0,330.0,361.0,91.4,600.0,657.0,91.3,229.0,284.0,80.6,0.1,6.0,82.0,3.0,0.0,52.0,1298.0,25.0,24.0,1.0,193.0,41.0,1.0,0.0,0.0,0.0,0.0,1026.0,85.0,212.0,461.0,742.0,88.0,1.0,5.0,4.0,20.0,13.0,12.0,17.0,0.66,14.0,0.0,0.0,3.0,0.0,2.0,0.08,1.0,0.0,0.0,1.0,0.0,0.0,22.0,14.0,18.0,4.0,0.0,5.0,15.0,33.3,10.0,242.0,86.0,35.5,145.0,90.0,7.0,32.0,14.0,2.0,18.0,17.0,80.0,2.0,1504.0,179.0,788.0,753.0,70.0,19.0,1479.0,10.0,12.0,83.3,10.0,0.0,1031.0,6596.0,2986.0,992.0,973.0,98.1,3.0,3.0,0.0,17.0,12.0,0.0,0.0,0.0,0.0,272.0,31.0,11.0,73.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.001751,0.0,0.0,0.001751,0.000000,0.000000,0.000280,0.000280,0.000000,0.000280,0.001226,0.001226,0.002627,0.000193,0.000403,0.000595,0.000193,0.000595,0.011033,0.028021,0.007005,0.000000,0.043783,0.004413,0.001103,0.000000,0.000000,0.000070,-0.001226,-0.001226,0.506130,0.686515,0.129072,8.309982,2.357268,0.271454,0.322242,0.147461,0.176883,0.229422,0.135026,0.050788,0.092820,0.095797,-0.001751,0.022767,0.024518,0.029772,0.008757,0.063047,0.514886,0.024518,0.001751,0.003503,0.068301,0.007005,0.029772,0.001751,0.0,0.00000,0.0,0.392294,0.070053,0.077058,0.161121,0.320490,0.028021,0.021016,0.005254,0.003503,0.010508,0.017513,0.028021,0.050788,0.004343,0.042032,0.003503,0.001751,0.000000,0.001751,0.003503,0.000298,0.003503,0.0,0.000000,0.00000,0.000000,0.00000,0.017513,0.012259,0.005254,0.010508,0.001751,0.008757,0.024518,0.062522,0.015762,0.164623,0.052539,0.055867,0.036778,0.092820,0.035026,0.010508,0.001751,0.0,0.008757,0.003503,0.005254,0.000000,0.677758,0.017513,0.094571,0.309982,0.316988,0.054291,0.653240,0.008757,0.019264,0.079685,0.008757,0.001751,0.523643,3.073555,1.929947,0.556918,0.483363,0.152014,0.021016,0.024518,0.00000,0.014011,0.012259,0.000000,0.00000,0.000000,0.0,0.119089,0.003503,0.008757,0.050088,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.0,38.0,22.0,10.0,6.0,94.0,44.0,50.0,76.0,2.00,83.3,45.4,37.9,1.00,66161,1.0,1.0,0.0,201718#,,2019
2,721.0,Rubén Duarte,es ESP,DF,Alavés,21.0,1995.0,2000000.0,179.0,Defender - Left-Back,left,La Liga,24.0,24.0,2138.0,0.0,2.0,0.0,0.0,9.0,2.0,0.00,0.08,0.08,0.00,0.08,0.5,0.5,1.6,0.02,0.07,0.09,0.02,0.09,23.8,8.0,1.0,0.0,12.5,0.34,0.04,0.00,0.00,0.06,-0.5,-0.5,660.0,963.0,68.5,11820.0,5822.0,311.0,366.0,85.0,271.0,373.0,72.7,71.0,200.0,35.5,0.4,18.0,41.0,21.0,8.0,62.0,748.0,215.0,5.0,1.0,153.0,22.0,36.0,1.0,0.0,0.0,0.0,394.0,177.0,392.0,545.0,87.0,100.0,209.0,2.0,14.0,30.0,12.0,25.0,34.0,1.43,27.0,1.0,0.0,1.0,5.0,2.0,0.08,0.0,0.0,0.0,0.0,2.0,0.0,54.0,29.0,32.0,19.0,3.0,18.0,49.0,36.7,31.0,345.0,83.0,24.1,213.0,99.0,33.0,74.0,11.0,1.0,63.0,30.0,51.0,0.0,1217.0,107.0,440.0,531.0,293.0,19.0,1003.0,11.0,12.0,91.7,11.0,1.0,537.0,2738.0,1412.0,511.0,463.0,90.6,5.0,13.0,1.0,48.0,35.0,1.0,0.0,2.0,0.0,283.0,22.0,15.0,59.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000585,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000058,0.000000,0.000000,0.000000,0.000000,0.000000,0.011111,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.115789,0.347368,0.009737,6.807895,5.936550,0.001170,0.001462,0.023392,0.010526,0.011696,0.026316,0.104094,0.333918,0.009123,-0.000058,0.000877,0.019006,0.000000,0.000000,0.000000,0.210234,0.137135,0.043860,0.000292,0.028947,0.005848,0.000000,0.000000,0.0,0.00000,0.0,0.014327,0.005556,0.327485,0.013450,0.270760,0.000000,0.000000,0.014327,0.001754,0.017836,0.002047,0.000585,0.001754,0.000047,0.001170,0.000585,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000292,0.000000,0.000292,0.000585,0.000000,0.000000,0.000292,0.000000,0.000292,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.385380,0.314620,0.376023,0.009649,0.000000,0.000000,0.250000,0.000000,0.000000,0.000000,0.000000,0.000000,0.169591,1.059357,0.633626,0.120760,0.120175,0.029094,0.000000,0.000000,0.00000,0.000000,0.001462,0.000000,0.00000,0.000000,0.0,0.064327,0.000000,0.000000,0.000000,0.011111,0.011111,1.0,0.020175,0.000532,0.058772,0.039474,0.000198,0.002924,0.00117,0.007018,0.002924,0.00769,0.002047,0.00117,0.000877,0.0,0.011111,0.000292,0.003509,0.0,0.020175,0.000094,0.0,0.0,0.097368,0.322515,0.00883,0.254094,0.014327,0.026433,0.018012,0.093275,0.029064,0.020643,0.087427,0.006433,0.002164,0.011111,0.000292,0.004737,14.0,38.0,15.0,2.0,21.0,40.0,50.0,-10.0,47.0,1.24,39.0,53.2,-14.2,-0.37,16819,0.0,0.0,0.0,201718#,,2019
3,2512.0,Samuel Umtiti,fr FRA,DF,Barcelona,23.0,1993.0,60000000.0,182.0,Defender - Centre-Back,left,La Liga,25.0,24.0,2189.0,1.0,0.0,0.0,0.0,7.0,0.0,0.04,0.00,0.04,0.04,0.04,1.1,1.1,0.0,0.05,0.00,0.05,0.05,0.05,24.3,11.0,6.0,0.0,54.5,0.45,0.25,0.09,0.17,0.10,-0.1,-0.1,1435.0,1591.0,90.2,28492.0,9925.0,468.0,514.0,91.1,760.0,804.0,94.5,200.0,252.0,79.4,0.0,1.0,118.0,1.0,0.0,90.0,1567.0,24.0,19.0,1.0,236.0,23.0,0.0,0.0,0.0,0.0,0.0,1345.0,106.0,140.0,1226.0,252.0,71.0,5.0,8.0,2.0,21.0,10.0,23.0,10.0,0.41,9.0,0.0,0.0,0.0,1.0,1.0,0.04,0.0,0.0,0.0,0.0,1.0,0.0,40.0,26.0,27.0,12.0,1.0,12.0,23.0,52.2,11.0,211.0,85.0,40.3,124.0,77.0,10.0,30.0,15.0,0.0,15.0,30.0,55.0,0.0,1768.0,214.0,855.0,956.0,78.0,17.0,1747.0,6.0,8.0,75.0,6.0,1.0,1318.0,7973.0,4639.0,1272.0,1251.0,98.3,7.0,4.0,0.0,23.0,25.0,4.0,0.0,0.0,0.0,248.0,41.0,12.0,77.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.003899,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000195,0.000195,0.000682,0.000010,0.000058,0.000078,0.000010,0.000078,0.011111,0.002924,0.000975,0.000000,0.032456,0.000253,0.000088,0.000000,0.000000,0.000049,-0.000195,-0.000195,0.571150,0.622807,0.089376,11.442495,3.133528,0.167641,0.178363,0.091618,0.335283,0.352827,0.092593,0.063353,0.084795,0.072807,-0.000682,0.003899,0.026316,0.001949,0.000000,0.017544,0.591618,0.031189,0.031189,0.001949,0.069201,0.010721,0.001949,0.000000,0.0,0.00000,0.0,0.525341,0.043860,0.053606,0.134503,0.442495,0.036062,0.000000,0.000975,0.001949,0.003899,0.004873,0.002924,0.006823,0.000595,0.005848,0.000000,0.000975,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000,0.00000,0.021442,0.009747,0.014620,0.005848,0.000975,0.006823,0.008772,0.075828,0.001949,0.126706,0.036062,0.027778,0.082846,0.036062,0.007797,0.025341,0.015595,0.0,0.009747,0.005848,0.061404,0.000975,0.741715,0.085770,0.365497,0.405458,0.022417,0.007797,0.710526,0.002924,0.003899,0.073099,0.002924,0.000000,0.476608,2.952242,1.544834,0.504873,0.496101,0.095809,0.000975,0.000975,0.00000,0.008772,0.007797,0.000000,0.00000,0.000000,0.0,0.096491,0.021442,0.010721,0.065010,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0,38.0,28.0,9.0,1.0,99.0,29.0,70.0,93.0,2.45,78.3,41.1,37.2,0.98,66603,1.0,0.0,0.0,201718#,,2019
4,882.0,Manu García,es ESP,MF,Alavés,31.0,1986.0,1800000.0,183.0,midfield - Defensive Midfield,left,La Liga,30.0,27.0,2438.0,3.0,0.0,0.0,2.0,9.0,0.0,0.11,0.00,0.11,0.11,0.11,3.1,1.6,1.4,0.11,0.05,0.17,0.06,0.11,27.1,26.0,8.0,0.0,30.8,0.96,0.30,0.12,0.38,0.06,-0.1,1.4,724.0,997.0,72.6,13562.0,3815.0,312.0,398.0,78.4,302.0,391.0,77.2,100.0,174.0,57.5,-1.4,15.0,92.0,5.0,2.0,65.0,979.0,18.0,7.0,1.0,233.0,26.0,11.0,0.0,0.0,0.0,0.0,650.0,105.0,242.0,564.0,269.0,118.0,8.0,2.0,7.0,34.0,19.0,27.0,41.0,1.51,29.0,0.0,4.0,2.0,6.0,2.0,0.07,2.0,0.0,0.0,0.0,0.0,0.0,90.0,69.0,38.0,44.0,8.0,20.0,78.0,25.6,58.0,615.0,145.0,23.6,221.0,337.0,57.0,40.0,9.0,0.0,31.0,32.0,55.0,0.0,1327.0,82.0,365.0,808.0,213.0,39.0,1308.0,17.0,21.0,81.0,17.0,0.0,818.0,3811.0,1714.0,865.0,732.0,84.6,23.0,22.0,0.0,66.0,72.0,5.0,0.0,0.0,0.0,292.0,42.0,30.0,58.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.004484,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000224,0.000224,0.000224,0.000045,0.000022,0.000067,0.000045,0.000067,0.011211,0.006726,0.002242,0.000000,0.074664,0.001368,0.000448,0.000000,0.000000,0.000067,-0.000224,-0.000224,0.517937,0.582960,0.199103,10.159193,2.497758,0.172646,0.190583,0.203139,0.275785,0.300448,0.205830,0.058296,0.076233,0.171525,-0.000224,0.002242,0.051570,0.002242,0.000000,0.042601,0.551570,0.031390,0.029148,0.002242,0.076233,0.015695,0.002242,0.000000,0.0,0.00000,0.0,0.484305,0.022422,0.076233,0.040359,0.502242,0.026906,0.002242,0.000000,0.000000,0.002242,0.006726,0.002242,0.013453,0.002713,0.006726,0.000000,0.000000,0.002242,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000,0.00000,0.029148,0.022422,0.013453,0.008969,0.006726,0.008969,0.015695,0.128027,0.006726,0.103139,0.042601,0.092601,0.038117,0.051570,0.013453,0.011211,0.004484,0.0,0.006726,0.020179,0.049327,0.000000,0.701794,0.038117,0.251121,0.414798,0.085202,0.013453,0.670404,0.008969,0.008969,0.224215,0.008969,0.000000,0.497758,2.820628,1.459641,0.488789,0.466368,0.213901,0.008969,0.008969,0.00000,0.017937,0.015695,0.000000,0.00000,0.000000,0.0,0.096413,0.024664,0.015695,0.136996,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,14.0,38.0,15.0,2.0,21.0,40.0,50.0,-10.0,47.0,1.24,39.0,53.2,-14.2,-0.37,16819,0.0,0.0,0.0,201718#,,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2639,,Daniel Podence,pt POR,"FW,MF",Wolves,23.0,1995.0,12000000.0,165.0,Forward - Right Winger,right,Premier League,9.0,3.0,289.0,1.0,0.0,0.0,0.0,0.0,0.0,0.31,0.00,0.31,0.31,0.31,1.6,1.6,0.4,0.49,0.13,0.62,0.49,0.62,3.2,10.0,4.0,0.0,40.0,3.11,1.25,0.10,0.25,0.16,-0.6,-0.6,74.0,105.0,70.5,1110.0,232.0,40.0,51.0,78.4,26.0,35.0,74.3,4.0,12.0,33.3,-0.4,5.0,3.0,3.0,0.0,8.0,102.0,3.0,0.0,1.0,18.0,0.0,5.0,2.0,0.0,1.0,0.0,75.0,13.0,17.0,12.0,90.0,3.0,0.0,0.0,1.0,3.0,2.0,3.0,11.0,3.43,7.0,0.0,1.0,1.0,2.0,2.0,0.62,0.0,0.0,1.0,0.0,1.0,0.0,2.0,2.0,1.0,0.0,1.0,0.0,3.0,0.0,3.0,43.0,13.0,30.2,8.0,20.0,15.0,5.0,1.0,0.0,4.0,0.0,3.0,0.0,153.0,2.0,15.0,69.0,84.0,16.0,150.0,9.0,13.0,69.2,10.0,0.0,113.0,910.0,570.0,159.0,117.0,73.6,3.0,8.0,0.0,5.0,9.0,3.0,1.0,0.0,0.0,19.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003460,0.000000,0.0,0.0,0.000000,0.000000,0.001073,0.000000,0.001073,0.001073,0.001073,0.005536,0.005536,0.001384,0.001696,0.000450,0.002145,0.001696,0.002145,0.011073,0.034602,0.013841,0.000000,0.138408,0.010761,0.004325,0.000346,0.000865,0.000554,-0.002076,-0.002076,0.256055,0.363322,0.243945,3.840830,0.802768,0.138408,0.176471,0.271280,0.089965,0.121107,0.257093,0.013841,0.041522,0.115225,-0.001384,0.017301,0.010381,0.010381,0.000000,0.027682,0.352941,0.010381,0.000000,0.003460,0.062284,0.000000,0.017301,0.006920,0.0,0.00346,0.0,0.259516,0.044983,0.058824,0.041522,0.311419,0.010381,0.000000,0.000000,0.003460,0.010381,0.006920,0.010381,0.038062,0.011869,0.024221,0.000000,0.003460,0.003460,0.006920,0.006920,0.002145,0.000000,0.0,0.003460,0.00000,0.003460,0.00000,0.006920,0.006920,0.003460,0.000000,0.003460,0.000000,0.010381,0.000000,0.010381,0.148789,0.044983,0.104498,0.027682,0.069204,0.051903,0.017301,0.003460,0.0,0.013841,0.000000,0.010381,0.000000,0.529412,0.006920,0.051903,0.238754,0.290657,0.055363,0.519031,0.031142,0.044983,0.239446,0.034602,0.000000,0.391003,3.148789,1.972318,0.550173,0.404844,0.254671,0.010381,0.027682,0.00000,0.017301,0.031142,0.010381,0.00346,0.000000,0.0,0.065744,0.000000,0.024221,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.0,38.0,15.0,14.0,9.0,51.0,40.0,11.0,59.0,1.55,47.1,34.8,12.3,0.32,24758,0.0,0.0,,201920#,1997.0,2021
2640,,Romain SaĂŻss,ma MAR,DF,Wolves,29.0,1990.0,4000000.0,188.0,Defender - Centre-Back,left,Premier League,33.0,31.0,2705.0,2.0,1.0,0.0,0.0,12.0,1.0,0.07,0.03,0.10,0.07,0.10,2.7,2.7,0.5,0.09,0.02,0.11,0.09,0.11,30.1,23.0,8.0,1.0,34.8,0.77,0.27,0.09,0.25,0.12,-0.7,-0.7,1252.0,1540.0,81.3,28003.0,10404.0,306.0,342.0,89.5,699.0,779.0,89.7,239.0,382.0,62.6,0.5,7.0,119.0,9.0,0.0,115.0,1493.0,47.0,37.0,1.0,187.0,61.0,7.0,0.0,0.0,0.0,0.0,952.0,226.0,362.0,1232.0,136.0,97.0,10.0,5.0,4.0,32.0,18.0,31.0,25.0,0.83,19.0,1.0,1.0,3.0,0.0,2.0,0.07,0.0,0.0,0.0,1.0,0.0,1.0,66.0,30.0,46.0,20.0,0.0,23.0,45.0,51.1,22.0,343.0,116.0,33.8,228.0,106.0,9.0,58.0,19.0,0.0,39.0,27.0,201.0,1.0,1931.0,175.0,926.0,1008.0,88.0,26.0,1891.0,7.0,9.0,77.8,8.0,1.0,1058.0,4699.0,2510.0,1201.0,1165.0,97.0,6.0,8.0,1.0,43.0,24.0,2.0,0.0,0.0,0.0,358.0,117.0,65.0,64.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000739,0.000370,0.0,0.0,0.004436,0.000370,0.000026,0.000011,0.000037,0.000026,0.000037,0.000998,0.000998,0.000185,0.000033,0.000007,0.000041,0.000033,0.000041,0.011128,0.008503,0.002957,0.000370,0.012865,0.000285,0.000100,0.000033,0.000092,0.000044,-0.000259,-0.000259,0.462847,0.569316,0.030055,10.352311,3.846211,0.113124,0.126433,0.033087,0.258410,0.287985,0.033161,0.088355,0.141220,0.023142,0.000185,0.002588,0.043993,0.003327,0.000000,0.042514,0.551941,0.017375,0.013678,0.000370,0.069131,0.022551,0.002588,0.000000,0.0,0.00000,0.0,0.351941,0.083549,0.133826,0.455453,0.050277,0.035860,0.003697,0.001848,0.001479,0.011830,0.006654,0.011460,0.009242,0.000307,0.007024,0.000370,0.000370,0.001109,0.000000,0.000739,0.000026,0.000000,0.0,0.000000,0.00037,0.000000,0.00037,0.024399,0.011091,0.017006,0.007394,0.000000,0.008503,0.016636,0.018891,0.008133,0.126802,0.042884,0.012495,0.084288,0.039187,0.003327,0.021442,0.007024,0.0,0.014418,0.009982,0.074307,0.000370,0.713863,0.064695,0.342329,0.372643,0.032532,0.009612,0.699076,0.002588,0.003327,0.028762,0.002957,0.000370,0.391128,1.737153,0.927911,0.443993,0.430684,0.035860,0.002218,0.002957,0.00037,0.015896,0.008872,0.000739,0.00000,0.000000,0.0,0.132348,0.043253,0.024030,0.023771,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.0,38.0,15.0,14.0,9.0,51.0,40.0,11.0,59.0,1.55,47.1,34.8,12.3,0.32,24758,0.0,0.0,,201920#,2202.0,2021
2641,,Adama TraorĂ©,es ESP,"FW,MF",Wolves,23.0,1996.0,25000000.0,178.0,Forward - Right Winger,right,Premier League,37.0,27.0,2605.0,4.0,9.0,0.0,0.0,1.0,0.0,0.14,0.31,0.45,0.14,0.45,3.5,3.5,6.7,0.12,0.23,0.35,0.12,0.35,11.2,17.0,4.0,7.0,23.5,1.52,0.36,0.06,0.25,0.07,-0.2,-0.2,399.0,493.0,80.9,7081.0,2067.0,190.0,222.0,85.6,140.0,167.0,83.8,56.0,87.0,64.4,-0.1,34.0,40.0,14.0,1.0,47.0,888.0,54.0,0.0,3.0,257.0,36.0,148.0,0.0,0.0,0.0,0.0,514.0,169.0,259.0,85.0,741.0,21.0,54.0,7.0,1.0,19.0,20.0,70.0,99.0,3.42,66.0,0.0,18.0,0.0,12.0,15.0,0.52,11.0,0.0,1.0,0.0,2.0,0.0,6.0,3.0,1.0,5.0,0.0,2.0,7.0,28.6,5.0,176.0,37.0,21.0,20.0,107.0,49.0,13.0,3.0,0.0,10.0,1.0,7.0,0.0,1400.0,28.0,151.0,666.0,767.0,118.0,1346.0,156.0,211.0,73.9,161.0,3.0,1252.0,13209.0,8675.0,1394.0,1045.0,75.0,82.0,90.0,0.0,39.0,77.0,5.0,0.0,0.0,0.0,171.0,48.0,51.0,48.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001536,0.003455,0.0,0.0,0.000384,0.000000,0.000054,0.000119,0.000173,0.000054,0.000173,0.001344,0.001344,0.002572,0.000046,0.000088,0.000134,0.000046,0.000134,0.004299,0.006526,0.001536,0.002687,0.009021,0.000583,0.000138,0.000023,0.000096,0.000027,-0.000077,-0.000077,0.153167,0.189251,0.031056,2.718234,0.793474,0.072937,0.085221,0.032860,0.053743,0.064107,0.032169,0.021497,0.033397,0.024722,-0.000038,0.013052,0.015355,0.005374,0.000384,0.018042,0.340883,0.020729,0.000000,0.001152,0.098656,0.013820,0.056814,0.000000,0.0,0.00000,0.0,0.197313,0.064875,0.099424,0.032630,0.284453,0.008061,0.020729,0.002687,0.000384,0.007294,0.007678,0.026871,0.038004,0.001313,0.025336,0.000000,0.006910,0.000000,0.004607,0.005758,0.000200,0.004223,0.0,0.000384,0.00000,0.000768,0.00000,0.002303,0.001152,0.000384,0.001919,0.000000,0.000768,0.002687,0.010979,0.001919,0.067562,0.014203,0.008061,0.007678,0.041075,0.018810,0.004990,0.001152,0.0,0.003839,0.000384,0.002687,0.000000,0.537428,0.010749,0.057965,0.255662,0.294434,0.045298,0.516699,0.059885,0.080998,0.028369,0.061804,0.001152,0.480614,5.070633,3.330134,0.535125,0.401152,0.028791,0.031478,0.034549,0.00000,0.014971,0.029559,0.001919,0.00000,0.000000,0.0,0.065643,0.018426,0.019578,0.018618,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.0,38.0,15.0,14.0,9.0,51.0,40.0,11.0,59.0,1.55,47.1,34.8,12.3,0.32,24758,0.0,0.0,,201920#,2523.0,2021
2642,,JesĂşs Vallejo,es ESP,DF,Wolves,22.0,1997.0,6000000.0,184.0,Defender - Centre-Back,right,Premier League,2.0,1.0,163.0,0.0,0.0,0.0,0.0,1.0,0.0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,1.8,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,81.0,104.0,77.9,1815.0,606.0,21.0,24.0,87.5,39.0,46.0,84.8,20.0,33.0,60.6,0.0,0.0,7.0,1.0,0.0,5.0,103.0,1.0,1.0,0.0,13.0,3.0,0.0,0.0,0.0,0.0,0.0,70.0,11.0,23.0,5.0,94.0,2.0,0.0,0.0,0.0,5.0,1.0,1.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,4.0,0.0,0.0,1.0,3.0,33.3,2.0,20.0,6.0,30.0,8.0,10.0,2.0,4.0,3.0,0.0,1.0,0.0,13.0,0.0,124.0,20.0,61.0,64.0,6.0,1.0,124.0,1.0,1.0,100.0,1.0,0.0,85.0,322.0,170.0,90.0,83.0,92.2,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,18.0,4.0,4.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.006135,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.011043,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.496933,0.638037,0.477914,11.134969,3.717791,0.128834,0.147239,0.536810,0.239264,0.282209,0.520245,0.122699,0.202454,0.371779,0.000000,0.000000,0.042945,0.006135,0.000000,0.030675,0.631902,0.006135,0.006135,0.000000,0.079755,0.018405,0.000000,0.000000,0.0,0.00000,0.0,0.429448,0.067485,0.141104,0.030675,0.576687,0.012270,0.000000,0.000000,0.000000,0.030675,0.006135,0.006135,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000,0.00000,0.024540,0.018405,0.024540,0.000000,0.000000,0.006135,0.018405,0.204294,0.012270,0.122699,0.036810,0.184049,0.049080,0.061350,0.012270,0.024540,0.018405,0.0,0.006135,0.000000,0.079755,0.000000,0.760736,0.122699,0.374233,0.392638,0.036810,0.006135,0.760736,0.006135,0.006135,0.613497,0.006135,0.000000,0.521472,1.975460,1.042945,0.552147,0.509202,0.565644,0.006135,0.006135,0.00000,0.012270,0.006135,0.000000,0.00000,0.000000,0.0,0.110429,0.024540,0.024540,0.306748,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.0,38.0,15.0,14.0,9.0,51.0,40.0,11.0,59.0,1.55,47.1,34.8,12.3,0.32,24758,0.0,0.0,,201920#,2567.0,2021


In [119]:
def get_closest_match(name):
    # return closest match for join operation
    return ''.join(list(difflib.get_close_matches(name, all_players_name, n=1, cutoff=0.6)))

In [120]:
all_season["player"] = all_season["player"].fillna("")
all_season["closest_match"] = all_season["player"].apply(get_closest_match)
all_season = all_season[all_season["closest_match"] != ""]
all_season["player"] = all_season["closest_match"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_season["player"] = all_season["closest_match"]


In [121]:
all_season = all_season.drop(["Unnamed: 0", "closest_match", "nationality", "Season", "CLBestScorer", "position", "squad", "league", "Column1", "position2", "birth_year"], axis=1)

In [122]:
all_season = all_season.drop_duplicates(subset=["player", "year"], keep="first")

In [123]:
all_season.to_csv("{}/interim/transfermarkt.csv".format(DATA_DIR), index=False)

## 3.6 FIFA Dataset

In [124]:
fifa_21 =  pd.read_csv("{}/raw/fifa/fifa-players_21.csv".format(DATA_DIR))
fifa_20 =  pd.read_csv("{}/raw/fifa/players_20.csv".format(DATA_DIR))
fifa_19 =  pd.read_csv("{}/raw/fifa/players_19.csv".format(DATA_DIR))
fifa_18 =  pd.read_csv("{}/raw/fifa/players_18.csv".format(DATA_DIR))

In [125]:
fifa_21["nationality"] = fifa_21["nationality"].apply(lambda x: x.strip())
fifa_20["nationality"] = fifa_20["nationality"].apply(lambda x: x.strip())
fifa_19["nationality"] = fifa_19["nationality"].apply(lambda x: x.strip())
fifa_18["nationality"] = fifa_18["nationality"].apply(lambda x: x.strip())

In [126]:
fifa_21 = fifa_21[fifa_21['nationality'].isin(main_df['team_name'].unique())]
fifa_20 = fifa_20[fifa_20['nationality'].isin(main_df['team_name'].unique())]
fifa_19 = fifa_19[fifa_19['nationality'].isin(main_df['team_name'].unique())]
fifa_18 = fifa_18[fifa_18['nationality'].isin(main_df['team_name'].unique())]

In [127]:
fifa_21['len_name'] = fifa_21["long_name"].apply(lambda x: len(x.split(" ")))
fifa_21['len_short_name'] = fifa_21["short_name"].apply(lambda x: len(x.split(" ")))
fifa_21['min_char_in_name'] = fifa_21['long_name'].apply(lambda x: min(len(y) for y in x.split()))
fifa_21['min_char_in_short_name'] = fifa_21['short_name'].apply(lambda x: min(len(y) for y in x.split()))

fifa_20['len_name'] = fifa_20["long_name"].apply(lambda x: len(x.split(" ")))
fifa_20['len_short_name'] = fifa_20["short_name"].apply(lambda x: len(x.split(" ")))
fifa_20['min_char_in_name'] = fifa_20['long_name'].apply(lambda x: min(len(y) for y in x.split()))
fifa_20['min_char_in_short_name'] = fifa_20['short_name'].apply(lambda x: min(len(y) for y in x.split()))

fifa_19['len_name'] = fifa_19["long_name"].apply(lambda x: len(x.split(" ")))
fifa_19['len_short_name'] = fifa_19["short_name"].apply(lambda x: len(x.split(" ")))
fifa_19['min_char_in_name'] = fifa_19['long_name'].apply(lambda x: min(len(y) for y in x.split()))
fifa_19['min_char_in_short_name'] = fifa_19['short_name'].apply(lambda x: min(len(y) for y in x.split()))

fifa_18['len_name'] = fifa_18["long_name"].apply(lambda x: len(x.split(" ")))
fifa_18['len_short_name'] = fifa_18["short_name"].apply(lambda x: len(x.split(" ")))
fifa_18['min_char_in_name'] = fifa_18['long_name'].apply(lambda x: min(len(y) for y in x.split()))
fifa_18['min_char_in_short_name'] = fifa_18['short_name'].apply(lambda x: min(len(y) for y in x.split()))

In [128]:
def join_tuple_string(strings_tuple):
    return ' '.join(strings_tuple)

def create_unigram_bigram_trigram_quadgram(text, x):
    token_list = text.split(" ")
    tuple_gram = list(everygrams(token_list, 2, x))
    result = map(join_tuple_string, tuple_gram) 
    if x > 2:
         return list(result) + [' '.join(token_list[::len(token_list)-1])]
    return list(result)

def calculate_closest_token(df):
    everygram = create_unigram_bigram_trigram_quadgram(df["long_name"], df['len_name'])
    closest = difflib.get_close_matches(df["short_name"], everygram, n=1)
    return ''.join(closest)

In [129]:
fifa_21['closest_match'] = fifa_21.apply(calculate_closest_token, axis=1)
fifa_20['closest_match'] = fifa_20.apply(calculate_closest_token, axis=1)
fifa_19['closest_match'] = fifa_19.apply(calculate_closest_token, axis=1)
fifa_18['closest_match'] = fifa_18.apply(calculate_closest_token, axis=1)

In [130]:
name_mapping = {
 'Aleksandar Dragović': 'Aleksandar Dragovic',
 'Aleš Matějů': 'Ales Mateju',
 'Alex Král': 'Alex Kral',
 'Anatoliy Trubin': 'Anatolii Trubin',
 'András Schäfer': 'Andras Schafer',
 'Dean Cornelius': 'Andreas Cornelius',
 'Andrej Kramarić': 'Andrej Kramaric',
 'Ante Rebić': 'Ante Rebic',
 'Bartosz Bereszyński': 'Bartosz Bereszynski',
 'Bećir Omeragić': 'Becir Omeragic',
 'Bogdan Mykhaylychenko': 'Bogdan Mykhaylichenko',
 'Borna Barišić': 'Borna Barisic',
 'B. Embolo': 'Breel Embolo',
 'Bruno Petković': 'Bruno Petkovic',
 'Burak Yılmaz': 'Burak Yilmaz',
 'Che Adams': 'Che Adams',
 'D.Rice': "Declan Rice",
 'Christian Günter': 'Chris Gunter',
 'Liam Craig Gordon': 'Craig Gordon',
 'Azpilicueta': 'César Azpilicueta',
 'Anga Dedryck Boyata': 'Dedryck Boyata',
 'Davor Lovren': 'Dejan Lovren',
 'Lemi Zakaria': 'Denis Zakaria',
 'Diego Javier Llorente': 'Diego Llorente',
 'Dmitriy Barinov': 'Dimitri Barinov',
 'Domagoj Bradarić': 'Domagoj Bradaric',
 'Dominik Livaković': 'Dominik Livakovic',
 'van de Beek': 'Donny van de Beek',
 'Dorukhan Toköz': 'Dorukhan Tokoz',
 'Duje Ćaleta-Car': 'Duje Caleta-Car',
 'Dušan Kuciak': 'Dusan Kuciak',
 'Miklós Sigér': 'Dávid Miklós Sigér',
 'Eray Ervin Cömert': 'Eray Cömert',
 'Frederik Rønnow': 'Frederik Rönnow',
 'Georgiy Bushchan': 'Georgi Bushchan',
 'Georgiy Dzhikiya': 'Georgi Dzhikiya',
 'Glen Adjei Kamara': 'Glen Kamara',
 'Greg Taylor': 'Greg Taylor',
 'Hakan Çalhanoğlu': 'Hakan Calhanoglu',
 'Hakan Calhanoglu':'Hakan Calhanoglu',
 'Haris Seferović': 'Haris Seferovic',
 'İlkay Gündoğan': 'Ilkay Gündogan',
 'İrfan Can Kahveci': 'Irfan Kahveci',
 'Ivan Perišić': 'Ivan Perisic',
 'Jakub Holúbek': 'Jakub Holubek',
 'Jamal Musiala': 'Jamal Musiala',
 'Alexander Lawrence': 'James Alexander Lawrence',
 'Jan Bořil': 'Jan Boril',
 'Jens Jønsson': 'Jens Jonsson',
 'Jere Juhani Uronen': 'Jere Uronen',
 'Jiří Pavlenka': 'Jirí Pavlenka',
 'Joakim Mæhle': 'Joakim Maehle',
 'Joseff Morrell': 'Joe Morrell',
 'Jordi Alba Ramos': 'Jordi Alba',
 'Josip Juranović': 'Josip Juranovic',
 'Palhinha': 'João Palhinha',
 'Jérémy Doku': 'Jéremy Doku',
 'Kamil Jóźwiak': 'Kamil Jozwiak',
 'Karol Świderski': 'Karol Swiderski',
 'Stefan Ristovski': 'Stefan Spirovski',
 'Kurt Happy Zouma': 'Kurt Zouma',
 'Lasse Schøne': 'Lasse Schöne',
 'Lovre Kalinić': 'Lovre Kalinic',
 'Lucas Hernández Pi': 'Lucas Hernández',
 'Luka Modrić': 'Luka Modric',
 'Lukáš Haraslín': 'Lukas Haraslin',
 'Lukáš Masopust': 'Lukas Masopust',
 'Łukasz Fabiański': 'Lukasz Fabianski',
 'Lukáš Hrádecký': 'Lukás Hrádecky',
 'Manuel Viana': 'Manuel Akanji',
 'Marcelo Brozović': 'Marcelo Brozovic',
 'Marcus Danielsson': 'Marcus Danielson',
 'Marek Hamšík': 'Marek Hamsik',
 'Marko Arnautović': 'Marko Arnautovic',
 'Martin Dúbravka': 'Martin Dubravka',
 'Matěj Vydra': 'Matej Vydra',
 'Mateo Kovačić': 'Mateo Kovacic',
 'Matúš Bero': 'Matús Bero',
 'Michael Krmenčík': 'Michal Krmencik',
 'Michael Gurski': 'Michal Duris',
 'Michał Helik': 'Michal Helik',
 'Carl Mikael Lustig': 'Mikael Lustig',
 'Oyarzabal': 'Mikel Oyarzabal',
 'Milan Škriniar': 'Milan Skriniar',
 'Mile Svilar': 'Mile Skoric',
 'Mislav Oršić': 'Mislav Orsic',
 'M. Kean': 'Moise Kean',
 'Mykola Matvienko': 'Mykola Matvyenko',
 'Nemanja Nikolić': 'Nemanja Nikolics',
 'N. Hämäläinen': 'Niko Hämäläinen',
 'Nikola Vlašić': 'Nikola Vlasic',
 'Nélson Cabral Semedo': 'Nélson Semedo',
 'Okay Yokuşlu': 'Okay Yokuslu',
 'Aleksandr Zhirov': 'Oleksandr Zubkov',
 'Ondřej Čelůstka': 'Ondrej Celustka',
 'Ondřej Kúdela': 'Ondrej Kudela',
 'Orkun Kökçü': 'Orkun Kökcü',
 'O. Kabak': 'Ozan Kabak',
 'Patrik Hrošovský': 'Patrik Hrosovsky',
 'Pavel Kadeřábek': 'Pavel Kaderábek',
 'Petr Ševčík': 'Petr Sevcik',
 'Philip Foden': 'Phil Foden',
 'Leo Bengtsson': 'Pierre Bengtsson',
 'Piotr Zieliński': 'Piotr Zielinski',
 'Przemysław Frankowski': 'Przemyslaw Frankowski',
 'Przemysław Płacheta': 'Przemyslaw Placheta',
 'Raphaël Varane': 'Raphael Varane',
 'Renato Júnior Luz Sanches': 'Renato Sanches',
 'Róbert Boženík': 'Robert Bozenik',
 'Ruslan Malinovskyi': 'Ruslan Malinovskiy',
 'Ryan Jiro Gravenberch': 'Ryan Gravenberch',
 'Saša Kalajdžić': 'Sasa Kalajdzic',
 'Sergiy Kryvtsov': 'Serhii Kryvtsov',
 'Šime Vrsaljko': 'Sime Vrsaljko',
 'Tamás Cseri': 'Tamas Cseri',
 'Taylan Antalyalı': 'Taylan Antalyali',
 'Tomáš Pekhart': 'Tomas Pekhart',
 'Tomáš Souček': 'Tomas Soucek',
 'Tomáš Suslov': 'Tomas Suslov',
 'Tomasz Kędziora': 'Tomasz Kedziora',
 'Thomas Holmes': 'Tomás Holes',
 'Tomáš Vaclík': 'Tomás Vaclik',
 'Uğurcan Çakır': 'Ugurcan Çakir',
 'Umut Meraş': 'Umut Meras',
 'Cengiz Umut Meraş': 'Umut Meras',
 'Vitaliy Mykolenko': 'Vitalii Mykolenko',
 'Vladimír Coufal': 'Vladimir Coufal',
 'Vladimír Darida': 'Vladimir Darida',
 'William Silva de Carvalho': 'William Carvalho',
 'Yuriy Zhirkov': 'Yuri Zhirkov',
 'Yusuf Yazıcı': 'Yusuf Yazici',
 'Çağlar Söyüncü': 'Çaglar Söyüncü',
 'C. Eriksen': "Christian Eriksen",
 'Alexander Walke': 'Alexander Isak',
 'Aleksandr Sobolev': 'Alexander Sobolev',
 'Antonín Barák': 'Antonin Barak',
 'Benjamin Cabango': 'Ben Cabango',
 'Bogdan Mykhaylichenko': 'Bogdan Mykhaylichenko',
 'Borna Barisic': 'Borna Barisic',
 'Mikael Lustig': 'Carl Mikael Lustig',
 'Che Adams': 'Che Adams',
 'Chris Gunter': 'Chris Gunter',
 'Christian Gentner': 'Christian Günter',
 'Daniel Avramovski': 'Daniel Avramovski',
 'Declan Rice': 'Declan Rice',
 'Dejan Kulusevski': 'Dejan Kulusevski',
 'Diogo José': 'Diogo Jota',
 'Domagoj Vida': 'Domagoj Vida',
 'Dominik Livakovic': 'Dominik Livakovic',
 'Dylan Levitt': 'Dylan Levitt',
 'Dávid Sigér': 'Dávid Sigér',
 'Eduard Sobol': 'Eduard Sobol',
 'Eljif Elmas': 'Eljif Elmas',
 'Eric García Martret': 'Eric García',
 'Ethan Ampadu': 'Ethan Ampadu',
 'Ferhan Hasani': 'Ferhan Hasani',
 'Filip Helander': 'Filip Holender',
 'Greg Taylor': 'Greg Taylor',
 'Halil Dervişoğlu': 'Halil Dervisoglu',
 'Irfan Kahveci': 'Irfan Can Kahveci',
 'Ivan Trickovski': 'Ivan Trickovski',
 'Jakub Świerczok': 'Jakub Swierczok',
 'Jamal Musiala': 'Jamal Musiala',
 'James Lawrence': 'Jamie Lawrence',
 'Jens-Lys Cajuste': 'Jens Cajuste',
 'Josip Juranovic': 'Josip Juranovic',
 'Jude Bellingham': 'Jude Bellingham',
 'Kacper Trelowski': 'Kacper Kozlowski',
 'Kamil Piątkowski': 'Kamil Piatkowski',
 'Leo Väisänen': 'Leo Väisänen',
 'Łukasz Skorupski': 'Lukasz Skorupski',
 'Lukáš Provod': 'Lukáš Provod',
 'Lyndon Dykes': 'Lyndon Dykes',
 'Magomed Ozdoev': 'Magomed Ozdoev',
 'Mário Fernandes': 'Mario Fernandes',
 'Mehmet Zeki Çelik': 'Mehmet Zeki Çelik',
 'Merih Demiral': 'Merih Demiral',
 'Mert Müldür': 'Mert Müldür',
 'Paweł Dawidowicz': 'Pawel Dawidowicz',
 'Petr Sevcik': 'Petr Sevcik',
 'Pyry Soiri': 'Pyry Soiri',
 'Rabbi Matondo': 'Rabbi Matondo',
 'Rıdvan Yılmaz': 'Ridvan Yilmaz',
 'Robert Bozenik': 'Robert Bozenik',
 'Robert Sanchez': 'Robert Sánchez',
 'Serhiy Sydorchuk': 'Serhiy Sydorchuk',
 'Tamas Cseri': 'Tamas Cseri',
 'Tomáš Kalas': 'Tomas Kalas',
 'Tomás Holes': 'Tomás Holes',
 'Tomáš Koubek': 'Tomáš Souček',
 'Ugurcan Çakir': 'Ugurcan Cakir',
 'Vitalii Mykolenko': 'Vitaliy Mykolenko',
 'Vladimir Coufal': 'Vladimir Coufal',
 'Vladimir Darida': 'Vladimír Darida',
 'Vlatko Stojanovski': 'Vlatko Stojanovski',
 'Wojciech Szczęsny': 'Wojciech Szczesny',
 'Simon Thorup Kjær': "Simon Kjaer",
 'Simon Kjær': "Simon Kjaer",
 "Simon Kjær": "Simon Kjaer",
 'Ádám Lang': 'Ádám Lang',
 'Luís Gayà': 'José Gayá',
 'João Félix Sequeira': 'João Félix',
 'De Gea':'David de Gea',
 'Ferrán Torres': 'Ferran Torres',
 'Mehmet Çelik': 'Mehmet Zeki Çelik',
 'Can Kahveci': 'Irfan Can Kahveci',
 'Mert Günok': 'Fehmi Mert Günok',
 'J. Stryger Larsen': 'Jens Stryger Larsen',
 'Jens Larsen': 'Jens Stryger Larsen',
 'José Guerreiro': 'Raphael Guerreiro',
 'D. Sow': 'Djibril Sow',
 'Ben Yedder': 'Wissam Ben Yedder',
 'Lukás Hrádecky': 'Lukas Hradecky',
 'Mikael Lustig' : 'Carl Mikael Lustig',
 'Thiago':'Thiago Alcántara',
 'Vladimír Darida' : "Vladimír Darida",
 'Tomáš Hubočan': 'Tomas Hubocan',
 'Anga Boyata': 'Dedryck Boyata',
 'Ilkay Gündogan': 'İlkay Gündoğan',
 "Morata":'Álvaro Morata',
 "I. Perišić" :"Ivan Perišić",
 "Andrew Robertson": "Andy Robertson",
 "Peter McLaughlin": "Jon McLaughlin",
 "Iván Rodríguez": "Ricardo Rodríguez",
 "Landry Mvogo":"Yvon Mvogo",
 "Alexander Granlund": "Albin Granlund"
}

In [131]:
def heuristic_match(df):
    if df["len_short_name"] == 1:
        return df["short_name"]
    if len(df["closest_match"].split()) == 0:
        return df["short_name"]
    elif df["min_char_in_short_name"] >= 4:
        return df["short_name"]
    elif df["len_name"] > 3:
        return df["closest_match"]
    elif df["min_char_in_name"] >= 3:
        return df["closest_match"]
    else:
        return df["long_name"]

In [132]:
fifa_21["player"] = fifa_21.apply(heuristic_match, axis=1)
fifa_20["player"] = fifa_20.apply(heuristic_match, axis=1)
fifa_19["player"] = fifa_19.apply(heuristic_match, axis=1)
fifa_18["player"] = fifa_18.apply(heuristic_match, axis=1)

In [133]:
def map_name(name):
    global name_mapping
    if name in name_mapping.keys():
        return name_mapping[name]
    else:
        return name

In [134]:
fifa_21['player'] = fifa_21['player'].apply(map_name)
fifa_20['player'] = fifa_20['player'].apply(map_name)
fifa_19['player'] = fifa_19['player'].apply(map_name)
fifa_18['player'] = fifa_18['player'].apply(map_name)

In [135]:
col = ["player", "nationality", "work_rate", "age", "height_cm", "weight_kg", "league_rank", "overall", "potential", "wage_eur", 
       "international_reputation", "pace", "shooting", "passing", "dribbling", "defending",  'nation_position', 'nation_jersey_number',
       "physic", "attacking_crossing", "attacking_finishing", "attacking_heading_accuracy", "attacking_short_passing",
       "attacking_volleys", "skill_dribbling", "skill_curve", "skill_fk_accuracy", "skill_long_passing", "skill_ball_control",
       "movement_acceleration", "movement_sprint_speed", "movement_agility", "movement_reactions", "movement_balance", "power_shot_power",
       "power_jumping", "power_stamina","power_strength", "power_long_shots", "mentality_aggression", "mentality_interceptions",
       "mentality_positioning", "mentality_vision", "mentality_penalties", "mentality_composure", "defending_standing_tackle",
       "defending_sliding_tackle", "goalkeeping_diving", "goalkeeping_handling", "goalkeeping_kicking", "goalkeeping_positioning", "goalkeeping_reflexes"]

In [136]:
fifa_21 = fifa_21[col]
fifa_20 = fifa_20[col]
fifa_19 = fifa_19[col]
fifa_18 = fifa_18[col]

In [137]:
fifa_21.rename(columns={"nationality": "team_name"}, inplace=True)
fifa_20.rename(columns={"nationality": "team_name"}, inplace=True)
fifa_19.rename(columns={"nationality": "team_name"}, inplace=True)
fifa_18.rename(columns={"nationality": "team_name"}, inplace=True)

In [138]:
fifa_21 = fifa_21.drop_duplicates(subset=["player", "team_name"], keep="first")
fifa_20 = fifa_20.drop_duplicates(subset=["player", "team_name"], keep="first")
fifa_19 = fifa_19.drop_duplicates(subset=["player", "team_name"], keep="first")
fifa_18 = fifa_18.drop_duplicates(subset=["player", "team_name"], keep="first")

In [139]:
fifa_21["year"] = 2021
fifa_20["year"] = 2020
fifa_19["year"] = 2019
fifa_18["year"] = 2018

In [140]:
fifa = pd.concat([fifa_21, fifa_20, fifa_19, fifa_18])

In [141]:
main_df = pd.merge(main_df, fifa, how="left", on=["player", "team_name", "year"])

## Feature Engineering

In [142]:
main_df.head()

Unnamed: 0,player,date,league_name,team_name,opponent_name,fantasy_points,min,position,year,month,day,points,value,skill,last_md_points,last_md_goals,last_md_assists,prev_mean_points,prev_mean_goals,prev_mean_assists,prev_max_points,prev_std_points,prev_std_goals,prev_std_assists,prev_median_min,prev_ratio_starter,count_played,goal_consistency,assist_consistency,clean_sheet_consistency,count_team_played,prev_max_goal_to_specific_opp,prev_max_points_to_specific_opp,prev_mean_points_to_specific_opp,prev_ratio_played,prev_team_highest_rank,team_rank,team_total_points,prev_opponent_highest_rank,opponent_rank,opponent_total_points,hth_team_win,hth_opp_win,hth_draw,hth_team_score,hth_opp_score,htt_max_margin,team_mean_squad_age,team_market_value,team_mean_market_value,opponent_mean_squad_age,opponent_market_value,opponent_mean_market_value,work_rate,age,height_cm,weight_kg,league_rank,overall,potential,wage_eur,international_reputation,pace,shooting,passing,dribbling,defending,nation_position,nation_jersey_number,physic,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes
0,Aaron Ramsey,2018-09-06,UEFA Nations League,Wales,Republic of Ireland,21.0,90.0,M,2018,9,6,7.0,8.5,3.0,,,,,,,,,,,,,,,,,,,,,,18,19,1536,26,29,1484,0.0,2.0,2.0,0.0,4.0,3.0,25.6,1767500000.0,68000000.0,25.5,776500000.0,29900000.0,High/High,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0
1,Aaron Ramsey,2018-09-06,UEFA Nations League,Wales,Republic of Ireland,21.0,90.0,M,2018,9,6,7.0,4.0,1.0,7.0,1.0,0.0,,,,,,,,,,,,,,,,,,,18,19,1536,26,29,1484,0.0,2.0,2.0,0.0,4.0,3.0,25.6,1767500000.0,68000000.0,25.5,776500000.0,29900000.0,High/High,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0
2,Aaron Ramsey,2018-09-09,UEFA Nations League,Wales,Denmark,13.5,90.0,M,2018,9,9,2.0,8.5,3.0,7.0,1.0,0.0,7.0,1.0,0.0,7.0,0.0,0.0,0.0,90.0,1.0,1.0,1.0,0.0,0.0,1.0,,,,1.0,18,19,1536,9,9,1580,0.0,0.0,0.0,0.0,0.0,,25.6,1767500000.0,68000000.0,27.8,3107000000.0,119500000.0,High/High,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0
3,Aaron Ramsey,2018-09-09,UEFA Nations League,Wales,Denmark,13.5,90.0,M,2018,9,9,2.0,4.0,1.0,2.0,0.0,0.0,7.0,1.0,0.0,7.0,0.0,0.0,0.0,90.0,1.0,1.0,1.0,0.0,0.0,1.0,,,,1.0,18,19,1536,9,9,1580,0.0,0.0,0.0,0.0,0.0,,25.6,1767500000.0,68000000.0,27.8,3107000000.0,119500000.0,High/High,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0
4,Aaron Ramsey,2018-10-11,International Friendlies,Wales,Spain,1.42,90.0,M,2018,10,11,2.0,8.5,3.0,2.0,0.0,0.0,4.5,0.5,0.0,7.0,2.672612,0.534522,0.0,90.0,1.0,2.0,0.5,0.0,0.0,2.0,,,,1.0,18,19,1536,6,9,1597,0.0,0.0,0.0,0.0,0.0,,,,,,,,High/High,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0


In [143]:
main_df["diff_team_points"] = main_df['team_total_points'] - main_df['opponent_total_points']
main_df["diff_team_ranking"]= main_df['team_rank'] - main_df['opponent_rank']
main_df["diff_team_market_value"] = main_df['team_market_value'] - main_df['opponent_market_value']
main_df["diff_team_mean_market_value"] = main_df['team_mean_market_value'] - main_df['opponent_mean_market_value']
main_df["diff_team_mean_squad_age"] = main_df['team_mean_squad_age'] - main_df['opponent_mean_squad_age']
main_df["diff_team_ranking"]= main_df['team_rank'] - main_df['opponent_rank']
main_df["is_senior"] = main_df["age"] > main_df["team_mean_squad_age"]
main_df["is_imbalanced"]= main_df['diff_team_ranking'].apply(lambda x: abs(x) > 10)
main_df["gap_to_potential"] = main_df["potential"] - main_df["overall"]

main_df["roi"] = main_df["points"] / main_df["value"]
main_df["more_likely_to_win"] = (main_df["hth_team_win"] - main_df["hth_opp_win"]) >= 2

main_df["work_rate"] = main_df['work_rate'].fillna("")
main_df[["attacking_work_rate", "defending_work_rate"]] = main_df["work_rate"].apply(lambda x: pd.Series(x.split("/")))
main_df = main_df.drop(["work_rate"], axis=1)

In [144]:
main_df

Unnamed: 0,player,date,league_name,team_name,opponent_name,fantasy_points,min,position,year,month,day,points,value,skill,last_md_points,last_md_goals,last_md_assists,prev_mean_points,prev_mean_goals,prev_mean_assists,prev_max_points,prev_std_points,prev_std_goals,prev_std_assists,prev_median_min,prev_ratio_starter,count_played,goal_consistency,assist_consistency,clean_sheet_consistency,count_team_played,prev_max_goal_to_specific_opp,prev_max_points_to_specific_opp,prev_mean_points_to_specific_opp,prev_ratio_played,prev_team_highest_rank,team_rank,team_total_points,prev_opponent_highest_rank,opponent_rank,opponent_total_points,hth_team_win,hth_opp_win,hth_draw,hth_team_score,hth_opp_score,htt_max_margin,team_mean_squad_age,team_market_value,team_mean_market_value,opponent_mean_squad_age,opponent_market_value,opponent_mean_market_value,age,height_cm,weight_kg,league_rank,overall,potential,wage_eur,international_reputation,pace,shooting,passing,dribbling,defending,nation_position,nation_jersey_number,physic,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,diff_team_points,diff_team_ranking,diff_team_market_value,diff_team_mean_market_value,diff_team_mean_squad_age,is_senior,is_imbalanced,gap_to_potential,roi,more_likely_to_win,attacking_work_rate,defending_work_rate
0,Aaron Ramsey,2018-09-06 00:00:00,UEFA Nations League,Wales,Republic of Ireland,21.00,90.0,M,2018,9,6,7.0,8.5,3.0,,,,,,,,,,,,,,,,,,,,,,18,19,1536,26,29,1484,0.0,2.0,2.0,0.0,4.0,3.0,25.6,1.767500e+09,68000000.0,25.5,7.765000e+08,29900000.0,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0,52,-10,9.910000e+08,38100000.0,0.1,True,False,1.0,0.823529,False,High,High
1,Aaron Ramsey,2018-09-06 00:00:00,UEFA Nations League,Wales,Republic of Ireland,21.00,90.0,M,2018,9,6,7.0,4.0,1.0,7.0,1.0,0.0,,,,,,,,,,,,,,,,,,,18,19,1536,26,29,1484,0.0,2.0,2.0,0.0,4.0,3.0,25.6,1.767500e+09,68000000.0,25.5,7.765000e+08,29900000.0,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0,52,-10,9.910000e+08,38100000.0,0.1,True,False,1.0,1.750000,False,High,High
2,Aaron Ramsey,2018-09-09 00:00:00,UEFA Nations League,Wales,Denmark,13.50,90.0,M,2018,9,9,2.0,8.5,3.0,7.0,1.0,0.0,7.000000,1.0,0.0,7.0,0.000000,0.000000,0.0,90.0,1.000000,1.0,1.000000,0.0,0.000000,1.0,,,,1.000000,18,19,1536,9,9,1580,0.0,0.0,0.0,0.0,0.0,,25.6,1.767500e+09,68000000.0,27.8,3.107000e+09,119500000.0,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0,-44,10,-1.339500e+09,-51500000.0,-2.2,True,False,1.0,0.235294,False,High,High
3,Aaron Ramsey,2018-09-09 00:00:00,UEFA Nations League,Wales,Denmark,13.50,90.0,M,2018,9,9,2.0,4.0,1.0,2.0,0.0,0.0,7.000000,1.0,0.0,7.0,0.000000,0.000000,0.0,90.0,1.000000,1.0,1.000000,0.0,0.000000,1.0,,,,1.000000,18,19,1536,9,9,1580,0.0,0.0,0.0,0.0,0.0,,25.6,1.767500e+09,68000000.0,27.8,3.107000e+09,119500000.0,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0,-44,10,-1.339500e+09,-51500000.0,-2.2,True,False,1.0,0.500000,False,High,High
4,Aaron Ramsey,2018-10-11 00:00:00,International Friendlies,Wales,Spain,1.42,90.0,M,2018,10,11,2.0,8.5,3.0,2.0,0.0,0.0,4.500000,0.5,0.0,7.0,2.672612,0.534522,0.0,90.0,1.000000,2.0,0.500000,0.0,0.000000,2.0,,,,1.000000,18,19,1536,6,9,1597,0.0,0.0,0.0,0.0,0.0,,,,,,,,26.0,183.0,76.0,1.0,82.0,83.0,130000.0,3.0,68.0,75.0,79.0,81.0,65.0,RF,10.0,75.0,75.0,72.0,58.0,84.0,79.0,81.0,70.0,70.0,80.0,82.0,67.0,68.0,76.0,81.0,75.0,81.0,67.0,89.0,69.0,74.0,73.0,69.0,82.0,80.0,75.0,81.0,70.0,67.0,6.0,11.0,5.0,10.0,8.0,-61,10,,,,False,False,1.0,0.235294,False,High,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8431,Çaglar Söyüncü,2021-03-24 00:00:00,European World Cup Qualifiers,Turkey,Netherlands,4.98,90.0,D,2021,3,24,1.0,5.0,2.0,6.0,0.0,0.0,3.357143,0.0,0.0,6.0,2.273836,0.000000,0.0,90.0,0.928571,14.0,0.000000,0.0,0.428571,26.0,,,,0.538462,29,32,1487,12,14,1609,1.0,3.0,1.0,4.0,6.0,3.0,25.0,3.250000e+09,125000000.0,27.4,6.070500e+09,242800000.0,24.0,185.0,82.0,1.0,80.0,85.0,69000.0,1.0,69.0,46.0,60.0,67.0,80.0,RCB,4.0,80.0,46.0,39.0,75.0,76.0,48.0,65.0,39.0,39.0,64.0,71.0,65.0,72.0,64.0,77.0,57.0,65.0,76.0,68.0,82.0,44.0,89.0,79.0,42.0,55.0,52.0,76.0,84.0,80.0,6.0,7.0,7.0,7.0,7.0,-122,18,-2.820500e+09,-117800000.0,-2.4,False,True,5.0,0.200000,False,Medium,Medium
8432,Çaglar Söyüncü,2021-03-27 00:00:00,European World Cup Qualifiers,Turkey,Norway,16.72,90.0,D,2021,3,27,12.0,5.0,2.0,1.0,0.0,0.0,3.200000,0.0,0.0,6.0,2.274078,0.000000,0.0,90.0,0.933333,15.0,0.000000,0.0,0.400000,27.0,,,,0.555556,29,32,1487,43,44,1450,0.0,0.0,0.0,0.0,0.0,,25.0,3.250000e+09,125000000.0,25.4,2.512000e+09,104700000.0,24.0,185.0,82.0,1.0,80.0,85.0,69000.0,1.0,69.0,46.0,60.0,67.0,80.0,RCB,4.0,80.0,46.0,39.0,75.0,76.0,48.0,65.0,39.0,39.0,64.0,71.0,65.0,72.0,64.0,77.0,57.0,65.0,76.0,68.0,82.0,44.0,89.0,79.0,42.0,55.0,52.0,76.0,84.0,80.0,6.0,7.0,7.0,7.0,7.0,37,-12,7.380000e+08,20300000.0,-0.4,False,True,5.0,2.400000,False,Medium,Medium
8433,Çaglar Söyüncü,2021-03-30 00:00:00,European World Cup Qualifiers,Turkey,Latvia,3.54,90.0,D,2021,3,30,0.0,5.0,2.0,12.0,1.0,0.0,3.750000,0.0,0.0,12.0,3.109126,0.250000,0.0,90.0,0.937500,16.0,0.062500,0.0,0.437500,28.0,,,,0.571429,29,32,1487,125,136,1082,0.0,0.0,3.0,5.0,5.0,0.0,25.0,3.250000e+09,125000000.0,25.8,7.080000e+07,2620000.0,24.0,185.0,82.0,1.0,80.0,85.0,69000.0,1.0,69.0,46.0,60.0,67.0,80.0,RCB,4.0,80.0,46.0,39.0,75.0,76.0,48.0,65.0,39.0,39.0,64.0,71.0,65.0,72.0,64.0,77.0,57.0,65.0,76.0,68.0,82.0,44.0,89.0,79.0,42.0,55.0,52.0,76.0,84.0,80.0,6.0,7.0,7.0,7.0,7.0,405,-104,3.179200e+09,122380000.0,-0.8,False,True,5.0,0.000000,False,Medium,Medium
8434,Çaglar Söyüncü,2021-06-11 21:00:00,European Championship 2020,Turkey,Italy,0.00,0.0,D,2021,6,11,0.0,5.0,2.0,0.0,0.0,0.0,3.529412,0.0,0.0,12.0,3.144790,0.242536,0.0,90.0,0.941176,17.0,0.058824,0.0,0.411765,29.0,,,,0.586207,29,29,1505,7,7,1642,0.0,0.0,0.0,0.0,0.0,,25.0,3.250000e+09,125000000.0,27.8,7.640000e+09,293800000.0,24.0,185.0,82.0,1.0,80.0,85.0,69000.0,1.0,69.0,46.0,60.0,67.0,80.0,RCB,4.0,80.0,46.0,39.0,75.0,76.0,48.0,65.0,39.0,39.0,64.0,71.0,65.0,72.0,64.0,77.0,57.0,65.0,76.0,68.0,82.0,44.0,89.0,79.0,42.0,55.0,52.0,76.0,84.0,80.0,6.0,7.0,7.0,7.0,7.0,-137,22,-4.390000e+09,-168800000.0,-2.8,False,True,5.0,0.000000,False,Medium,Medium


In [145]:
main_df.to_csv("{}/processed/dataset_md2.csv".format(DATA_DIR), index=False)