# ToDos from previous notebook

#### thoughts for feature engineering

1. aggregate nationality into continents
--> DONE
2. convert all positions into some sort of "the more positions able to play the more flexible the player is"
3. Build average position values for attack, midfield and defending out of individual position values
3. investigate what work rate, international reputation and skill_moves refer to
4. investigate upon player traits and aggregate into broader categories

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df_fp = pd.read_csv("data/fp_data_v2.csv", index_col=0)
df_gk = pd.read_csv("data/gk_data_v2.csv", index_col=0)
pd.set_option('display.max_columns', None)

In [3]:
df_gk.head(3)

Unnamed: 0,market_value_€,player_name,long_name,year_of_birth,player_age,height_cm,weight_kg,nationality,main_position,club,league,divison,national_team_current_or_past,national_team_country,current_national_player,no_current_national_player,national_team_appearances,international_reputation,overall,potential,preferred_foot,weak_foot,skill_moves,gk_diving,gk_handling,gk_kicking,gk_reflexes,gk_speed,gk_positioning,player_traits,attacking_short_passing,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,mentality_aggression,mentality_vision,mentality_composure,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes
0,80000000.0,Jan Oblak,Jan Oblak,1993,26,188,87,Slovenia,Torwart,Atlético Madrid,LaLiga,1,1,Slowenien,1,0,28,3,91,93,Right,3,1,87,92,78,89,52,90,"Flair, Acrobatic Clearance",43,40,30,43,60,67,88,49,59,78,41,78,34,65,68,87,92,78,90,89
1,72000000.0,Marc-André ter Stegen,Marc-André ter Stegen,1992,28,187,85,Germany,Torwart,FC Barcelona,LaLiga,1,1,Deutschland,1,0,24,3,90,93,Right,4,1,88,85,88,90,45,88,"Swerve Pass, Acrobatic Clearance, Flair Passes",61,63,30,38,50,37,86,43,66,79,35,78,43,70,70,88,85,88,88,90
2,72000000.0,Alisson,Alisson Ramses Becker,1992,28,191,91,Brazil,Torwart,FC Liverpool,Premier League,1,1,Brasilien,0,1,44,3,89,91,Right,3,1,85,84,85,89,51,90,"Flair, Swerve Pass",45,44,30,56,47,40,88,37,64,52,32,78,27,66,65,85,84,85,90,89


In [4]:
df_fp.head(3)

Unnamed: 0,market_value_€,player_name,long_name,year_of_birth,player_age,height_cm,weight_kg,nationality,main_position,all_positions,club,league,divison,national_team_current_or_past,national_team_country,current_national_player,no_current_national_player,national_team_appearances,international_reputation,overall,potential,preferred_foot,weak_foot,skill_moves,work_rate,pace,shooting,passing,dribbling,defending,physic,player_traits,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,112000000.0,Lionel Messi,Lionel Andrés Messi Cuccittini,1987,32,170,72,Argentina,Rechtsaußen,"RW, CF, ST",FC Barcelona,LaLiga,1,1,Argentinien,1,0,138,5,94,94,Left,4,4,Medium/Low,87,92,92,96,39,66,"Beat Offside Trap, Argues with Officials, Earl...",88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63
1,60000000.0,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,1985,35,187,83,Portugal,Linksaußen,"ST, LW",Juventus Turin,Serie A,1,1,Portugal,1,0,164,5,93,93,Right,4,5,High/Low,90,93,82,89,35,78,"Long Throw-in, Selfish, Argues with Officials,...",84,94,89,83,87,89,81,76,77,92,89,91,87,96,71,95,95,85,78,93,63,29,95,82,85,95,28,32,24,91,91,91,89,90,90,90,89,88,88,88,88,81,81,81,88,65,61,61,61,65,61,53,53,53,61
2,128000000.0,Neymar,Neymar da Silva Santos Junior,1992,28,175,68,Brazil,Linksaußen,"LW, CAM",FC Paris Saint-Germain,Ligue 1,1,1,Brasilien,1,0,102,5,92,92,Right,5,5,High/Medium,91,85,87,95,32,58,"Power Free-Kick, Injury Free, Selfish, Early C...",87,87,62,87,87,96,88,87,81,95,94,89,96,92,84,80,61,81,49,84,51,36,87,90,90,94,27,26,29,84,84,84,90,89,89,89,90,90,90,90,89,82,82,82,89,66,61,61,61,66,61,46,46,46,61


## 1 - Nationality and continent

In [5]:
# 1. Nationality and National team country
translations = pd.read_csv("data/countries_translations_ger_eng.csv")
translations_dict = pd.Series(translations.tm_english.values,index=translations.tm_deutsch).to_dict()
translations_dict


{'Algerien': 'Algeria',
 'Angola': 'Angola',
 'Benin': 'Benin',
 'Burkina Faso': 'Burkina Faso',
 'Burundi': 'Burundi',
 'Kamerun': 'Cameroon',
 'Kap Verde': 'Cape Verde',
 'Komoren': 'Comoros',
 'Ägypten': 'Egypt',
 'Eritrea': 'Eritrea',
 'Gabun': 'Gabon',
 'Ghana': 'Ghana',
 'Guinea': 'Guinea',
 'Kenia': 'Kenya',
 'Liberia': 'Liberia',
 'Libyen': 'Libya',
 'Madagaskar': 'Madagascar',
 'Malawi': 'Malawi',
 'Mali': 'Mali',
 'Mauretanien': 'Mauritania',
 'Marokko': 'Morocco',
 'Mosambik': 'Mozambique',
 'Namibia': 'Namibia',
 'Niger': 'Niger',
 'Nigeria': 'Nigeria',
 'Ruanda': 'Rwanda',
 'Senegal': 'Senegal',
 'Sierra Leone': 'Sierra Leone',
 'Südafrika': 'South Africa',
 'Südsudan': 'South Sudan',
 'Togo': 'Togo',
 'Tunesien': 'Tunisia',
 'Uganda': 'Uganda',
 'Sambia': 'Zambia',
 'Simbabwe': 'Zimbabwe',
 'Äquatorialguinea': 'Equatorial Guinea',
 'Afghanistan': 'Afghanistan',
 'Armenien': 'Armenia',
 'Aserbaidschan': 'Azerbaijan',
 'Bahrain': 'Bahrain',
 'China': 'China PR',
 'Georgien'

In [6]:
df_fp["national_team_country"] = df_fp["national_team_country"].replace(translations_dict)
df_gk["national_team_country"] = df_gk["national_team_country"].replace(translations_dict)

df_fp.head(3)

Unnamed: 0,market_value_€,player_name,long_name,year_of_birth,player_age,height_cm,weight_kg,nationality,main_position,all_positions,club,league,divison,national_team_current_or_past,national_team_country,current_national_player,no_current_national_player,national_team_appearances,international_reputation,overall,potential,preferred_foot,weak_foot,skill_moves,work_rate,pace,shooting,passing,dribbling,defending,physic,player_traits,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,112000000.0,Lionel Messi,Lionel Andrés Messi Cuccittini,1987,32,170,72,Argentina,Rechtsaußen,"RW, CF, ST",FC Barcelona,LaLiga,1,1,Argentina,1,0,138,5,94,94,Left,4,4,Medium/Low,87,92,92,96,39,66,"Beat Offside Trap, Argues with Officials, Earl...",88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63
1,60000000.0,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,1985,35,187,83,Portugal,Linksaußen,"ST, LW",Juventus Turin,Serie A,1,1,Portugal,1,0,164,5,93,93,Right,4,5,High/Low,90,93,82,89,35,78,"Long Throw-in, Selfish, Argues with Officials,...",84,94,89,83,87,89,81,76,77,92,89,91,87,96,71,95,95,85,78,93,63,29,95,82,85,95,28,32,24,91,91,91,89,90,90,90,89,88,88,88,88,81,81,81,88,65,61,61,61,65,61,53,53,53,61
2,128000000.0,Neymar,Neymar da Silva Santos Junior,1992,28,175,68,Brazil,Linksaußen,"LW, CAM",FC Paris Saint-Germain,Ligue 1,1,1,Brazil,1,0,102,5,92,92,Right,5,5,High/Medium,91,85,87,95,32,58,"Power Free-Kick, Injury Free, Selfish, Early C...",87,87,62,87,87,96,88,87,81,95,94,89,96,92,84,80,61,81,49,84,51,36,87,90,90,94,27,26,29,84,84,84,90,89,89,89,90,90,90,90,89,82,82,82,89,66,61,61,61,66,61,46,46,46,61


In [7]:
country_continents1 = pd.read_csv("data/countries_continent.csv")
country_continents1 = country_continents1.rename(columns={"country": "nationality"})

df_fp = pd.merge(df_fp,country_continents1,on='nationality')
df_gk = pd.merge(df_gk,country_continents1,on='nationality')

df_fp.head(3)

Unnamed: 0,market_value_€,player_name,long_name,year_of_birth,player_age,height_cm,weight_kg,nationality,main_position,all_positions,club,league,divison,national_team_current_or_past,national_team_country,current_national_player,no_current_national_player,national_team_appearances,international_reputation,overall,potential,preferred_foot,weak_foot,skill_moves,work_rate,pace,shooting,passing,dribbling,defending,physic,player_traits,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,geographical_continent
0,112000000.0,Lionel Messi,Lionel Andrés Messi Cuccittini,1987,32,170,72,Argentina,Rechtsaußen,"RW, CF, ST",FC Barcelona,LaLiga,1,1,Argentina,1,0,138,5,94,94,Left,4,4,Medium/Low,87,92,92,96,39,66,"Beat Offside Trap, Argues with Officials, Earl...",88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63,South America
1,52000000.0,Sergio Agüero,Sergio Leonel Agüero del Castillo,1988,32,173,70,Argentina,Mittelstürmer,ST,Manchester City,Premier League,1,1,Argentina,1,0,97,4,89,89,Right,4,4,High/Medium,80,90,77,88,33,74,"Avoids Using Weaker Foot, Outside Foot Shot",70,93,78,83,85,88,83,73,64,89,82,78,84,92,91,89,81,79,74,84,65,24,93,83,83,90,30,29,24,87,87,87,85,87,87,87,85,85,85,85,83,77,77,77,83,60,58,58,58,60,56,50,50,50,56,South America
2,72000000.0,Paulo Dybala,Paulo Bruno Exequiel Dybala,1993,26,177,75,Argentina,Hängende Spitze,"CAM, RW",Juventus Turin,Serie A,1,1,Argentina,1,0,29,3,88,92,Left,3,4,Medium/Medium,83,82,84,90,43,64,"Beat Offside Trap, Selfish, Finesse Shot, Spee...",82,80,64,87,88,90,88,88,75,93,86,81,91,84,85,80,75,79,61,86,48,42,80,87,86,84,32,48,40,80,80,80,85,85,85,85,85,86,86,86,85,80,80,80,85,68,64,64,64,68,64,54,54,54,64,South America


In [8]:
def movecol(df, cols_to_move=[], ref_col='', place='After'):
    
    cols = df.columns.tolist()    
    if place == 'After':
        seg1 = cols[:list(cols).index(ref_col) + 1]
        seg2 = cols_to_move
    if place == 'Before':
        seg1 = cols[:list(cols).index(ref_col)]
        seg2 = cols_to_move + [ref_col]
    
    seg1 = [i for i in seg1 if i not in seg2]
    seg3 = [i for i in cols if i not in seg1 + seg2]
    
    return(df[seg1 + seg2 + seg3])

df_fp = movecol(df_fp, cols_to_move=["geographical_continent"], ref_col="nationality", place="After")
df_gk = movecol(df_gk, cols_to_move=["geographical_continent"], ref_col="nationality", place="After")

df_fp = df_fp.sort_values(by='overall', ascending=False)
df_gk = df_gk.sort_values(by='overall', ascending=False)
df_fp = df_fp.reset_index(drop=True)
df_gk = df_gk.reset_index(drop=True)

df_fp.head(3)

Unnamed: 0,market_value_€,player_name,long_name,year_of_birth,player_age,height_cm,weight_kg,nationality,geographical_continent,main_position,all_positions,club,league,divison,national_team_current_or_past,national_team_country,current_national_player,no_current_national_player,national_team_appearances,international_reputation,overall,potential,preferred_foot,weak_foot,skill_moves,work_rate,pace,shooting,passing,dribbling,defending,physic,player_traits,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,112000000.0,Lionel Messi,Lionel Andrés Messi Cuccittini,1987,32,170,72,Argentina,South America,Rechtsaußen,"RW, CF, ST",FC Barcelona,LaLiga,1,1,Argentina,1,0,138,5,94,94,Left,4,4,Medium/Low,87,92,92,96,39,66,"Beat Offside Trap, Argues with Officials, Earl...",88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63
1,60000000.0,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,1985,35,187,83,Portugal,Europe,Linksaußen,"ST, LW",Juventus Turin,Serie A,1,1,Portugal,1,0,164,5,93,93,Right,4,5,High/Low,90,93,82,89,35,78,"Long Throw-in, Selfish, Argues with Officials,...",84,94,89,83,87,89,81,76,77,92,89,91,87,96,71,95,95,85,78,93,63,29,95,82,85,95,28,32,24,91,91,91,89,90,90,90,89,88,88,88,88,81,81,81,88,65,61,61,61,65,61,53,53,53,61
2,128000000.0,Neymar,Neymar da Silva Santos Junior,1992,28,175,68,Brazil,South America,Linksaußen,"LW, CAM",FC Paris Saint-Germain,Ligue 1,1,1,Brazil,1,0,102,5,92,92,Right,5,5,High/Medium,91,85,87,95,32,58,"Power Free-Kick, Injury Free, Selfish, Early C...",87,87,62,87,87,96,88,87,81,95,94,89,96,92,84,80,61,81,49,84,51,36,87,90,90,94,27,26,29,84,84,84,90,89,89,89,90,90,90,90,89,82,82,82,89,66,61,61,61,66,61,46,46,46,61


## 2 - Positions

only applies for fieldplayers, since goalkeepers only have one position

In [9]:
df_fp["main_position"].value_counts()

Innenverteidiger         2176
Mittelstürmer            1565
Zentrales Mittelfeld     1392
Defensives Mittelfeld    1067
Rechter Verteidiger       992
Linker Verteidiger        953
Linksaußen                744
Offensives Mittelfeld     744
Rechtsaußen               719
Linkes Mittelfeld         243
Rechtes Mittelfeld        208
Hängende Spitze           109
Mittelfeld                 35
Sturm                      19
Abwehr                     13
Name: main_position, dtype: int64

In [10]:
# adjust german positions to english/fifa notation
positions_tm_in_fifa = pd.read_csv("data/tm_mainpos_to_fifa_pos.csv")
positions_tm_in_fifa_dict = pd.Series(positions_tm_in_fifa.fifa.values,index=positions_tm_in_fifa.tm).to_dict()

df_fp["main_position"] = df_fp["main_position"].replace(positions_tm_in_fifa_dict)

df_fp.head()

Unnamed: 0,market_value_€,player_name,long_name,year_of_birth,player_age,height_cm,weight_kg,nationality,geographical_continent,main_position,all_positions,club,league,divison,national_team_current_or_past,national_team_country,current_national_player,no_current_national_player,national_team_appearances,international_reputation,overall,potential,preferred_foot,weak_foot,skill_moves,work_rate,pace,shooting,passing,dribbling,defending,physic,player_traits,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,112000000.0,Lionel Messi,Lionel Andrés Messi Cuccittini,1987,32,170,72,Argentina,South America,Right Forward,"RW, CF, ST",FC Barcelona,LaLiga,1,1,Argentina,1,0,138,5,94,94,Left,4,4,Medium/Low,87,92,92,96,39,66,"Beat Offside Trap, Argues with Officials, Earl...",88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63
1,60000000.0,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,1985,35,187,83,Portugal,Europe,Left Forward,"ST, LW",Juventus Turin,Serie A,1,1,Portugal,1,0,164,5,93,93,Right,4,5,High/Low,90,93,82,89,35,78,"Long Throw-in, Selfish, Argues with Officials,...",84,94,89,83,87,89,81,76,77,92,89,91,87,96,71,95,95,85,78,93,63,29,95,82,85,95,28,32,24,91,91,91,89,90,90,90,89,88,88,88,88,81,81,81,88,65,61,61,61,65,61,53,53,53,61
2,128000000.0,Neymar,Neymar da Silva Santos Junior,1992,28,175,68,Brazil,South America,Left Forward,"LW, CAM",FC Paris Saint-Germain,Ligue 1,1,1,Brazil,1,0,102,5,92,92,Right,5,5,High/Medium,91,85,87,95,32,58,"Power Free-Kick, Injury Free, Selfish, Early C...",87,87,62,87,87,96,88,87,81,95,94,89,96,92,84,80,61,81,49,84,51,36,87,90,90,94,27,26,29,84,84,84,90,89,89,89,90,90,90,90,89,82,82,82,89,66,61,61,61,66,61,46,46,46,61
3,120000000.0,Kevin De Bruyne,Kevin De Bruyne,1991,28,181,70,Belgium,Europe,Centre Attacking Midfield,"CAM, CM",Manchester City,Premier League,1,1,Belgium,1,0,74,4,91,91,Right,5,4,High/High,76,86,92,86,61,78,"Power Free-Kick, Avoids Using Weaker Foot, Div...",93,82,55,92,82,86,85,83,91,91,77,76,78,91,76,91,63,89,74,90,76,61,88,94,79,91,68,58,51,82,82,82,87,87,87,87,87,88,88,88,88,87,87,87,88,77,77,77,77,77,73,66,66,66,73
4,80000000.0,Eden Hazard,Eden Hazard,1991,28,175,74,Belgium,Europe,Left Forward,"LW, CF",Real Madrid,LaLiga,1,1,Belgium,1,0,106,4,91,91,Right,4,4,High/Medium,91,83,86,94,35,66,"Beat Offside Trap, Selfish, Finesse Shot, Spee...",81,84,61,89,83,95,83,79,83,94,94,88,95,90,94,82,56,84,63,80,54,41,87,89,88,91,34,27,22,83,83,83,89,88,88,88,89,89,89,89,89,83,83,83,89,66,63,63,63,66,61,49,49,49,61


In [11]:
df_fp["all_positions"] = df_fp.all_positions.map(lambda x: [i.strip() for i in x.split(",")])
df_fp["positional_flexibility"] = df_fp["all_positions"].str.len()
df_fp["positional_flexibility"] = df_fp["positional_flexibility"].astype("object")
df_fp = df_fp.drop(["all_positions"],axis=1)
df_fp = movecol(df_fp, cols_to_move=["positional_flexibility"], ref_col="main_position", place="After")

df_fp.head(10)

Unnamed: 0,market_value_€,player_name,long_name,year_of_birth,player_age,height_cm,weight_kg,nationality,geographical_continent,main_position,positional_flexibility,club,league,divison,national_team_current_or_past,national_team_country,current_national_player,no_current_national_player,national_team_appearances,international_reputation,overall,potential,preferred_foot,weak_foot,skill_moves,work_rate,pace,shooting,passing,dribbling,defending,physic,player_traits,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,112000000.0,Lionel Messi,Lionel Andrés Messi Cuccittini,1987,32,170,72,Argentina,South America,Right Forward,3,FC Barcelona,LaLiga,1,1,Argentina,1,0,138,5,94,94,Left,4,4,Medium/Low,87,92,92,96,39,66,"Beat Offside Trap, Argues with Officials, Earl...",88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63
1,60000000.0,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,1985,35,187,83,Portugal,Europe,Left Forward,2,Juventus Turin,Serie A,1,1,Portugal,1,0,164,5,93,93,Right,4,5,High/Low,90,93,82,89,35,78,"Long Throw-in, Selfish, Argues with Officials,...",84,94,89,83,87,89,81,76,77,92,89,91,87,96,71,95,95,85,78,93,63,29,95,82,85,95,28,32,24,91,91,91,89,90,90,90,89,88,88,88,88,81,81,81,88,65,61,61,61,65,61,53,53,53,61
2,128000000.0,Neymar,Neymar da Silva Santos Junior,1992,28,175,68,Brazil,South America,Left Forward,2,FC Paris Saint-Germain,Ligue 1,1,1,Brazil,1,0,102,5,92,92,Right,5,5,High/Medium,91,85,87,95,32,58,"Power Free-Kick, Injury Free, Selfish, Early C...",87,87,62,87,87,96,88,87,81,95,94,89,96,92,84,80,61,81,49,84,51,36,87,90,90,94,27,26,29,84,84,84,90,89,89,89,90,90,90,90,89,82,82,82,89,66,61,61,61,66,61,46,46,46,61
3,120000000.0,Kevin De Bruyne,Kevin De Bruyne,1991,28,181,70,Belgium,Europe,Centre Attacking Midfield,2,Manchester City,Premier League,1,1,Belgium,1,0,74,4,91,91,Right,5,4,High/High,76,86,92,86,61,78,"Power Free-Kick, Avoids Using Weaker Foot, Div...",93,82,55,92,82,86,85,83,91,91,77,76,78,91,76,91,63,89,74,90,76,61,88,94,79,91,68,58,51,82,82,82,87,87,87,87,87,88,88,88,88,87,87,87,88,77,77,77,77,77,73,66,66,66,73
4,80000000.0,Eden Hazard,Eden Hazard,1991,28,175,74,Belgium,Europe,Left Forward,2,Real Madrid,LaLiga,1,1,Belgium,1,0,106,4,91,91,Right,4,4,High/Medium,91,83,86,94,35,66,"Beat Offside Trap, Selfish, Finesse Shot, Spee...",81,84,61,89,83,95,83,79,83,94,94,88,95,90,94,82,56,84,63,80,54,41,87,89,88,91,34,27,22,83,83,83,89,88,88,88,89,89,89,89,89,83,83,83,89,66,63,63,63,66,61,49,49,49,61
5,80000000.0,Virgil van Dijk,Virgil van Dijk,1991,28,193,92,Netherlands,Europe,Centre Back,1,FC Liverpool,Premier League,1,1,Netherlands,1,0,33,3,90,91,Right,3,2,Medium/Medium,77,60,70,71,90,86,"Diver, Avoids Using Weaker Foot, Leadership, L...",53,52,86,78,45,70,60,70,81,76,74,79,61,88,53,81,90,75,92,64,82,89,47,65,62,89,91,92,85,69,69,69,67,69,69,69,67,69,69,69,69,74,74,74,69,79,83,83,83,79,81,87,87,87,81
6,120000000.0,Mohamed Salah,Mohamed Salah Ghaly,1992,27,175,71,Egypt,Africa,Right Forward,2,FC Liverpool,Premier League,1,1,Egypt,1,0,67,3,90,90,Left,3,4,High/Medium,93,86,81,89,45,74,"Beat Offside Trap, Argues with Officials, Earl...",79,90,59,84,79,89,83,69,75,89,94,92,91,92,88,80,69,85,73,84,63,55,92,84,77,91,38,43,41,84,84,84,88,88,88,88,88,87,87,87,87,81,81,81,87,70,67,67,67,70,66,57,57,57,66
7,12000000.0,Luka Modric,Luka Modrić,1985,34,172,66,Croatia,Europe,Centre Midfield,1,Real Madrid,LaLiga,1,1,Croatia,1,0,127,4,90,90,Right,4,4,High/High,74,76,89,89,72,66,"Argues with Officials, Finesse Shot, Speed Dri...",86,72,55,92,76,87,85,78,88,92,77,71,92,89,93,79,68,85,58,82,62,82,79,91,82,92,68,76,71,77,77,77,84,83,83,83,84,86,86,86,85,87,87,87,85,81,81,81,81,81,79,72,72,72,79
8,52000000.0,Sergio Agüero,Sergio Leonel Agüero del Castillo,1988,32,173,70,Argentina,South America,Striker,1,Manchester City,Premier League,1,1,Argentina,1,0,97,4,89,89,Right,4,4,High/Medium,80,90,77,88,33,74,"Avoids Using Weaker Foot, Outside Foot Shot",70,93,78,83,85,88,83,73,64,89,82,78,84,92,91,89,81,79,74,84,65,24,93,83,83,90,30,29,24,87,87,87,85,87,87,87,85,85,85,85,83,77,77,77,83,60,58,58,58,60,56,50,50,50,56
9,80000000.0,N'Golo Kanté,N'Golo Kanté,1991,29,168,72,France,Europe,Centre Midfield,2,FC Chelsea,Premier League,1,1,France,1,0,39,3,89,90,Right,3,2,Medium/High,78,65,77,81,87,83,Diver,68,65,54,86,56,79,49,49,81,80,79,77,82,93,92,71,77,97,73,63,90,92,72,79,54,85,90,91,85,72,72,72,77,76,76,76,77,78,78,78,79,82,82,82,79,85,87,87,87,85,84,83,83,83,84


In [12]:
# flexibility 2 --> find max and count how often / on how many positions
df_fp["ID"] = df_fp.index
df_fp["ID"] = df_fp["ID"].astype("object")
df_fp_flex = df_fp[["ID","player_name","ls","st","rs","lw","lf","cf","rf","rw","lam","cam","ram","lm","lcm","cm","rcm","rm","lwb","ldm","cdm","rdm","rwb","lb","lcb","cb","rcb","rb"]]
df_fp_flex["max_pos_value"] = df_fp_flex.max(axis=1)
df_fp_flex.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,ID,player_name,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,max_pos_value
0,0,Lionel Messi,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63,93
1,1,Cristiano Ronaldo,91,91,91,89,90,90,90,89,88,88,88,88,81,81,81,88,65,61,61,61,65,61,53,53,53,61,91
2,2,Neymar,84,84,84,90,89,89,89,90,90,90,90,89,82,82,82,89,66,61,61,61,66,61,46,46,46,61,90
3,3,Kevin De Bruyne,82,82,82,87,87,87,87,87,88,88,88,88,87,87,87,88,77,77,77,77,77,73,66,66,66,73,88
4,4,Eden Hazard,83,83,83,89,88,88,88,89,89,89,89,89,83,83,83,89,66,63,63,63,66,61,49,49,49,61,89


In [13]:
for i in df_fp_flex.columns:
    df_fp_flex[f"{i}"] = df_fp_flex[f"{i}"].astype("object")
print(df_fp_flex.dtypes)
df_fp_flex.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


ID               object
player_name      object
ls               object
st               object
rs               object
lw               object
lf               object
cf               object
rf               object
rw               object
lam              object
cam              object
ram              object
lm               object
lcm              object
cm               object
rcm              object
rm               object
lwb              object
ldm              object
cdm              object
rdm              object
rwb              object
lb               object
lcb              object
cb               object
rcb              object
rb               object
max_pos_value    object
dtype: object


Unnamed: 0,ID,player_name,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,ram,lm,lcm,cm,rcm,rm,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,max_pos_value
0,0,Lionel Messi,89,89,89,93,93,93,93,93,93,93,93,92,87,87,87,92,68,66,66,66,68,63,52,52,52,63,93
1,1,Cristiano Ronaldo,91,91,91,89,90,90,90,89,88,88,88,88,81,81,81,88,65,61,61,61,65,61,53,53,53,61,91
2,2,Neymar,84,84,84,90,89,89,89,90,90,90,90,89,82,82,82,89,66,61,61,61,66,61,46,46,46,61,90
3,3,Kevin De Bruyne,82,82,82,87,87,87,87,87,88,88,88,88,87,87,87,88,77,77,77,77,77,73,66,66,66,73,88
4,4,Eden Hazard,83,83,83,89,88,88,88,89,89,89,89,89,83,83,83,89,66,63,63,63,66,61,49,49,49,61,89


## 3 - Average over positional values

## 4 - Workrate, Int. Reputation, Skill Moves

## 5 - Player traits

#### Work to do next notebook
- truncate dataframe
- do second EDA, after dataframe has been truncated and features have been engineered
- especially have a look at national team appearances
- check for correlation among features!!!
- clustering & other unsupervised learning methods???

#### further work
- build preprocessing pipeline for numerical and categorical vars separately