In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from functools import reduce
from sklearn.datasets.samples_generator import make_blobs
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Notebook-wide pandas display settings: floats are rendered with three
# decimals, and neither columns nor rows are truncated when a frame is shown.
display_options = {
    'display.float_format': lambda value: '%.3f' % value,
    'display.max_columns': None,
    'display.max_rows': None,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [3]:
# FIFA data cleaning: load one players CSV per game edition and tag every row
# with the football season it describes.
# Each entry is (CSV file name, season label written to the `year` column).
season_files = [
    ('players_16.csv', '15/16'),
    ('players_17.csv', '16/17'),
    ('players_18.csv', '17/18'),
    ('players_19.csv', '18/19'),
    ('players_20.csv', '19/20'),
]

# One read-and-tag expression replaces five copy-pasted stanzas; the unpacking
# order below must match the order of `season_files`.
df_fifa16, df_fifa17, df_fifa18, df_fifa19, df_fifa20 = [
    pd.read_csv(csv_name).assign(year=season_label)
    for csv_name, season_label in season_files
]

In [4]:
# Columns kept from every season file. Defined once so all five seasons are
# guaranteed to share the same schema (the list used to be pasted five times).
FIFA_COLUMNS = [
    # identity, valuation and contract details
    'short_name', 'overall', 'club', 'value_eur', 'wage_eur',
    'player_positions', 'year', 'potential', 'preferred_foot',
    'international_reputation', 'weak_foot', 'skill_moves', 'team_position',
    'team_jersey_number', 'joined', 'contract_valid_until',
    # headline outfield ratings
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
    # headline goalkeeper ratings
    'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed',
    'gk_positioning',
    'player_traits',
    # detailed attributes
    'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
    'attacking_short_passing', 'attacking_volleys',
    'skill_dribbling', 'skill_curve', 'skill_fk_accuracy',
    'skill_long_passing', 'skill_ball_control',
    'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
    'movement_reactions', 'movement_balance',
    'power_shot_power', 'power_jumping', 'power_stamina', 'power_strength',
    'power_long_shots',
    'mentality_aggression', 'mentality_interceptions', 'mentality_positioning',
    'mentality_vision', 'mentality_penalties', 'mentality_composure',
    'defending_marking', 'defending_standing_tackle',
    'defending_sliding_tackle',
    'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking',
    'goalkeeping_positioning', 'goalkeeping_reflexes',
]

# Selecting with a list raises KeyError if any season file lacks a column,
# which is the same failure mode as before.
df_fifa16_clean = df_fifa16[FIFA_COLUMNS]
df_fifa17_clean = df_fifa17[FIFA_COLUMNS]
df_fifa18_clean = df_fifa18[FIFA_COLUMNS]
df_fifa19_clean = df_fifa19[FIFA_COLUMNS]
df_fifa20_clean = df_fifa20[FIFA_COLUMNS]

In [5]:
# Trimmed season frames, ordered oldest season first.
# NOTE(review): not referenced by any of the cells visible in this section.
data_frames = [
    df_fifa16_clean,
    df_fifa17_clean,
    df_fifa18_clean,
    df_fifa19_clean,
    df_fifa20_clean,
]

In [6]:
# Stack all seasons into one long table.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so use
# `pd.concat`. The order is unchanged (19/20 first, then 15/16 through 18/19)
# and each frame keeps its own row labels, so the index of `df` is not unique.
df = pd.concat(
    [
        df_fifa20_clean,
        df_fifa16_clean,
        df_fifa17_clean,
        df_fifa18_clean,
        df_fifa19_clean,
    ]
)
df.head()

Unnamed: 0,short_name,overall,club,value_eur,wage_eur,player_positions,year,potential,preferred_foot,international_reputation,weak_foot,skill_moves,team_position,team_jersey_number,joined,contract_valid_until,pace,shooting,passing,dribbling,defending,physic,gk_diving,gk_handling,gk_kicking,gk_reflexes,gk_speed,gk_positioning,player_traits,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes
0,L. Messi,94,FC Barcelona,95500000,565000,"RW, CF, ST",19/20,94,Left,5,4,4,RW,10.0,2004-07-01,2021.0,87.0,92.0,92.0,96.0,39.0,66.0,,,,,,,"Beat Offside Trap, Argues with Officials, Earl...",88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,6,11,15,14,8
1,Cristiano Ronaldo,93,Juventus,58500000,405000,"ST, LW",19/20,93,Right,5,4,5,LW,7.0,2018-07-10,2022.0,90.0,93.0,82.0,89.0,35.0,78.0,,,,,,,"Long Throw-in, Selfish, Argues with Officials,...",84,94,89,83,87,89,81,76,77,92,89,91,87,96,71,95,95,85,78,93,63,29,95,82,85,95,28,32,24,7,11,15,14,11
2,Neymar Jr,92,Paris Saint-Germain,105500000,290000,"LW, CAM",19/20,92,Right,5,5,5,CAM,10.0,2017-08-03,2022.0,91.0,85.0,87.0,95.0,32.0,58.0,,,,,,,"Power Free-Kick, Injury Free, Selfish, Early C...",87,87,62,87,87,96,88,87,81,95,94,89,96,92,84,80,61,81,49,84,51,36,87,90,90,94,27,26,29,9,9,15,15,11
3,J. Oblak,91,Atlético Madrid,77500000,125000,GK,19/20,93,Right,3,3,1,GK,13.0,2014-07-16,2023.0,,,,,,,87.0,92.0,78.0,89.0,52.0,90.0,"Flair, Acrobatic Clearance",13,11,15,43,13,12,13,14,40,30,43,60,67,88,49,59,78,41,78,12,34,19,11,65,11,68,27,12,18,87,92,78,90,89
4,E. Hazard,91,Real Madrid,90000000,470000,"LW, CF",19/20,91,Right,4,4,4,LW,7.0,2019-07-01,2024.0,91.0,83.0,86.0,94.0,35.0,66.0,,,,,,,"Beat Offside Trap, Selfish, Finesse Shot, Spee...",81,84,61,89,83,95,83,79,83,94,94,88,95,90,94,82,56,84,63,80,54,41,87,89,88,91,34,27,22,11,12,6,8,8


In [7]:
# `player_positions` is a comma-separated string (e.g. "RW, CF, ST"); keep only
# the first entry as the player's primary position.
# The earlier plain copy of the column was removed because this assignment
# overwrote it immediately.
df['player_position'] = df['player_positions'].str.split(',').str[0]
df['player_position'].unique()

array(['RW', 'ST', 'LW', 'GK', 'CAM', 'CB', 'CM', 'CDM', 'CF', 'LB', 'RB',
       'RM', 'LM', 'LWB', 'RWB'], dtype=object)

In [8]:
# Collapse detailed positions into broad groups. Codes that are absent from
# the mapping (such as 'CB', 'CM', 'ST' and 'GK') are left as they are.
position_groups = {
    # full-backs and wing-backs -> outside back
    'RB': 'OB', 'LB': 'OB', 'LWB': 'OB', 'RWB': 'OB',
    # attacking / defensive midfield -> central midfield
    'CAM': 'CM', 'CDM': 'CM',
    # wide midfielders and wingers -> winger
    'LM': 'W', 'RM': 'W', 'RW': 'W', 'LW': 'W',
    # centre forward -> striker
    'CF': 'ST',
}
grouped_position = df['player_position'].replace(position_groups)
# The string cast also turns any missing value into the literal text 'nan'.
df['player_position'] = grouped_position.astype(str)

In [9]:
# Sanity check: list the distinct position groups left after the mapping.
df.loc[:, 'player_position'].unique()

array(['W', 'ST', 'GK', 'CM', 'CB', 'OB'], dtype=object)

In [11]:
# Folder that receives the combined table, kept as a string ending in a
# separator so a file name can be appended directly.
# A raw string cannot end in a single backslash, and the previous workaround
# (a trailing space) put that space at the start of the file name, producing
# ' fifa-all.csv'. Escaped backslashes avoid the problem.
# NOTE(review): any reader that opens the old ' fifa-all.csv' name must be
# updated. The folder is still a machine-specific absolute path; consider
# making it configurable.
path = 'C:\\Users\\aaron\\prem_league_analytics\\data\\'
# The row index is written as the first (unnamed) column, as before.
df.to_csv(path + 'fifa-all.csv')