In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fifa-20-complete-player-dataset/players_16.csv
/kaggle/input/fifa-20-complete-player-dataset/players_15.csv
/kaggle/input/fifa-20-complete-player-dataset/players_18.csv
/kaggle/input/fifa-20-complete-player-dataset/players_19.csv
/kaggle/input/fifa-20-complete-player-dataset/players_20.csv
/kaggle/input/fifa-20-complete-player-dataset/teams_and_leagues.csv
/kaggle/input/fifa-20-complete-player-dataset/players_17.csv


In [2]:
# Load the FIFA 20 player table, keeping only the columns listed below.
PLAYERS_CSV = '/kaggle/input/fifa-20-complete-player-dataset/players_20.csv'
PLAYER_COLUMNS = [
    'short_name',
    'nationality',
    'club',
    'team_jersey_number',
    'age',
    'pace',
    'player_positions',
    'height_cm',
]
df = pd.read_csv(PLAYERS_CSV, usecols=PLAYER_COLUMNS)

In [3]:
# Preview the first rows of the frame (head() returns five rows by default).
df.head()

Unnamed: 0,short_name,age,height_cm,nationality,club,player_positions,team_jersey_number,pace
0,L. Messi,32,170,Argentina,FC Barcelona,"RW, CF, ST",10.0,87.0
1,Cristiano Ronaldo,34,187,Portugal,Juventus,"ST, LW",7.0,90.0
2,Neymar Jr,27,175,Brazil,Paris Saint-Germain,"LW, CAM",10.0,91.0
3,J. Oblak,26,188,Slovenia,Atlético Madrid,GK,13.0,
4,E. Hazard,28,175,Belgium,Real Madrid,"LW, CF",7.0,91.0


In [4]:
# Keep only players from the selected countries whose jersey number is at most 30.
paises = ['Argentina', 'Brazil', 'England', 'Germany']

# Rows with a missing jersey number compare as False and are therefore dropped.
has_low_number = df['team_jersey_number'] <= 30
from_selected_country = df['nationality'].isin(paises)

df_reduzido = df[has_low_number & from_selected_country]
df_reduzido

Unnamed: 0,short_name,age,height_cm,nationality,club,player_positions,team_jersey_number,pace
0,L. Messi,32,170,Argentina,FC Barcelona,"RW, CF, ST",10.0,87.0
2,Neymar Jr,27,175,Brazil,Paris Saint-Germain,"LW, CAM",10.0,91.0
6,M. ter Stegen,27,187,Germany,FC Barcelona,GK,1.0,
12,H. Kane,25,188,England,Tottenham Hotspur,ST,10.0,70.0
13,Alisson,26,191,Brazil,Liverpool,GK,1.0,
...,...,...,...,...,...,...,...,...
18251,G. Sykes-Kenworthy,19,182,England,Bradford City,GK,30.0,
18255,C. Heath,18,188,England,Crewe Alexandra,CM,26.0,65.0
18256,S. Cartwright,18,185,England,Peterborough United,"CB, RB",30.0,61.0
18265,L. Offord,19,170,England,Crewe Alexandra,"CDM, RB",21.0,59.0


In [5]:
import plotly.express as px
import matplotlib as mpl

In [6]:
# Grouped histogram of jersey numbers, one colour per nationality, with bars
# expressed as probabilities (histnorm) instead of raw counts.
histogram_labels = {
    'team_jersey_number': 'Número no uniforme',
    'nationality': 'País',
}
fig = px.histogram(
    df_reduzido,
    x="team_jersey_number",
    color="nationality",
    barmode="group",
    histnorm='probability',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    labels=histogram_labels,
    title='Uso dos diferentes números no uniforme agrupado por países',
).update_layout(bargap=0.5)  # update_layout returns the same figure object
fig.show()

In [7]:
# Box plot of pace per age: one box per age value, coloured along the viridis
# colormap so that the colour encodes the age.
# NOTE: `df` is rebound to its age-sorted version here, so the cells that
# follow operate on the sorted frame.
df = df.sort_values("age")
sw = df['age'].sort_values()
# Rescale the ages to [0, 1] so they can be passed to the colormap.
sw_01 = (sw - sw.min()) / (sw.max() - sw.min())
# Map each age to the hex colour viridis assigns to its rescaled value.
sw_colors = {n: mpl.colors.rgb2hex(c) for n, c in zip(sw, mpl.cm.viridis(sw_01))}
fig = px.box(df,
             x="age",
             y="pace",
             color="age",
             # category_orders must be keyed by the column it orders. The
             # previous key ('sepal_width') is not a column of `df`, so the
             # ordering was silently ignored. Distinct ages are passed in
             # descending order, as the reversed list was meant to do.
             category_orders={'age': sw.drop_duplicates().to_list()[::-1]},
             color_discrete_map=sw_colors,
             labels={
                       'pace': 'Ritmo',
                       'age': 'Idade'
                   },
            title="Influência da idade no ritmo do atleta")
# Start the y axis at zero so pace differences are not visually exaggerated.
fig.update_yaxes(rangemode="tozero")
fig.show()

In [8]:
# Derive each player's first listed position: the text before the first comma
# of 'player_positions'.
split_positions = df['player_positions'].str.split(",", n=1)
df['position'] = split_positions.str[0]

# Print the T10 palette so its hex codes can be read off.
print(px.colors.qualitative.T10)

['#4C78A8', '#F58518', '#E45756', '#72B7B2', '#54A24B', '#EECA3B', '#B279A2', '#FF9DA6', '#9D755D', '#BAB0AC']


In [9]:
# Violin plot of height per position. Positions that share a colour are listed
# together; the per-position colour map and the x-axis category order are both
# derived from this single mapping.
positions_by_color = {
    "#4C78A8": ["GK"],
    "#72B7B2": ["CB", "RB", "LB", "RWB", "LWB"],
    "#E45756": ["CDM", "CM", "CAM", "LM", "RM"],
    "#F58518": ["CF", "LW", "RW", "ST"],
}
position_colors = {
    position: color
    for color, positions in positions_by_color.items()
    for position in positions
}
# Dict keys keep insertion order, which gives the x-axis order used below.
position_order = list(position_colors)

violin_labels = {
    'height_cm': 'Altura (em cm)',
    'position': 'Posição',
}
fig = px.violin(
    df,
    x="position",
    y="height_cm",
    color="position",
    labels=violin_labels,
    title="Relação entre altura e posição",
    color_discrete_map=position_colors,
)
fig.update_xaxes(categoryarray=position_order)
fig.show()