## Loading Data

In [1]:
import utils
import pandas as pd
import altair as alt

# WARNING: This will lead to extremely large notebooks as DataFrames size grows
alt.data_transformers.disable_max_rows()
# However this piece of code will reduce that
# alt.data_transformers.enable('json')

DataTransformerRegistry.enable('default')

In [8]:
players = utils.load_df('players.feather')
player_attrs = utils.load_df('player_attrs.feather')
teams = utils.load_df('teams.feather')
team_attrs = utils.load_df('team_attrs.feather')
countries = utils.load_df('countries.feather')
leagues = utils.load_df('leagues.feather')
matches = utils.load_df('matches.feather')

In [9]:
players.head()

Unnamed: 0,id,player_api_id,player_name,birthday,height,weight
0,1,505942,Aaron Appindangoye,1992-02-29 00:00:00,182.88,187
1,2,155782,Aaron Cresswell,1989-12-15 00:00:00,170.18,146
2,3,162549,Aaron Doran,1991-05-13 00:00:00,170.18,163
3,4,30572,Aaron Galindo,1982-05-08 00:00:00,182.88,198
4,5,23780,Aaron Hughes,1979-11-08 00:00:00,182.88,154


In [4]:
player_attrs.head()


Unnamed: 0,id,player_api_id,date,overall_rating,potential,preferred_foot,attacking_work_rate,defensive_work_rate,crossing,finishing,...,vision,penalties,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes
0,1,505942,2016-02-18 00:00:00,67.0,71.0,right,medium,medium,49.0,44.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
1,2,505942,2015-11-19 00:00:00,67.0,71.0,right,medium,medium,49.0,44.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
2,3,505942,2015-09-21 00:00:00,62.0,66.0,right,medium,medium,49.0,44.0,...,54.0,48.0,65.0,66.0,69.0,6.0,11.0,10.0,8.0,8.0
3,4,505942,2015-03-20 00:00:00,61.0,65.0,right,medium,medium,48.0,43.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0
4,5,505942,2007-02-22 00:00:00,61.0,65.0,right,medium,medium,48.0,43.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0


## Top X Players By Rating

In [10]:
X = 10
player_avg_ratings = player_attrs.groupby('player_api_id')[['overall_rating', 'potential']].mean()
sorted_player_avg_ratings = player_avg_ratings.sort_values('overall_rating', ascending=False)
top_players_attrs = sorted_player_avg_ratings[:X]
worst_players_attrs = sorted_player_avg_ratings[-X:]
top_players = top_players_attrs.merge(players, on='player_api_id')
worst_players = worst_players_attrs.merge(players, on='player_api_id')

# Limit Y Axis to 0 to 100 by passing this to the encoding param `scale`
zero_to_100_axis = alt.Scale(domain=[0, 100])

# Top
top_chart = alt.Chart(
    top_players,
    # width=500,
    title=f'Top {X} Players By Rating' 
).mark_bar().encode(
    y=alt.Y('player_name',sort='-x'),
    x=alt.X('overall_rating', scale=zero_to_100_axis),
) 
top_chart |= alt.Chart(
    top_players,
    # width=500,
    title=f'Top {X} Players By Potential'
).mark_bar().encode(
    y=alt.Y('player_name',sort='-x'),
    x=alt.X('potential', scale=zero_to_100_axis),
)

# Bottom 
bottom_chart = alt.Chart(
    worst_players,
    # width=500,
    title=f'Worst {X} Players By Rating'
).mark_bar(color='firebrick').encode(
    y=alt.Y('player_name',sort='-x',),
    x=alt.X('overall_rating', scale=zero_to_100_axis),
)

bottom_chart |= alt.Chart(
    worst_players,
    # width=500,
    title=f'Worst {X} Players By Potential'
).mark_bar(color='firebrick').encode(
    y=alt.Y('player_name',sort='-x',),
    x=alt.X('potential', scale=zero_to_100_axis),
)
top_chart & bottom_chart

## Heatmap

In [5]:
utils.alt_corr_plot(player_attrs, corr_limit=0.9, box_size=50, annot_size=10)

In [6]:
alt.Chart(player_attrs[:2000]).mark_circle().encode(x='sprint_speed', y='acceleration', color='count()')

In [7]:
utils.alt_corr_plot(matches, corr_limit=0.95, box_size=20, annot_size=5)

## Countries Wins over Time

In [29]:
countries.head()

Unnamed: 0,id,name
0,1,Belgium
1,1729,England
2,4769,France
3,7809,Germany
4,10257,Italy


In [30]:
columns_of_interest = ['country_id', 'season', 'home_team_goal', 'away_team_goal', 'home_team_api_id', 'away_team_api_id']
matches_matches[columns_of_interest]



Unnamed: 0,id,country_id,season,date,home_team_goal,away_team_goal,home_team_api_id,away_team_api_id
0,1,1,2008/2009,2008-08-17 00:00:00,1,1,9987,9993
1,2,1,2008/2009,2008-08-16 00:00:00,0,0,10000,9994
2,3,1,2008/2009,2008-08-16 00:00:00,0,3,9984,8635
3,4,1,2008/2009,2008-08-17 00:00:00,5,0,9991,9998
4,5,1,2008/2009,2008-08-16 00:00:00,1,3,7947,9985
...,...,...,...,...,...,...,...,...
25974,25975,24558,2015/2016,2015-09-22 00:00:00,1,0,10190,10191
25975,25976,24558,2015/2016,2015-09-23 00:00:00,1,2,9824,10199
25976,25977,24558,2015/2016,2015-09-23 00:00:00,2,0,9956,10179
25977,25978,24558,2015/2016,2015-09-22 00:00:00,0,0,7896,10243
