In [3]:
import plotly.plotly as py
import pandas as pd
from datetime import datetime

In [4]:
def lookup(s, date_format='%Y%m%d'):
    """
    Convert a Series of raw date values to timestamps through a lookup table.

    Rather than parsing every row, each distinct value is parsed once and the
    result is mapped back onto the full Series, which is much faster when the
    same dates repeat many times.

    Parameters
    ----------
    s : pandas.Series
        Raw date values (for example the integer 20101227 or the string
        '20101227').
    date_format : str, default '%Y%m%d'
        strptime-style format used to parse the values.

    Returns
    -------
    pandas.Series
        Same index and name as ``s``; values that cannot be parsed with
        ``date_format`` become ``NaT``.
    """
    unique_values = s.unique()
    # errors='coerce' turns unparseable entries into NaT. The former
    # errors='ignore' handed such entries back unparsed, which produced a
    # mixed-type column on which the ``.dt`` accessor cannot be used.
    parsed = pd.to_datetime(unique_values, format=date_format, errors='coerce')
    # Map through a Series keyed by the raw values so the result keeps a
    # datetime dtype.
    return s.map(pd.Series(parsed, index=unique_values))

In [5]:
def import_atp(Y, top_n=500):
    """
    Load one ATP rankings file and keep the last ranking of each year.

    Reads ``atp_rankings_<Y>s.csv`` from the working directory, converts the
    ``Date`` column with ``lookup``, adds a ``Year`` column, keeps only the
    rows whose date is the latest one found within their year, and finally
    keeps the rows ranked ``top_n`` or better.

    Parameters
    ----------
    Y : str
        Value substituted into the file name (e.g. '10' reads
        ``atp_rankings_10s.csv``).
    top_n : int, default 500
        Highest rank number that is kept (inclusive).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``Rank``, ``Player_id``, ``Points`` and ``Year``.
    """
    # Column names and dtypes, supplied explicitly to read_csv.
    atp_cols = {'Date': int, 'Rank': int, 'Player_id': str, 'Points': str}
    # NOTE(review): na_values=['O'] treats a lone letter "O" as missing;
    # presumably a typo for zero in the raw files - confirm.
    tenn = pd.read_csv(
        "atp_rankings_{}s.csv".format(Y),
        names=list(atp_cols),
        dtype=atp_cols,
        low_memory=False,
        na_values=['O'],
    )
    # Convert the raw date values to timestamps.
    tenn['Date'] = lookup(tenn['Date'])
    # Add a year column.
    tenn['Year'] = tenn['Date'].dt.year
    # Latest ranking date within each year, aligned row by row. This replaces
    # a per-year Python loop whose comment and variable name said "oldest"
    # although it computed the maximum date.
    last_date_of_year = tenn.groupby('Year')['Date'].transform('max')
    # Reduce the table to one ranking date per year.
    tenn = tenn[tenn['Date'] == last_date_of_year]
    # Keep only the best-ranked players.
    tenn = tenn[tenn['Rank'] <= top_n]
    return tenn

In [6]:
# Values substituted into the rankings file name by import_atp, one per file.
atp_dec = ['10', '00', '90', '80']
# Import every source, then concatenate once. Building the list first avoids
# re-copying the accumulated frame on each iteration and removes the
# special case for the first file.
atp = pd.concat([import_atp(decade) for decade in atp_dec], axis=0)

In [7]:
# Players table: column names are supplied explicitly to read_csv.
# Only Player_id has its dtype forced at read time; the other types listed in
# players_cols are not passed to read_csv.
players_cols = {
    'Player_id': str,
    'First_name': str,
    'Last_name': str,
    'Hand': str,
    'Birth_date': int,
    'Country_code': str,
}
players = pd.read_csv(
    'atp_players.csv',
    names=players_cols.keys(),
    dtype={'Player_id': str},
    encoding='latin-1',
)
# Missing birth dates are replaced by 0 so the column can be cast to int
# before being handed to lookup for date conversion.
players['Birth_date'] = lookup(players['Birth_date'].fillna(0).astype(int))

# Countries table.
atp_countries = pd.read_csv("ATP_countries.csv")

In [8]:
# Left-join the player details onto each ranking row, then the country table
# (matching Country_code against ATP_Code), and finally drop the two join-key
# columns that are no longer needed.
atp = (
    atp
    .merge(players, on='Player_id', how='left')
    .merge(atp_countries, left_on='Country_code', right_on='ATP_Code', how='left')
    .drop(columns=['Country_code', 'ATP_Code'])
)

### First analysis for 2016 ###

In [9]:
# Number of rows per country code for 2016; the count ends up in the
# Player_id column. Selecting Player_id before counting avoids counting every
# other column only to discard the results.
atp2016_country = (
    atp[atp['Year'] == 2016]
    .groupby('Code')['Player_id']
    .count()
    .reset_index()
)

In [10]:
# Start from the world country list, left-join the 2016 per-country counts on
# the country code, and drop the duplicated key column from the right side.
df = (
    pd.read_csv("world_countries.csv", usecols=['COUNTRY', 'CODE'])
    .merge(atp2016_country, left_on='CODE', right_on='Code', how='left')
    .drop(columns='Code')
)
# Countries without a match in the 2016 counts get 0.
df['Player_id'] = df['Player_id'].fillna(0.)
df.head(2)

Unnamed: 0,COUNTRY,CODE,Player_id
0,Afghanistan,AFG,0.0
1,Albania,ALB,0.0


In [11]:
# Dimensions of the country table, as (rows, columns).
df.shape

(222, 3)

In [12]:
import plotly.plotly as py
# Names of the df columns that feed the map: location code, colour value and
# hover text.
code = 'CODE'
value = 'Player_id'
name = 'COUNTRY'

# Single choropleth trace, coloured by the number of players per country.
data = [
    {
        'type': 'choropleth',
        'locations': df[code],
        'z': df[value],
        'text': df[name],
        'colorscale': "Viridis",
        'autocolorscale': False,
        'reversescale': True,
        # White borders between countries.
        'marker': {
            'line': {
                'color': 'rgb(255,255,255)',
                'width': 1.5,
            },
        },
        'colorbar': {
            'autotick': False,
            'tickprefix': '',
            'title': 'Nb of players',
        },
    }
]

# Map layout: no frame, no coastlines.
layout = {
    'title': 'Top ATP500 in 2016',
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {
            'type': 'Mercator',
        },
    },
}

fig = {'data': data, 'layout': layout}
# NOTE(review): validate=False presumably bypasses plotly's attribute checks;
# confirm that 'autotick' and the capitalised 'Mercator' are still accepted.
py.iplot(fig, validate=False, filename='d3-world-map')

With Slider

In [13]:
# Preview the first two rows of atp.
atp.head(2)

Unnamed: 0,Date,Rank,Player_id,Points,Year,First_name,Last_name,Hand,Birth_date,Name,Code
0,2010-12-27,1,104745,12450,2010,Rafael,Nadal,L,1986-06-03 00:00:00,Spain,ESP
1,2010-12-27,2,103819,9145,2010,Roger,Federer,R,1981-08-08 00:00:00,Switzerland,CHE


In [24]:
# Number of rows per (Year, Name, Code) combination, sorted by Name; the
# count ends up in the Player_id column. Selecting Player_id before counting
# avoids counting every other column only to discard the results.
atp_pc = (
    atp
    .groupby(['Year', 'Name', 'Code'])['Player_id']
    .count()
    .reset_index()
    .sort_values(by='Name')
)

In [25]:
# Keep only the per-country counts from the year 2000 onwards.
atp_pc = atp_pc.loc[atp_pc['Year'] >= 2000]
atp_pc.head(2)

Unnamed: 0,Year,Name,Code,Player_id
1447,2007,Algeria,DZA,1
1313,2005,Algeria,DZA,2


In [3]:

%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from geonamescache import GeonamesCache
from helpers import slug
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from mpl_toolkits.basemap import Basemap

ModuleNotFoundError: No module named 'geonamescache'