In [170]:
# Load Packages
import pandas as pd 
import numpy as np
from bokeh.io import output_notebook, show, curdoc
output_notebook()
from bokeh.plotting import figure
from bokeh.layouts import layout, column, row
from bokeh.models import (Button, CategoricalColorMapper, ColumnDataSource, BoxAnnotation,
                          HoverTool, Label, SingleIntervalTicker, Slider,)
from bokeh.palettes import Spectral10, all_palettes


In [13]:
regions = pd.read_csv('/Users/alexandredo/Desktop/Git/DataViz/Data/Olympic/noc_regions.csv')
df =  pd.read_csv('/Users/alexandredo/Desktop/Git/DataViz/Data/Olympic/athlete_events.csv')
country_continent = pd.read_csv('/Users/alexandredo/Desktop/Git/DataViz/Data/Olympic/countryContinent.csv', encoding='latin-1')
list_name_country = {noc:name_country for (noc, name_country) in zip( list(regions.NOC), list(regions.region)) }
df['region'] =  df.NOC.replace(list_name_country)
df.fillna({'Medal':'NoMedal'} , inplace=True)

In [18]:
df_summer = df[df.Season=='Summer']
list_years = list(sorted(df_summer.Year.unique(), reverse=True))
year_city = df_summer[['Year', 'City']].drop_duplicates()
list_city = { year:city for year,city in zip(list(year_city.Year), list(year_city.City) ) }
list_num_country = df_summer[['Year', 'region']].drop_duplicates().groupby('Year').count()
list_num_athlete = df_summer[['Year', 'Name']].drop_duplicates().groupby('Year').count()
list_num_sport = df_summer[['Year', 'Sport']].drop_duplicates().groupby('Year').count()

country_medal = df_summer[['Year', 'Event', 'Medal', 'region']].drop_duplicates()
country_medal = country_medal.groupby(['Year', 'region'])['Medal'].value_counts().to_frame('Count').reset_index()

country_rank = country_medal[country_medal.Medal=='Gold'].drop(columns=['Medal']).rename(columns={'Count':'Number Gold'})
country_rank['Number Silver'] = list(country_rank.Year.astype(str) + country_rank.region)
country_rank['Number Bronze'] = list(country_rank.Year.astype(str) + country_rank.region)

country_medal_silver = country_medal[country_medal.Medal=='Silver']
dict_silver = { year_country:count for year_country, count in zip(list(country_medal_silver.Year.astype(str) + country_medal_silver.region), list(country_medal_silver.Count)) }
country_medal_bronze = country_medal[country_medal.Medal=='Bronze']
dict_bronze = { year_country:count for year_country, count in zip(list(country_medal_bronze.Year.astype(str) + country_medal_bronze.region), list(country_medal_bronze.Count)) } 

country_rank['Number Silver'].replace(dict_silver, inplace=True)
country_rank['Number Silver'] = list(map( lambda x: 0 if(isinstance(x, str)) else x ,  list(country_rank['Number Silver'])))
country_rank['Number Bronze'].replace(dict_bronze, inplace=True)
country_rank['Number Bronze'] = list(map( lambda x: 0 if(isinstance(x, str)) else x ,  list(country_rank['Number Bronze'])))

country_rank['Number athletes'] = list(country_rank.Year.astype(str) + country_rank.region)
country_athletes = df_summer[['Year', 'region', 'Name']].drop_duplicates().groupby(['Year', 'region']).count().reset_index()
dict_athletes = { year_country:count for year_country, count in zip(list(country_athletes.Year.astype(str) + country_athletes.region), list(country_athletes.Name)) } 
country_rank['Number athletes'].replace(dict_athletes, inplace=True)
dict_continent = {country:continent for (country, continent) in zip(list(country_continent.country), list(country_continent.continent) )}
dict_continent['USA'] = 'Americas'
dict_continent['UK'] = 'Europe'
dict_continent['South Korea'] = 'Asia'
dict_continent['Iran'] = 'Asia'
dict_continent['Taiwan'] = 'Asia'
dict_continent['Russia'] = 'Europe-Asia'
dict_continent['France'] = 'France'

country_rank['Continent'] = country_rank.region.replace(dict_continent)

data={}
for year in list_years:
    country_rank_year = country_rank[country_rank.Year==year].sort_values(['Number Gold', 'Number Silver', 'Number Bronze'], ascending=False).reset_index()
    country_rank_year.drop(columns=['index'], inplace=True)
    country_rank_year['Rank'] = list(country_rank_year.index +1)
    country_rank_year['Size'] = country_rank_year['Number athletes']/12
    data[year] = country_rank_year

In [243]:
def modify_doc(doc):
    from bokeh.layouts import layout
    source = ColumnDataSource(data=data[list_years[-1]])
    plot = figure( 
               x_range = (-10, 90),
               y_range = (-15, 70), 
                title='Olympic',
                plot_height=300,
                plot_width=500 

                 )
    plot.xaxis.ticker = SingleIntervalTicker(interval=5)
    plot.xaxis.axis_label = "Rank"
    plot.yaxis.ticker = SingleIntervalTicker(interval=5)
    plot.yaxis.axis_label = "Number of gold medals"
    #color_mapper = CategoricalColorMapper(palette=Spectral10, factors=['Oceania', 'Europe', 'France', 'Americas', 'Europe-Asia', 'Africa','Asia'])
    color_mapper = CategoricalColorMapper(palette= 10*Spectral10, factors=list(country_rank.region.unique()))
    plot.circle('Rank',
                'Number Gold',
                size='Size', 
                fill_alpha=0.8,
                line_width=0.5,
                line_alpha=0.5,
                line_color='#7c7e71',
                fill_color={'field': 'region', 'transform': color_mapper},
                legend='region',
                source=source)
    
    plot.legend.label_text_font_size = '7pt'
    
    label_year = Label(x=30, y=45, text='Year : ' + str(list_years[-1]), text_font_size='40px', text_color='dimgray')
    plot.add_layout(label_year)
    
    label_city = Label(x=25, y=35, text='Host city : ' + list_city[list_years[-1]], text_font_size='35px', text_color='dimgray')
    plot.add_layout(label_city)
    
    label_num_country = Label(x=30, y=25, text='Number of nations : ' + str(int(list_num_country.loc[list_years[-1]])), text_font_size='25px', text_color='dimgray')
    plot.add_layout(label_num_country)
    
    label_num_athlete = Label(x=30, y=20, text='Number of athletes : ' + str(int(list_num_athlete.loc[list_years[-1]])), text_font_size='25px', text_color='dimgray')
    plot.add_layout(label_num_athlete)
    
    label_num_sport = Label(x=30, y=15, text='Number of sports : ' + str(int(list_num_sport.loc[list_years[-1]])), text_font_size='25px', text_color='dimgray')
    plot.add_layout(label_num_sport)
    
    plot.add_tools(HoverTool(tooltips="@region", show_arrow=False, point_policy='follow_mouse'))

    #show(plot)

    def animate_update():
        index = list_years.index(slider.value)
        index -=1
        if index <0:
            index = len(list_years)-1
        slider.value = list_years[index]


    def slider_update(attrname, old, new):
        value = slider.value
        year = [y for y in list_years if y >= value][-1] 
        
        label_year.text = 'Year : ' + str(year)
        label_city.text = 'Host city : ' + list_city[year]
        label_num_country.text = 'Number of nations : ' + str(int(list_num_country.loc[year]))
        label_num_athlete.text = 'Number of athletes : ' + str(int(list_num_athlete.loc[year]))
        label_num_sport.text = 'Number of sports : ' + str(int(list_num_sport.loc[year]))

        source.data= data[year]

    slider = Slider(start=list_years[-1], end=list_years[0], value=list_years[-1], step=4, title="Year")
    slider.on_change('value', slider_update)
    callback_id = None
    
    def animate():
        global callback_id
        if button.label == '► Play':
            button.label = '❚❚ Pause'
            callback_id = doc.add_periodic_callback(animate_update, 800)
        else:
            button.label = '► Play'
            doc.remove_periodic_callback(callback_id)

    button = Button(label='► Play', width=60)
    button.on_click(animate)
    
    layout = layout([[plot],[slider, button],], sizing_mode='scale_width')
    doc.add_root(layout)
    
show(modify_doc)

In [35]:
# creat dataframe containing the France's medal records and H/F participant records by years. 
df_summer = df[df.Season=='Summer']
list_years = list(sorted(df_summer.Year.unique(), reverse=True))
df_year_medal = df_summer[df_summer.region=='France'][['Year', 'Event', 'Medal']].drop_duplicates()
df_year_medal = df_year_medal.groupby('Year')['Medal'].value_counts().to_frame('Count')

df_year_sex = df_summer[df_summer.region=='France'][['Year', 'Name', 'Sex']].drop_duplicates()
df_year_sex = df_year_sex.groupby('Year')['Sex'].value_counts().to_frame('Count')

france_records = pd.DataFrame(index=list_years)
france_records['Number Gold'] = list(df_year_medal.loc[[(year, 'Gold') for year in list_years]].fillna(0)['Count'])
france_records['Number Silver'] = list(df_year_medal.loc[[(year, 'Silver') for year in list_years]].fillna(0)['Count'])
france_records['Number Bronze'] = list(df_year_medal.loc[[(year, 'Bronze') for year in list_years]].fillna(0)['Count'])

france_records['Number M'] = list(df_year_sex.loc[[(year, 'M') for year in list_years]].fillna(0)['Count'])
france_records['Number F'] = list(df_year_sex.loc[[(year, 'F') for year in list_years]].fillna(0)['Count'])

france_records['Number M percent'] = france_records['Number M']/(france_records['Number M'] + france_records['Number F'])
france_records['Number F percent'] = france_records['Number F']/(france_records['Number M'] + france_records['Number F'])

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  # This is added back by InteractiveShellApp.init_path()
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  del sys.path[0]
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  app.launch_new_instance()


In [282]:
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import GnBu3, OrRd3
from bokeh.plotting import figure


#years = list(map(str, list_years))
years = list_years
medal_names = ['Number Gold', 'Number Silver', 'Number Bronze']
sex_names = ['Male', 'Female']

hovermedal = zip( list(france_records['Number Gold']), list(france_records['Number Silver']), list(france_records['Number Bronze']))
hovermedal = ['{:d} {:d} {:d}'.format(int(g), int(s), int(b)) for (g,s,b) in hovermedal]

hoversex = ['{}% '.format(f) for f in  list(round(100*france_records['Number F percent'], 1))]

medal = {'years' : years,
           'Number Gold'   : list(france_records['Number Gold']),
           'Number Silver'   : list(france_records['Number Silver']),
           'Number Bronze'   : list(france_records['Number Bronze']),
           'hovermedal' : hovermedal,
           'hoversex' : hoversex
        }
sex = {'years': years,
        'Male' : list(-40*france_records['Number M percent']),
        'Female': list(-40*france_records['Number F percent']),
       'hovermedal' : hovermedal,
        'hoversex' : hoversex
                           }
           
p = figure(
           y_range=(1880, 2020),
           plot_height=700,
           x_range=(-60, 125),
           title="France's records at the Olympics game",
           toolbar_location=None)

p.add_tools(HoverTool(tooltips=[("Year","@years"),("Female", "@hoversex"), ('Number of medals' ,'@hovermedal')] , show_arrow=False, point_policy='follow_mouse'))
#p.add_tools(HoverTool(tooltips='@hovermedal', show_arrow=False, point_policy='follow_mouse'))


p.hbar_stack(medal_names, y='years', height=1.6, alpha=1, color=['yellow', 'lightgray','goldenrod'],line_color="grey", source=ColumnDataSource(medal)
             , legend=['Gold', 'Silver', 'Bronze'])

p.hbar_stack(sex_names, y='years', height=1, alpha=0.9, color=['white', 'deeppink'],
             #line_color="blue",
             source=ColumnDataSource(sex),
             legend=['', 'Female %'])

box = BoxAnnotation(left=-60, right=0, fill_color='seashell', fill_alpha=0.3)
p.add_layout(box)

#p.y_range.range_padding = 0.1
p.ygrid.grid_line_color = None
p.legend.location = "top_right"
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.xaxis.bounds = (0, 120)
p.yaxis.axis_label = 'Year'
p.xaxis.axis_label = 'Compostion of team (femal-male)    Number of medals' 
label_F = Label(x=60, y=1950, text='F', text_font_size='65px', text_color='grey', text_alpha=0.4)
label_R = Label(x=75, y=1950, text='R', text_font_size='65px', text_color='grey',text_alpha=0.4)
label_A = Label(x=90, y=1950, text='A', text_font_size='65px', text_color='grey', text_alpha=0.4)

p.add_layout(label_F)
p.add_layout(label_R)
p.add_layout(label_A)
show(p)