In [1]:
import requests
import json
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import time
from ipywidgets import interact

In [2]:
try:
  import pycountry

except:
  !pip install pycountry
  import pycountry

Collecting pycountry
  Downloading pycountry-22.3.5.tar.gz (10.1 MB)
     |████████████████████████████████| 10.1 MB 4.7 MB/s 
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
    Preparing wheel metadata ... done
Building wheels for collected packages: pycountry
  Building wheel for pycountry (PEP 517) ... done
  Created wheel for pycountry: filename=pycountry-22.3.5-py2.py3-none-any.whl size=10681845 sha256=639dd00951c892683d6d54d9e78cb7a4077b1cf0727f9ef310b5c54eddaf2c18
  Stored in directory: /root/.cache/pip/wheels/0e/06/e8/7ee176e95ea9a8a8c3b3afcb1869f20adbd42413d4611c6eb4
Successfully built pycountry
Installing collected packages: pycountry
Successfully installed pycountry-22.3.5


In [3]:
# Requesting JSON from URL

url = 'https://www.ag-grid.com/example-assets/olympic-winners.json'

# A timeout keeps the cell from hanging indefinitely if the server never answers.
r = requests.get(url, timeout=30)

# Stop here with a clear HTTP error instead of failing later while parsing
# an error page as JSON.
r.raise_for_status()

# Decoded response body; later cells pass it to pd.DataFrame.
json_file = r.json()



In [4]:
# Setting pandas options
# Raise the display limits so large frames are shown with up to 500 rows and
# 500 columns, on lines up to 1000 characters wide.

pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [5]:
# Converting JSON to Pandas DataFrame
# The column list fixes both which fields are kept and their order.

olympic_columns = [
    "athlete", "age", "country", "year", "date",
    "sport", "gold", "silver", "bronze", "total",
]
dataFrame = pd.DataFrame(json_file, columns=olympic_columns)

# Last expression of the cell: rendered as the cell output.
dataFrame

Unnamed: 0,athlete,age,country,year,date,sport,gold,silver,bronze,total
0,Michael Phelps,23.0,United States,2008,24/08/2008,Swimming,8,0,0,8
1,Michael Phelps,19.0,United States,2004,29/08/2004,Swimming,6,0,2,8
2,Michael Phelps,27.0,United States,2012,12/08/2012,Swimming,4,2,0,6
3,Natalie Coughlin,25.0,United States,2008,24/08/2008,Swimming,1,2,3,6
4,Aleksey Nemov,24.0,Russia,2000,01/10/2000,Gymnastics,2,1,3,6
...,...,...,...,...,...,...,...,...,...,...
8613,Olena Sadovnycha,32.0,Ukraine,2000,01/10/2000,Archery,0,1,0,1
8614,Kateryna Serdiuk,17.0,Ukraine,2000,01/10/2000,Archery,0,1,0,1
8615,Wietse van Alten,21.0,Netherlands,2000,01/10/2000,Archery,0,0,1,1
8616,Sandra Wagner-Sachse,31.0,Germany,2000,01/10/2000,Archery,0,0,1,1


In [6]:
# Grouping data by athletes
# One row per athlete, holding the maximum of every remaining column.
# NOTE(review): max() is taken per column, so the values in a row need not
# come from the same original record, and medal columns hold the athlete's
# largest single entry rather than a sum -- confirm this is what is wanted.

athlete_columns = ["athlete", "country", "sport", "gold", "silver", "bronze", "total"]
data = dataFrame.loc[:, athlete_columns]
athlete_group = data.groupby("athlete").max()

# Last expression of the cell: rendered as the cell output.
athlete_group

Unnamed: 0_level_0,country,sport,gold,silver,bronze,total
athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
,Brazil,Volleyball,0,1,0,1
A. J. Mleczko,United States,Ice Hockey,0,1,0,1
Aaron Armstrong,Trinidad and Tobago,Athletics,0,1,0,1
Aaron Egbele,Nigeria,Athletics,0,0,1,1
Aaron Gate,New Zealand,Cycling,0,0,1,1
...,...,...,...,...,...,...
Øystein Pettersen,Norway,Cross Country Skiing,1,0,0,1
Šarunas Jasikevicius,Lithuania,Basketball,0,0,1,1
Štepánka Hilgertová,Czech Republic,Canoeing,1,0,0,1
Šárka Záhrobská,Czech Republic,Alpine Skiing,0,0,1,1


In [12]:
# Group data by country and sports

# This table gets its own name on purpose: another cell assigns a different
# grouping (by country and year) to `groupedData`, and the widget callback
# below looks its table up each time the dropdown changes. Sharing the name
# would make the callback fail once that other cell has run.
medals_by_sport = (
    pd.DataFrame(json_file, columns=["country", "sport", "gold", "silver", "bronze", "total"])
    .groupby(["country", "sport"], as_index=False)
    .sum()
    .rename(columns={'total': 'Total Medals'})
)

# Plot bar graph

def plot_total_medals(country):
  """Show a bar chart of one country's medal totals, one bar per sport.

  Parameters
  ----------
  country : str
      Country name as it appears in the "country" column of
      `medals_by_sport`. A name with no matching rows yields an empty chart.

  Returns
  -------
  None
      The figure is displayed as a side effect via `fig.show()`.
  """
  country_medals = medals_by_sport[medals_by_sport["country"] == country]
  fig = px.bar(country_medals, x="sport", y="Total Medals", barmode='group',
               title="{0} Medals by Sport".format(country), color='Total Medals')
  fig.show()


# Dropdown options shared by the interactive plots. The order follows the
# grouped table, which groupby sorts by country.
countries = medals_by_sport["country"].unique().tolist()


# The trailing semicolon hides the `<function ...>` repr that interact()
# returns; the widget itself is still displayed.
interact(plot_total_medals, country=countries);

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Algeria', 'Argentina', 'Armenia…

<function __main__.plot_total_medals>

In [8]:
# Per-athlete medal records used by the sunburst chart.
sunburst_columns = ["athlete", "country", "sport", "gold", "year", "silver", "bronze", "total"]
athlete_data = pd.DataFrame(json_file, columns=sunburst_columns)

def plot_athletes_sunburst(country):
  """Display a year > sport > athlete sunburst for one country.

  Rows of `athlete_data` whose "country" equals `country` are kept; segment
  sizes come from the "total" column and segments are coloured by "sport".
  The figure is shown as a side effect and nothing is returned.
  """
  is_selected = athlete_data["country"] == country
  fig = px.sunburst(
      athlete_data[is_selected],
      path=['year', 'sport', 'athlete'],
      values='total',
      color="sport",
  )
  fig.show()

# Country dropdown driving the sunburst; `countries` is built in another cell.
interact(plot_athletes_sunburst, country=countries)

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Algeria', 'Argentina', 'Armenia…

<function __main__.plot_athletes_sunburst>

In [9]:
# Group by country and year

# Only the medal counts are summed. The text column "sport" is left out
# because newer pandas versions would concatenate its strings inside sum()
# instead of dropping the column.
# The raw frame gets its own name so the full `dataFrame` built earlier is not
# overwritten, which keeps the cells that read it re-runnable.
medal_count_columns = ["gold", "silver", "bronze", "total"]
country_year_data = pd.DataFrame(json_file, columns=["country", "year"] + medal_count_columns)
groupedData = (
    country_year_data
    .groupby(["country", "year"], as_index=False)[medal_count_columns]
    .sum()
)


# Get alpha code from countries

# Maps each country name to its ISO alpha-3 code, or to None when the lookup
# result has no `alpha_3` attribute (e.g. pycountry did not find the name).
# NOTE(review): the lookup is by exact `name`; names in the dataset that are
# spelled differently from pycountry's will end up as None -- confirm whether
# a manual alias table is needed.
country_dict = {}

for country in groupedData["country"].unique():
  country_stats = pycountry.countries.get(name=country)
  country_dict[country] = getattr(country_stats, "alpha_3", None)


# Add alpha code column to dataframe

# Vectorised lookup replaces the row-by-row iterrows()/.at assignment.
groupedData["Alpha Code"] = groupedData["country"].map(country_dict)


In [11]:
# Plot country map

# Plotly Express orders animation frames by the first appearance of each
# value in the data. `groupedData` is ordered by country first, so without
# sorting the year slider would not run chronologically.
map_data = groupedData.sort_values(["year", "country"])

# One bubble per country and year: positioned by ISO alpha-3 code, sized by
# the "total" column, animated over "year". Rows without an alpha code have
# no location to plot.
fig = px.scatter_geo(map_data, locations="Alpha Code", color="country",
                     hover_name="country", size="total",
                     projection="natural earth", animation_frame="year")
fig.show()