### Project examining All-Time Olympic Medal counts
#### by Erika Harrell
#### August 2024

##### This data science project uses information on the all time number of medals won by country available on Wikipeida. This table is current through the Paris 2024 Summer Games. It conatins web scraping, data cleaning and the creation of data visualizations including maps and other figures. To identify countries for mapping, it uses the ISO-Alpha3 classification to map countries. 

In [52]:
# importing Python libraries
import requests 
import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO
import numpy as np
import plotly.express as px
import country_converter as coco
#facilitate showing graphs when exporting notebook to HTML
import plotly.io as pio
pio.renderers.default="notebook"
#show all output in each cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [53]:
#scraping Wikipedia page
url = "https://en.wikipedia.org/wiki/All-time_Olympic_Games_medal_table"
#making GET request to get information (raw HTML info) from server using URL
response = requests.get(url)
#using BeautifulSoup's HTML parser over the raw HTML content to create a BeautifulSoup object
soup = BeautifulSoup(response.content, "html.parser")
#finding all text in BeautifulSoup object with table HTML tag
tables = soup.find_all("table")
#getting the second element of tables wihch is the first table on the Wikipedia page and
#use pandas read_html command to read it in
#have to wrap string version of table in StringIO command because read_html does not take string values directly
df1 = pd.read_html(StringIO(str(tables[1])))
# printing table
print(df1)

[                                            Team Summer Olympic Games  \
                                 Team (IOC code)                  No.   
0                              Afghanistan (AFG)                   16   
1                                  Albania (ALB)                   10   
2                                  Algeria (ALG)                   15   
3                                Argentina (ARG)                   26   
4                                  Armenia (ARM)                    8   
..                                           ...                  ...   
158       Individual Neutral Athletes (AIN)[AIN]                    1   
159      Independent Olympic Athletes (IOA)[IOA]                    3   
160  Independent Olympic Participants (IOP)[IOP]                    1   
161                        Mixed team (ZZX)[ZZX]                    3   
162                                       Totals                   30   

                                                 

In [54]:
#get information about df1
#get type of df1
type(df1)
#get length of df1 list
len(df1)
#get type of first element of df1
type(df1[0])

list

1

pandas.core.frame.DataFrame

In [55]:
#get dataframe out of list
df = df1[0]
type(df)

pandas.core.frame.DataFrame

In [56]:
# get information about dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163 entries, 0 to 162
Data columns (total 16 columns):
 #   Column                                       Non-Null Count  Dtype 
---  ------                                       --------------  ----- 
 0   (Team, Team (IOC code))                      163 non-null    object
 1   (Summer Olympic Games, No.)                  163 non-null    int64 
 2   (Summer Olympic Games, Unnamed: 2_level_1)   163 non-null    int64 
 3   (Summer Olympic Games, Unnamed: 3_level_1)   163 non-null    int64 
 4   (Summer Olympic Games, Unnamed: 4_level_1)   163 non-null    int64 
 5   (Summer Olympic Games, Unnamed: 5_level_1)   163 non-null    int64 
 6   (Winter Olympic Games, No.)                  163 non-null    int64 
 7   (Winter Olympic Games, Unnamed: 7_level_1)   163 non-null    int64 
 8   (Winter Olympic Games, Unnamed: 8_level_1)   163 non-null    int64 
 9   (Winter Olympic Games, Unnamed: 9_level_1)   163 non-null    int64 
 10  (Winter Olympi

In [57]:
# dropping the last row of the dataframe
df = df.iloc[:-1]
print(df)

                                            Team Summer Olympic Games  \
                                 Team (IOC code)                  No.   
0                              Afghanistan (AFG)                   16   
1                                  Albania (ALB)                   10   
2                                  Algeria (ALG)                   15   
3                                Argentina (ARG)                   26   
4                                  Armenia (ARM)                    8   
..                                           ...                  ...   
157                          Zimbabwe (ZIM)[ZIM]                   15   
158       Individual Neutral Athletes (AIN)[AIN]                    1   
159      Independent Olympic Athletes (IOA)[IOA]                    3   
160  Independent Olympic Participants (IOP)[IOP]                    1   
161                        Mixed team (ZZX)[ZZX]                    3   

                                                  

In [58]:
# get column names of dataframe
df.columns
# get number of columns
len(df.columns)

MultiIndex([(                'Team',     'Team (IOC code)'),
            ('Summer Olympic Games',                 'No.'),
            ('Summer Olympic Games',  'Unnamed: 2_level_1'),
            ('Summer Olympic Games',  'Unnamed: 3_level_1'),
            ('Summer Olympic Games',  'Unnamed: 4_level_1'),
            ('Summer Olympic Games',  'Unnamed: 5_level_1'),
            ('Winter Olympic Games',                 'No.'),
            ('Winter Olympic Games',  'Unnamed: 7_level_1'),
            ('Winter Olympic Games',  'Unnamed: 8_level_1'),
            ('Winter Olympic Games',  'Unnamed: 9_level_1'),
            ('Winter Olympic Games', 'Unnamed: 10_level_1'),
            (      'Combined total',                 'No.'),
            (      'Combined total', 'Unnamed: 12_level_1'),
            (      'Combined total', 'Unnamed: 13_level_1'),
            (      'Combined total', 'Unnamed: 14_level_1'),
            (      'Combined total', 'Unnamed: 15_level_1')],
           )

16

In [59]:
#examining columns
#first row of dataframe
df.head(1)
#getting type of upper level columns
print(type(df['Team']))
print(type(df['Summer Olympic Games']))
print(type(df['Winter Olympic Games']))
print(type(df['Combined total']))


Unnamed: 0_level_0,Team,Summer Olympic Games,Summer Olympic Games,Summer Olympic Games,Summer Olympic Games,Summer Olympic Games,Winter Olympic Games,Winter Olympic Games,Winter Olympic Games,Winter Olympic Games,Winter Olympic Games,Combined total,Combined total,Combined total,Combined total,Combined total
Unnamed: 0_level_1,Team (IOC code),No.,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,No.,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,No.,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,Afghanistan (AFG),16,0,0,2,2,0,0,0,0,0,16,0,0,2,2


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [60]:
#creating new dataframe from nested dataframes in df
#working with Teams data frame
dfteam = df['Team']
#get type of dfteam
type(dfteam)
#get name of columns in dfteam
dfteam.columns
# get number of rows and columns in dfteam
dfteam.shape
#get first row of dfteam
dfteam.head(1)
#creating variable with Team name only by splitting Team (IOC code) variable
v1 = dfteam['Team (IOC code)'].str.split("(", expand=True)[0].to_frame()
#creating separate IOC code variable only by splitting Team (IOC code) variable
v2 = dfteam['Team (IOC code)'].str.split("(", expand=True)[1].to_frame()
#adding Team and IOC code variables to dfteam to create df1
df1 = pd.concat([dfteam,v1, v2], axis=1)
#look at first 5 rows of df1
df1.head()


pandas.core.frame.DataFrame

Index(['Team (IOC code)'], dtype='object')

(162, 1)

Unnamed: 0,Team (IOC code)
0,Afghanistan (AFG)


Unnamed: 0,Team (IOC code),0,1
0,Afghanistan (AFG),Afghanistan,AFG)
1,Albania (ALB),Albania,ALB)
2,Algeria (ALG),Algeria,ALG)
3,Argentina (ARG),Argentina,ARG)
4,Armenia (ARM),Armenia,ARM)


In [61]:
#clean up new columns
#get column names of df1
df1.columns
#rename 0 and 1 columns in df1
df1.rename(columns={0 : 'Team', 1 : 'IOCcode'}, inplace = True)
#check columns in df1
df1.columns

Index(['Team (IOC code)', 0, 1], dtype='object')

Index(['Team (IOC code)', 'Team', 'IOCcode'], dtype='object')

In [62]:
#check IOCcode column values
df1['IOCcode'].unique()
#drop extra text from IOC code column
df1['IOCcode'] = df1['IOCcode'].str[:3]
#check values on IOCcode
df1['IOCcode'].unique()

array(['AFG)', 'ALB)', 'ALG)', 'ARG)', 'ARM)', 'ANZ)[ANZ]',
       'AUS)[AUS][Z]', 'AUT)', 'AZE)', 'BAH)', 'BRN)', 'BAR)[BAR]',
       'BLR)', 'BEL)', 'BER)', 'BOH)[BOH][Z]', 'BOT)', 'BRA)',
       'BWI)[BWI]', 'BUL)[H]', 'BUR)', 'BDI)', 'CMR)', 'CAN)', 'CPV)',
       'CHI)[I]', 'CHN)[CHN]', 'COL)', 'CRC)', 'CIV)[CIV]', 'CRO)',
       'CUB)[Z]', 'CYP)', 'CZE)[CZE]', 'TCH)[TCH]', 'DEN)[Z]', 'DJI)[B]',
       'DMA)', 'DOM)', 'ECU)', 'EGY)[EGY][Z]', 'ERI)', 'EST)', 'ETH)',
       'FIJ)', 'FIN)', 'FRA)[O][P][Z]', 'GAB)', 'GEO)', 'GER)[GER] [Z]',
       'EUA)[EUA]', 'GDR)[GDR]', 'FRG)[FRG]', 'GHA)[GHA]', 'GBR)[GBR][Z]',
       'GRE)', 'GRN)', 'GUA)', 'GUY)[GUY]', 'HAI)[J]', 'HKG)[HKG]',
       'HUN)', 'ISL)', 'IND)[F]', 'INA)', 'IRI)[K]', 'IRQ)', 'IRL)',
       'ISR)', 'ITA)[M][S]', 'JAM)[JAM]', 'JPN)', 'JOR)', 'KAZ)', 'KEN)',
       'KOS)', 'PRK)', 'KOR)', 'KUW)', 'KGZ)', 'LAT)', 'LBN)', 'LIE)',
       'LTU)', 'LUX)[O]', 'MAS)[MAS]', 'MRI)', 'MEX)', 'MDA)', 'MGL)',
       'MNE)', 'MAR)', '

array(['AFG', 'ALB', 'ALG', 'ARG', 'ARM', 'ANZ', 'AUS', 'AUT', 'AZE',
       'BAH', 'BRN', 'BAR', 'BLR', 'BEL', 'BER', 'BOH', 'BOT', 'BRA',
       'BWI', 'BUL', 'BUR', 'BDI', 'CMR', 'CAN', 'CPV', 'CHI', 'CHN',
       'COL', 'CRC', 'CIV', 'CRO', 'CUB', 'CYP', 'CZE', 'TCH', 'DEN',
       'DJI', 'DMA', 'DOM', 'ECU', 'EGY', 'ERI', 'EST', 'ETH', 'FIJ',
       'FIN', 'FRA', 'GAB', 'GEO', 'GER', 'EUA', 'GDR', 'FRG', 'GHA',
       'GBR', 'GRE', 'GRN', 'GUA', 'GUY', 'HAI', 'HKG', 'HUN', 'ISL',
       'IND', 'INA', 'IRI', 'IRQ', 'IRL', 'ISR', 'ITA', 'JAM', 'JPN',
       'JOR', 'KAZ', 'KEN', 'KOS', 'PRK', 'KOR', 'KUW', 'KGZ', 'LAT',
       'LBN', 'LIE', 'LTU', 'LUX', 'MAS', 'MRI', 'MEX', 'MDA', 'MGL',
       'MNE', 'MAR', 'MOZ', 'NAM', 'NED', 'AHO', 'NZL', 'NIG', 'NGR',
       'MKD', 'NOR', 'PAK', 'PAN', 'PAR', 'PER', 'PHI', 'POL', 'POR',
       'PUR', 'QAT', 'EOR', 'ROU', 'RUS', 'RU1', 'URS', 'EUN', 'OAR',
       'ROC', 'SAM', 'LCA', 'SMR', 'KSA', 'SEN', 'SRB', 'SCG', 'SGP',
       'SVK', 'SLO',

In [63]:
#working on Summer columns
Summer = df['Summer Olympic Games']
#type of Summer
type(Summer)
#name of columns in Summer
Summer.columns
#number of rows and columns in Summer
Summer.shape
#get first row of Summer
Summer.head(1)
#add Summer to df1
df1 = pd.concat([df1,Summer], axis=1)
#rename Summer columns in df1 
df1.rename(columns={'No.' : 'Games_summer', 
                       'Unnamed: 2_level_1' : 'Gold_summer',
                         'Unnamed: 3_level_1' : 'Silver_summer', 
                         'Unnamed: 4_level_1':'Bronze_summer',
       'Unnamed: 5_level_1':'Total_summer'}, inplace = True)
#check columns in df1
df1.columns

pandas.core.frame.DataFrame

Index(['No.', 'Unnamed: 2_level_1', 'Unnamed: 3_level_1', 'Unnamed: 4_level_1',
       'Unnamed: 5_level_1'],
      dtype='object')

(162, 5)

Unnamed: 0,No.,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,16,0,0,2,2


Index(['Team (IOC code)', 'Team', 'IOCcode', 'Games_summer', 'Gold_summer',
       'Silver_summer', 'Bronze_summer', 'Total_summer'],
      dtype='object')

In [64]:
#working on Winter columns
Winter = df['Winter Olympic Games']
#type of Winter
type(Winter)
#columns in Winter
Winter.columns
#number of rows and columns
Winter.shape
#first row in Winter
Winter.head(1)
#add Winter to df1
df1 = pd.concat([df1,Winter], axis=1)
#rename Winter columns in df1
df1.rename(columns={'No.' : 'Games_winter', 
                       'Unnamed: 7_level_1' : 'Gold_winter',
                         'Unnamed: 8_level_1' : 'Silver_winter', 
                         'Unnamed: 9_level_1':'Bronze_winter',
       'Unnamed: 10_level_1':'Total_winter'}, inplace = True)
#check columns
df1.columns

pandas.core.frame.DataFrame

Index(['No.', 'Unnamed: 7_level_1', 'Unnamed: 8_level_1', 'Unnamed: 9_level_1',
       'Unnamed: 10_level_1'],
      dtype='object')

(162, 5)

Unnamed: 0,No.,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0,0,0,0,0


Index(['Team (IOC code)', 'Team', 'IOCcode', 'Games_summer', 'Gold_summer',
       'Silver_summer', 'Bronze_summer', 'Total_summer', 'Games_winter',
       'Gold_winter', 'Silver_winter', 'Bronze_winter', 'Total_winter'],
      dtype='object')

In [65]:
#working on Combined total columns
Combo = df['Combined total']
#type of Combo
type(Combo)
#columns in Combo
Combo.columns
#number of rows and columns
Combo.shape
#first row of Combo
Combo.head(1)
#add Combo columns to df1
df1 = pd.concat([df1,Combo], axis=1)
#rename Combo columns 
df1.rename(columns={'No.' : 'Games_total', 
                       'Unnamed: 12_level_1' : 'Gold_total',
                         'Unnamed: 13_level_1' : 'Silver_total', 
                         'Unnamed: 14_level_1':'Bronze_total',
       'Unnamed: 15_level_1':'Totalmedals'}, inplace = True)
#check columns
df1.columns

pandas.core.frame.DataFrame

Index(['No.', 'Unnamed: 12_level_1', 'Unnamed: 13_level_1',
       'Unnamed: 14_level_1', 'Unnamed: 15_level_1'],
      dtype='object')

(162, 5)

Unnamed: 0,No.,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,16,0,0,2,2


Index(['Team (IOC code)', 'Team', 'IOCcode', 'Games_summer', 'Gold_summer',
       'Silver_summer', 'Bronze_summer', 'Total_summer', 'Games_winter',
       'Gold_winter', 'Silver_winter', 'Bronze_winter', 'Total_winter',
       'Games_total', 'Gold_total', 'Silver_total', 'Bronze_total',
       'Totalmedals'],
      dtype='object')

In [66]:
#information about dataframe
df1.info()
#first row of dataframe
df1.head(1)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Team (IOC code)  162 non-null    object
 1   Team             162 non-null    object
 2   IOCcode          162 non-null    object
 3   Games_summer     162 non-null    int64 
 4   Gold_summer      162 non-null    int64 
 5   Silver_summer    162 non-null    int64 
 6   Bronze_summer    162 non-null    int64 
 7   Total_summer     162 non-null    int64 
 8   Games_winter     162 non-null    int64 
 9   Gold_winter      162 non-null    int64 
 10  Silver_winter    162 non-null    int64 
 11  Bronze_winter    162 non-null    int64 
 12  Total_winter     162 non-null    int64 
 13  Games_total      162 non-null    int64 
 14  Gold_total       162 non-null    int64 
 15  Silver_total     162 non-null    int64 
 16  Bronze_total     162 non-null    int64 
 17  Totalmedals      162 non-null    in

Unnamed: 0,Team (IOC code),Team,IOCcode,Games_summer,Gold_summer,Silver_summer,Bronze_summer,Total_summer,Games_winter,Gold_winter,Silver_winter,Bronze_winter,Total_winter,Games_total,Gold_total,Silver_total,Bronze_total,Totalmedals
0,Afghanistan (AFG),Afghanistan,AFG,16,0,0,2,2,0,0,0,0,0,16,0,0,2,2


In [67]:
#create column with iso alpha 3 codes for countries (Teams) to prep for mapping
#cases with 'not found' on iso_alpha3 gets recoded
df1['iso_alpha3'] = coco.convert(names = df1['Team'], to = 'ISO3', not_found = 'Missing')
df1['iso_alpha3'].head()
#check to see how many Teams have no ISO alpha 3 code
df1['no_iso3'] = df1['iso_alpha3'] == 'Missing'
df1['no_iso3'].value_counts()


Australasia  not found in regex
British West Indies  not found in regex
East Germany  not found in regex
West Germany  not found in regex
Netherlands Antilles  not found in regex
Refugee Olympic Team  not found in regex
Soviet Union  not found in regex
Unified Team  not found in regex
ROC  not found in regex
Serbia and Montenegro  not found in regex
Virgin Islands  not found in regex
Yugoslavia  not found in regex
Individual Neutral Athletes  not found in regex
Independent Olympic Athletes  not found in regex
Independent Olympic Participants  not found in regex
Mixed team  not found in regex


0    AFG
1    ALB
2    DZA
3    ARG
4    ARM
Name: iso_alpha3, dtype: object

no_iso3
False    146
True      16
Name: count, dtype: int64

In [68]:
#create column with iso alpha 2 codes for countries (Teams) 
#cases with 'not found' on iso_alpha2 gets recoded
df1['iso_alpha2'] = coco.convert(names = df1['Team'], to = 'ISO2', not_found = 'Missing')
df1['iso_alpha2'].head()
#check to see how many Teams have no ISO alpha 2 code
df1['no_iso2'] = df1['iso_alpha2'] == 'Missing'
df1['no_iso2'].value_counts()

Australasia  not found in regex
British West Indies  not found in regex
East Germany  not found in regex
West Germany  not found in regex
Netherlands Antilles  not found in regex
Refugee Olympic Team  not found in regex
Soviet Union  not found in regex
Unified Team  not found in regex
ROC  not found in regex
Serbia and Montenegro  not found in regex
Virgin Islands  not found in regex
Yugoslavia  not found in regex
Individual Neutral Athletes  not found in regex
Independent Olympic Athletes  not found in regex
Independent Olympic Participants  not found in regex
Mixed team  not found in regex


0    AF
1    AL
2    DZ
3    AR
4    AM
Name: iso_alpha2, dtype: object

no_iso2
False    146
True      16
Name: count, dtype: int64

In [69]:
#create continent variable
df1['continent'] = coco.convert(names=df1['Team'], to='Continent', not_found='Missing')
#check to see how many Teams have no continent code
df1['nocontinent'] = df1['continent'] == 'Missing'
df1['nocontinent'].value_counts()
#look at values for continent variable
df1['continent'].value_counts()
#Handle missing cases
df1['Team'] = df1['Team'].apply(lambda x: x.strip())
df1['continent'] = np.where(df1['Team'] ==  'British West Indies',  'America', df1['continent'])
df1['continent'] = np.where(df1['Team'] ==  'East Germany',  'Europe', df1['continent'])
df1['continent'] = np.where(df1['Team'] ==  'West Germany',  'Europe', df1['continent'])
df1['continent'] = np.where(df1['Team'] ==  'Netherlands Antilles',  'Europe', df1['continent'])
df1['continent'] = np.where(df1['Team'] ==  'Serbia and Montenegro',  'Europe', df1['continent'])
df1['continent'] = np.where(df1['Team'] ==  'Soviet Union',  'Europe', df1['continent'])
df1['continent'] = np.where(df1['Team'] ==  'Virgin Islands',  'America', df1['continent'])
df1['continent'] = np.where(df1['Team'] ==  'Yugoslavia',  'Europe', df1['continent'])
#check to see how many Teams have no continent code
df1['nocontinent'] = df1['continent'] == 'Missing'
df1['nocontinent'].value_counts()
#look at values for continent variable
df1['continent'].value_counts()
#check to see which teams are missing continents
df1['Team'][df1['continent'] == 'Missing'].value_counts()

Australasia  not found in regex
British West Indies  not found in regex
East Germany  not found in regex
West Germany  not found in regex
Netherlands Antilles  not found in regex
Refugee Olympic Team  not found in regex
Soviet Union  not found in regex
Unified Team  not found in regex
ROC  not found in regex
Serbia and Montenegro  not found in regex
Virgin Islands  not found in regex
Yugoslavia  not found in regex
Individual Neutral Athletes  not found in regex
Independent Olympic Athletes  not found in regex
Independent Olympic Participants  not found in regex
Mixed team  not found in regex


nocontinent
False    146
True      16
Name: count, dtype: int64

continent
Europe     45
Asia       38
Africa     29
America    29
Missing    16
Oceania     5
Name: count, dtype: int64

nocontinent
False    154
True       8
Name: count, dtype: int64

continent
Europe     51
Asia       38
America    31
Africa     29
Missing     8
Oceania     5
Name: count, dtype: int64

Team
Australasia                         1
Refugee Olympic Team                1
Unified Team                        1
ROC                                 1
Individual Neutral Athletes         1
Independent Olympic Athletes        1
Independent Olympic Participants    1
Mixed team                          1
Name: count, dtype: int64

In [70]:
#look at first 5 rows of df1
df1.head()

Unnamed: 0,Team (IOC code),Team,IOCcode,Games_summer,Gold_summer,Silver_summer,Bronze_summer,Total_summer,Games_winter,Gold_winter,...,Gold_total,Silver_total,Bronze_total,Totalmedals,iso_alpha3,no_iso3,iso_alpha2,no_iso2,continent,nocontinent
0,Afghanistan (AFG),Afghanistan,AFG,16,0,0,2,2,0,0,...,0,0,2,2,AFG,False,AF,False,Asia,False
1,Albania (ALB),Albania,ALB,10,0,0,2,2,5,0,...,0,0,2,2,ALB,False,AL,False,Europe,False
2,Algeria (ALG),Algeria,ALG,15,7,4,9,20,3,0,...,7,4,9,20,DZA,False,DZ,False,Africa,False
3,Argentina (ARG),Argentina,ARG,26,22,27,31,80,20,0,...,22,27,31,80,ARG,False,AR,False,America,False
4,Armenia (ARM),Armenia,ARM,8,2,11,9,22,8,0,...,2,11,9,22,ARM,False,AM,False,Asia,False


In [74]:
#create data frame with cases with iso alpha 3 codes for mapping
map_data = df1[df1['iso_alpha3'] != 'Missing']
map_data.shape

(146, 24)

In [94]:
#create map for Total number of Summer Olympic medals

fig1 = px.choropleth(map_data, locations='iso_alpha3', color = 'Total_summer', hover_name = 'Team', projection = 'natural earth',
                      title='Total Number of Summer Olympic Medals Won by Country using ISO Alpha-3 codes', 
                      labels= {'iso_alpha3': 'ISO Alpha-3 code','Total_summer': 'Medals Won'},
                      color_continuous_scale='ylorrd')
fig1.show()

In [95]:
#create map for Total number of Winter Olympic medals

fig2 = px.choropleth(map_data, locations='iso_alpha3', color = 'Total_winter', hover_name = 'Team', projection = 'natural earth',
                      title='Total Number of Winter Olympic Medals Won by Country using ISO Alpha-3 codes',
                      labels= {'iso_alpha3': 'ISO Alpha-3 code','Total_winter': 'Medals Won'},
                      color_continuous_scale='dense')
fig2.show()

In [96]:
#create map for Total Number of Olympic Gold medals

fig3 = px.choropleth(map_data, locations='iso_alpha3', color = 'Gold_total', hover_name = 'Team', projection='natural earth',
                      title='Total Number of Olympic Gold Medals Won by Country using ISO Alpha-3 codes',
                      labels= {'iso_alpha3': 'ISO Alpha-3 code','Gold_total': 'Medals Won'},
                      color_continuous_scale='oxy')                      
fig3.show()

In [97]:
#create map for Total number of Olympic Silver medals

fig4 = px.choropleth(map_data, locations='iso_alpha3', color = 'Silver_total', hover_name = 'Team', projection = 'natural earth',
                      title='Total Number of Olympic Silver Medals Won by Country using ISO Alpha-3 codes',
                      labels= {'iso_alpha3': 'ISO Alpha-3 code','Silver_total': 'Medals Won'},
                      color_continuous_scale='oxy')
fig4.show()

In [98]:
#create map for Total number of Olympic Bronze medals

fig5 = px.choropleth(map_data, locations='iso_alpha3', color = 'Bronze_total', hover_name = 'Team', projection = 'natural earth',
                      title='Total Number of Olympic Bronze Medals Won by Country using ISO Alpha-3 codes',
                      labels= {'iso_alpha3': 'ISO Alpha-3 code','Bronze_total': 'Medals Won'},
                      color_continuous_scale='oxy')
fig5.show()

In [100]:
#create map for Total number of Olympic medals

fig6 = px.choropleth(map_data, locations='iso_alpha3', color = 'Totalmedals', hover_name = 'Team', projection = 'natural earth',
                      title='Total Number of Olympic Medals Won by Country using ISO Alpha-3 codes',
                      labels= {'iso_alpha3': 'ISO Alpha-3 code','Totalmedals': 'Medals Won'},
                      color_continuous_scale='jet')
fig6.show()

In [123]:
#create dataframe with the 10 countries with the highest medal totals
#create dataframe with countries with the highest medal totals
toptentotal = df1[['Team', 'iso_alpha3', 'Totalmedals']].sort_values('Totalmedals', axis=0, ascending = False, inplace=False).reset_index(drop=True, inplace=False).head(10)
#create pie chart based on created dataframe
fig7 = px.pie(toptentotal, values='Totalmedals', names='Team', title='The 10 Countries that have won the Most Olympic Medals (All Countries)'
              , color_discrete_sequence=px.colors.sequential.Brwnyl_r, labels= {'Team': 'Country','Totalmedals': 'Medals Won'})
fig7.show()

In [124]:
#create dataframe with the 10 countries with the highest Summer Olympic Medal totals
toptensummer = df1[['Team', 'iso_alpha3', 'Total_summer']].sort_values('Total_summer', axis=0, ascending = False, inplace=False).reset_index(drop=True, inplace=False).head(10)
#create pie chart
fig8 = px.pie(toptensummer, values='Total_summer', names='Team', title='The 10 Countries that have won the Most Summer Olympic Medals (All Countries)'
              , color_discrete_sequence=px.colors.sequential.solar_r, labels= {'Team': 'Country','Total_summer': 'Medals Won'})
fig8.show()

In [113]:
#create dataframe with the 10 countries with the highest Winter Olympic Medal totals

toptenwinter = df1[['Team', 'iso_alpha3', 'Total_winter']].sort_values('Total_winter', axis=0, ascending = False, inplace=False).reset_index(drop=True, inplace=False).head(10)
#create pie chart
fig8 = px.pie(toptenwinter, values='Total_winter', names='Team', title='The 10 Countries that have won the Most Winter Olympic Medals (All Countries)'
              , color_discrete_sequence=px.colors.sequential.ice, labels= {'Team': 'Country','Total_winter': 'Medals Won'})
fig8.show()

In [126]:
#create a dataset with cases that have continent
con = df1[df1['continent'] != 'Missing']
con.shape

(154, 24)

In [130]:
#scatterplot for number of medals for all countries and teams in dataset
scat1 = px.scatter(con, x = 'Games_total' , y= 'Totalmedals', hover_data = ['Team'], color = 'continent')
scat1.show()