### Project examining All-Time Olympic Medal counts
#### by Erika Harrell

In [158]:
# importing Python libraries
import requests 
import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO
#show all output in each cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [159]:
#scraping table from Wikipedia page
url = "https://en.wikipedia.org/wiki/All-time_Olympic_Games_medal_table"
#sending url to requests to create HTML object
response = requests.get(url)
#using Beautiful Soup's HTML parser to parse content in HTML object
soup = BeautifulSoup(response.content, "html.parser")
#finding all objects in URL with table label
tables = soup.find_all("table")
# getting the second element of tables wihch is the first table on the Wikipedia page
df1 = pd.read_html(StringIO(str(tables[1])))
# printing element
print(df1)

[                                            Team Summer Olympic Games  \
                                 Team (IOC code)                  No.   
0                              Afghanistan (AFG)                   16   
1                                  Albania (ALB)                   10   
2                                  Algeria (ALG)                   15   
3                                Argentina (ARG)                   26   
4                                  Armenia (ARM)                    8   
..                                           ...                  ...   
158       Individual Neutral Athletes (AIN)[AIN]                    1   
159      Independent Olympic Athletes (IOA)[IOA]                    3   
160  Independent Olympic Participants (IOP)[IOP]                    1   
161                        Mixed team (ZZX)[ZZX]                    3   
162                                       Totals                   30   

                                                 

In [160]:
#get information about df1
type(df1)
len(df1)
type(df1[0])

list

1

pandas.core.frame.DataFrame

In [161]:
#get dataframe out of list
df = df1[0]
type(df)

pandas.core.frame.DataFrame

In [162]:
# get information about dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163 entries, 0 to 162
Data columns (total 16 columns):
 #   Column                                       Non-Null Count  Dtype 
---  ------                                       --------------  ----- 
 0   (Team, Team (IOC code))                      163 non-null    object
 1   (Summer Olympic Games, No.)                  163 non-null    int64 
 2   (Summer Olympic Games, Unnamed: 2_level_1)   163 non-null    int64 
 3   (Summer Olympic Games, Unnamed: 3_level_1)   163 non-null    int64 
 4   (Summer Olympic Games, Unnamed: 4_level_1)   163 non-null    int64 
 5   (Summer Olympic Games, Unnamed: 5_level_1)   163 non-null    int64 
 6   (Winter Olympic Games, No.)                  163 non-null    int64 
 7   (Winter Olympic Games, Unnamed: 7_level_1)   163 non-null    int64 
 8   (Winter Olympic Games, Unnamed: 8_level_1)   163 non-null    int64 
 9   (Winter Olympic Games, Unnamed: 9_level_1)   163 non-null    int64 
 10  (Winter Olympi

In [163]:
# dropping the last row of the dataframe
df =df.iloc[:-1]
print(df)

                                            Team Summer Olympic Games  \
                                 Team (IOC code)                  No.   
0                              Afghanistan (AFG)                   16   
1                                  Albania (ALB)                   10   
2                                  Algeria (ALG)                   15   
3                                Argentina (ARG)                   26   
4                                  Armenia (ARM)                    8   
..                                           ...                  ...   
157                          Zimbabwe (ZIM)[ZIM]                   15   
158       Individual Neutral Athletes (AIN)[AIN]                    1   
159      Independent Olympic Athletes (IOA)[IOA]                    3   
160  Independent Olympic Participants (IOP)[IOP]                    1   
161                        Mixed team (ZZX)[ZZX]                    3   

                                                  

In [164]:
# get column names
df.columns

MultiIndex([(                'Team',     'Team (IOC code)'),
            ('Summer Olympic Games',                 'No.'),
            ('Summer Olympic Games',  'Unnamed: 2_level_1'),
            ('Summer Olympic Games',  'Unnamed: 3_level_1'),
            ('Summer Olympic Games',  'Unnamed: 4_level_1'),
            ('Summer Olympic Games',  'Unnamed: 5_level_1'),
            ('Winter Olympic Games',                 'No.'),
            ('Winter Olympic Games',  'Unnamed: 7_level_1'),
            ('Winter Olympic Games',  'Unnamed: 8_level_1'),
            ('Winter Olympic Games',  'Unnamed: 9_level_1'),
            ('Winter Olympic Games', 'Unnamed: 10_level_1'),
            (      'Combined total',                 'No.'),
            (      'Combined total', 'Unnamed: 12_level_1'),
            (      'Combined total', 'Unnamed: 13_level_1'),
            (      'Combined total', 'Unnamed: 14_level_1'),
            (      'Combined total', 'Unnamed: 15_level_1')],
           )

In [165]:
#examining columns
df.head(1)
df.columns[0]
#getting type of upper level columns
print(type(df['Team']))
print(type(df['Summer Olympic Games']))
print(type(df['Winter Olympic Games']))
print(type(df['Combined total']))


Unnamed: 0_level_0,Team,Summer Olympic Games,Summer Olympic Games,Summer Olympic Games,Summer Olympic Games,Summer Olympic Games,Winter Olympic Games,Winter Olympic Games,Winter Olympic Games,Winter Olympic Games,Winter Olympic Games,Combined total,Combined total,Combined total,Combined total,Combined total
Unnamed: 0_level_1,Team (IOC code),No.,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,No.,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,No.,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,Afghanistan (AFG),16,0,0,2,2,0,0,0,0,0,16,0,0,2,2


('Team', 'Team (IOC code)')

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [166]:
#creating new dataframe from nested dataframes in df
#working with Teams data frame
df1 = df['Team']
type(df1)
df1.columns[0]
df1.shape
df1.head()
#creating separate Team variable
v1=df1['Team (IOC code)'].str.split("(", expand=True)[0].to_frame()
type(v1)
#creating separate IOC code variable
v2=df1['Team (IOC code)'].str.split("(", expand=True)[1].to_frame()
type(v2)
#adding Team and IOC code variables to df1
df1=pd.concat([df1,v1, v2], axis=1)
df1.head()
#add column names to columns
df1.columns
df1.rename(columns={0 : 'Team', 1 : 'IOCcode'}, inplace = True)
df1.columns
#drop ) from IOC code column
df1['IOCcode'] = df1['IOCcode'].str.replace(')', '')
df1['IOCcode'].head()

pandas.core.frame.DataFrame

'Team (IOC code)'

(162, 1)

Unnamed: 0,Team (IOC code)
0,Afghanistan (AFG)
1,Albania (ALB)
2,Algeria (ALG)
3,Argentina (ARG)
4,Armenia (ARM)


pandas.core.frame.DataFrame

pandas.core.frame.DataFrame

Unnamed: 0,Team (IOC code),0,1
0,Afghanistan (AFG),Afghanistan,AFG)
1,Albania (ALB),Albania,ALB)
2,Algeria (ALG),Algeria,ALG)
3,Argentina (ARG),Argentina,ARG)
4,Armenia (ARM),Armenia,ARM)


Index(['Team (IOC code)', 0, 1], dtype='object')

Index(['Team (IOC code)', 'Team', 'IOCcode'], dtype='object')

0    AFG
1    ALB
2    ALG
3    ARG
4    ARM
Name: IOCcode, dtype: object

In [167]:
#working on Summer columns
Summer = df['Summer Olympic Games']
type(Summer)
Summer.columns
Summer.shape
Summer.head()
#add Summer to df1
df1 = pd.concat([df1,Summer], axis=1)
#rename columns in Summer
df1.rename(columns={'No.' : 'Games_summer', 
                       'Unnamed: 2_level_1' : 'Gold_summer',
                         'Unnamed: 3_level_1' : 'Silver_summer', 
                         'Unnamed: 4_level_1':'Bronze_summer',
       'Unnamed: 5_level_1':'Total_summer'}, inplace = True)
#check columns
df1.columns

pandas.core.frame.DataFrame

Index(['No.', 'Unnamed: 2_level_1', 'Unnamed: 3_level_1', 'Unnamed: 4_level_1',
       'Unnamed: 5_level_1'],
      dtype='object')

(162, 5)

Unnamed: 0,No.,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,16,0,0,2,2
1,10,0,0,2,2
2,15,7,4,9,20
3,26,22,27,31,80
4,8,2,11,9,22


Index(['Team (IOC code)', 'Team', 'IOCcode', 'Games_summer', 'Gold_summer',
       'Silver_summer', 'Bronze_summer', 'Total_summer'],
      dtype='object')

In [168]:
#working on Winter columns
Winter = df['Winter Olympic Games']
type(Winter)
Winter.columns
Winter.shape
Winter.head()
#add Winter to df1
df1 = pd.concat([df1,Winter], axis=1)
#rename columns in Winter
df1.rename(columns={'No.' : 'Games_winter', 
                       'Unnamed: 7_level_1' : 'Gold_winter',
                         'Unnamed: 8_level_1' : 'Silver_winter', 
                         'Unnamed: 9_level_1':'Bronze_winter',
       'Unnamed: 10_level_1':'Total_winter'}, inplace = True)
#check columns
df1.columns

pandas.core.frame.DataFrame

Index(['No.', 'Unnamed: 7_level_1', 'Unnamed: 8_level_1', 'Unnamed: 9_level_1',
       'Unnamed: 10_level_1'],
      dtype='object')

(162, 5)

Unnamed: 0,No.,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0,0,0,0,0
1,5,0,0,0,0
2,3,0,0,0,0
3,20,0,0,0,0
4,8,0,0,0,0


Index(['Team (IOC code)', 'Team', 'IOCcode', 'Games_summer', 'Gold_summer',
       'Silver_summer', 'Bronze_summer', 'Total_summer', 'Games_winter',
       'Gold_winter', 'Silver_winter', 'Bronze_winter', 'Total_winter'],
      dtype='object')

In [169]:
#working on Combined total columns
Combo = df['Combined total']
type(Combo)
Combo.columns
Combo.shape
Combo.head()
#add Winter to df1
df1 = pd.concat([df1,Combo], axis=1)
#rename columns in Winter
df1.rename(columns={'No.' : 'Games_total', 
                       'Unnamed: 12_level_1' : 'Gold_total',
                         'Unnamed: 13_level_1' : 'Silver_total', 
                         'Unnamed: 14_level_1':'Bronze_total',
       'Unnamed: 15_level_1':'Totalmedals'}, inplace = True)
#check columns
df1.columns

pandas.core.frame.DataFrame

Index(['No.', 'Unnamed: 12_level_1', 'Unnamed: 13_level_1',
       'Unnamed: 14_level_1', 'Unnamed: 15_level_1'],
      dtype='object')

(162, 5)

Unnamed: 0,No.,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,16,0,0,2,2
1,15,0,0,2,2
2,18,7,4,9,20
3,46,22,27,31,80
4,16,2,11,9,22


Index(['Team (IOC code)', 'Team', 'IOCcode', 'Games_summer', 'Gold_summer',
       'Silver_summer', 'Bronze_summer', 'Total_summer', 'Games_winter',
       'Gold_winter', 'Silver_winter', 'Bronze_winter', 'Total_winter',
       'Games_total', 'Gold_total', 'Silver_total', 'Bronze_total',
       'Totalmedals'],
      dtype='object')

In [171]:
#information about dataframe
df1.info()
df1.shape
df1.head(1)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Team (IOC code)  162 non-null    object
 1   Team             162 non-null    object
 2   IOCcode          162 non-null    object
 3   Games_summer     162 non-null    int64 
 4   Gold_summer      162 non-null    int64 
 5   Silver_summer    162 non-null    int64 
 6   Bronze_summer    162 non-null    int64 
 7   Total_summer     162 non-null    int64 
 8   Games_winter     162 non-null    int64 
 9   Gold_winter      162 non-null    int64 
 10  Silver_winter    162 non-null    int64 
 11  Bronze_winter    162 non-null    int64 
 12  Total_winter     162 non-null    int64 
 13  Games_total      162 non-null    int64 
 14  Gold_total       162 non-null    int64 
 15  Silver_total     162 non-null    int64 
 16  Bronze_total     162 non-null    int64 
 17  Totalmedals      162 non-null    in

(162, 18)

Unnamed: 0,Team (IOC code),Team,IOCcode,Games_summer,Gold_summer,Silver_summer,Bronze_summer,Total_summer,Games_winter,Gold_winter,Silver_winter,Bronze_winter,Total_winter,Games_total,Gold_total,Silver_total,Bronze_total,Totalmedals
0,Afghanistan (AFG),Afghanistan,AFG,16,0,0,2,2,0,0,0,0,0,16,0,0,2,2
