## Winter Olympics 2022 Medal Counts Web Scraping

In [1]:
# Imports

from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
# Beijing 2022 medal standings page on olympics.com
url = "https://olympics.com/beijing-2022/olympic-games/en/results/all-sports/medal-standings.htm"

# Send the GET request. requests applies no timeout by default, so set one
# explicitly; otherwise this cell can hang forever if the server stalls.
response = requests.get(url, timeout=30)

In [3]:
# Display the HTTP status code of the response (200 means the request succeeded)
response.status_code

200

In [4]:
# Parse the raw response body with Python's built-in HTML parser
soup = BeautifulSoup(markup=response.content, features='html.parser')

In [5]:
# Locate the medal standings table by its id attribute.
table = soup.find('table', id='medal-standing-table')

# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell.
if table is None:
    raise ValueError("Could not find a table with id 'medal-standing-table' in the page")

In [6]:
# Collect every <tr> of the table and skip the first one (the header row)
table_rows = table.find_all('tr')[1:]

In [7]:
# Preview the first data row to see how its <td> cells are laid out:
table_rows[0]

<tr> <td class="text-center" data-text="1"> <strong>1</strong> </td> <td data-text="Norway"> <div class="playerTag" country="NOR"><a class="country" href="../../../en/results/all-sports/noc-medalist-by-sport-norway.htm" title="en/results/all-sports/noc-medalist-by-sport-norway"><img alt="" aria-hidden="true" class="flag" role="presentation" src="../../../static/common/img/flags/NOR.png"/><abbr class="noc" title="Norway">Norway</abbr></a></div></td> <td class="text-center"> <a href="../../../en/results/all-sports/noc-medalist-by-sport-norway-gold.htm" title="NOR Gold Medal Total">
16</a> </td> <td class="text-center"> <a href="../../../en/results/all-sports/noc-medalist-by-sport-norway-silver.htm" title="NOR Silver Medal Total">
8</a> </td> <td class="text-center"> <a href="../../../en/results/all-sports/noc-medalist-by-sport-norway-bronze.htm" title="NOR Bronze Medal Total">
13</a> </td> <td class="text-center"> <a href="../../../en/results/all-sports/noc-medalist-by-sport-norway.htm" 

In [8]:
# Sanity check: text of the second cell (NOC name) in the second data row
table_rows[1].find_all('td')[1].text.strip()

'Germany'

In [9]:
# Number of data rows left after dropping the header
len(table_rows)

29

## Extract Data

In [10]:
# Rank shown in the first cell of each row
order = [row.find_all('td')[0].text.strip() for row in table_rows]

In [11]:
# Country / association name from the second cell
# (the page labels these NOC: National Olympic Committee)
country = [row.find_all('td')[1].text.strip() for row in table_rows]

In [12]:
# Gold medal total from the third cell
num_gold_medals = [row.find_all('td')[2].text.strip() for row in table_rows]

In [13]:
# Silver medal total from the fourth cell
num_silver_medals = [row.find_all('td')[3].text.strip() for row in table_rows]

In [14]:
# Bronze medal total from the fifth cell
num_bronze_medals = [row.find_all('td')[4].text.strip() for row in table_rows]

In [15]:
# Overall medal total from the sixth cell
medals_count = [row.find_all('td')[5].text.strip() for row in table_rows]

In [16]:
# Rank by total medal count from the seventh cell
order_by_total = [row.find_all('td')[6].text.strip() for row in table_rows]

In [17]:
# Create dataframe: one row per NOC, one column per scraped field.
medal_columns = {
    'Order': order,
    'NOC': country,
    'Gold Medals': num_gold_medals,
    'Silver Medals': num_silver_medals,
    'Bronze Medals': num_bronze_medals,
    'Medal Counts': medals_count,
    'Order By Total': order_by_total,
}

# Every scraped value is text. Cast all columns except the NOC name to
# integers so ranks and medal counts sort, sum and compare numerically
# instead of lexically. astype raises ValueError if a cell is not a number.
numeric_dtypes = {name: 'int64' for name in medal_columns if name != 'NOC'}

winter2022_medals_df = pd.DataFrame(medal_columns).astype(numeric_dtypes)

In [19]:
# First 10 rows: the top-placed NOCs at the Beijing 2022 Winter Olympics
winter2022_medals_df.iloc[:10]

Unnamed: 0,Order,NOC,Gold Medals,Silver Medals,Bronze Medals,Medal Counts,Order By Total
0,1,Norway,16,8,13,37,1
1,2,Germany,12,10,5,27,3
2,3,People's Republic of China,9,4,2,15,11
3,4,United States of America,8,10,7,25,5
4,5,Sweden,8,5,5,18,6
5,6,Netherlands,8,5,4,17,9
6,7,Austria,7,7,4,18,6
7,8,Switzerland,7,2,5,14,12
8,9,ROC,6,12,14,32,2
9,10,France,5,7,2,14,12


In [20]:
# Last 10 rows: the lowest-placed NOCs at the Beijing 2022 Winter Olympics
winter2022_medals_df.iloc[-10:]

Unnamed: 0,Order,NOC,Gold Medals,Silver Medals,Bronze Medals,Medal Counts,Order By Total
19,20,Hungary,1,0,2,3,18
20,21,Belgium,1,0,1,2,20
21,21,Czech Republic,1,0,1,2,20
22,21,Slovakia,1,0,1,2,20
23,24,Belarus,0,2,0,2,20
24,25,Spain,0,1,0,1,25
25,25,Ukraine,0,1,0,1,25
26,27,Estonia,0,0,1,1,25
27,27,Latvia,0,0,1,1,25
28,27,Poland,0,0,1,1,25


In [21]:
# Write the standings to a CSV file, leaving out the dataframe index
winter2022_medals_df.to_csv(path_or_buf="winter2022_olympics_medals.csv", index=False)