## Setup

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
base_site = "https://en.wikipedia.org/wiki/List_of_national_capitals_by_population"

In [3]:
r = requests.get(base_site)

In [4]:
html = r.content

## Extracting Tables with Beautiful Soup

In [5]:
soup = BeautifulSoup(html,'lxml')

In [35]:
soup.find_all('table')[1]

<table border="1" class="wikitable sortable static-row-numbers plainrowheaders srn-white-background" style="text-align:right;">
<tbody><tr class="static-row-header" style="text-align:center;vertical-align:bottom;">
<th>Country (or territory)</th>
<th>Capital</th>
<th>Population</th>
<th style="max-width:4em">% of country</th>
<th>Source
</th></tr>
<tr>
<th style="text-align:left"><span class="flagicon" style="display:inline-block;width:25px;text-align:left;"><img alt="" class="thumbborder" data-file-height="600" data-file-width="900" decoding="async" height="15" src="//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Flag_of_the_People%27s_Republic_of_China.svg/23px-Flag_of_the_People%27s_Republic_of_China.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Flag_of_the_People%27s_Republic_of_China.svg/35px-Flag_of_the_People%27s_Republic_of_China.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Flag_of_the_People%27s_Republic_of_China.svg/45px-Flag_of

In [9]:
table = soup.find_all('table')[1]

In [10]:
table.find_all('tr')[0].contents

['\n',
 <th>Country (or territory)</th>,
 '\n',
 <th>Capital</th>,
 '\n',
 <th>Population</th>,
 '\n',
 <th style="max-width:4em">% of country</th>,
 '\n',
 <th>Source
 </th>]

In [19]:
capitals = [row.contents[3].text for row in table.find_all('tr')]
capitals

['Capital',
 ' Beijing',
 ' Tokyo',
 ' Kinshasa',
 ' Moscow',
 ' Jakarta',
 ' Seoul',
 ' Cairo',
 ' Mexico City',
 ' London',
 ' Dhaka',
 ' Lima',
 ' Tehran',
 ' Bangkok',
 ' Hanoi',
 ' Riyadh',
 ' Hong Kong',
 ' Bogotá',
 ' Baghdad',
 ' Santiago',
 ' Singapore',
 ' Ankara',
 ' Berlin',
 ' Damascus',
 ' Algiers',
 ' Madrid',
 ' Pyongyang',
 ' Kabul',
 ' Nairobi',
 ' Addis Ababa',
 ' Buenos Aires',
 ' Rome',
 ' Kyiv',
 ' Yaoundé',
 ' Taipei',
 ' Brasília',
 ' Amman',
 ' Luanda',
 ' Guatemala City',
 ' Pretoria',
 ' Paris',
 ' Tashkent',
 ' Baku',
 ' Havana',
 ' Phnom Penh',
 ' Bucharest',
 ' Khartoum',
 ' Caracas',
 ' Brazzaville',
 ' Rabat',
 ' Manila',
 ' Vienna',
 ' Budapest',
 ' Warsaw',
 ' Minsk',
 ' Kampala',
 ' Accra',
 ' Antananarivo',
 ' Beirut',
 ' Quito',
 ' Harare',
 ' Doha',
 " Sana'a",
 ' Conakry',
 ' Kuala Lumpur',
 ' Montevideo',
 ' Lusaka',
 ' Bamako',
 ' Sofia',
 ' Prague',
 ' Port-au-Prince',
 ' Tripoli',
 ' Dublin',
 ' Kuwait City',
 ' Belgrade',
 ' Santo Domingo',
 

## Using Pandas to Extract Tables


In [20]:
import pandas as pd


In [21]:
tables = pd.read_html(base_site)

In [22]:
type(tables[0])

pandas.core.frame.DataFrame

In [23]:
len(tables)

4

In [24]:
tables[1]

Unnamed: 0,Country (or territory),Capital,Population,% of country,Source
0,China *,Beijing,21542000,,[1] 2010
1,Japan *,Tokyo,13929286,,[2] 2017
2,DR Congo,Kinshasa,12691000,,[3] 2017
3,Russia *,Moscow,12506468,,[4] 2011
4,Indonesia *,Jakarta,10075310,,[5] 2011
...,...,...,...,...,...
235,Palau,Ngerulmud,271,,
236,Cocos (Keeling) Islands (Australia),West Island,120,,
237,Pitcairn Islands (UK),Adamstown,50,,
238,South Georgia and the South Sandwich Islands (UK),King Edward Point,22,,


In [25]:
tables[1].columns

Index(['Country (or territory)', 'Capital', 'Population', '% of country',
       'Source'],
      dtype='object')

In [34]:
pd.read_html(base_site,attrs={"class":"sidebar"})

[                                   Lists of capitals
                                         Of countries
 0  in alphabetical order by latitude by populatio...
 1                            Of country subdivisions
 2  Capitals outside the territories they serve Pu...
 3                                      Cities portal
 4  .mw-parser-output .navbar{display:inline;font-...]

In [36]:
pd.read_html(base_site,attrs={"class":"wikitable"})

[                                Country (or territory)              Capital  \
 0                                              China *              Beijing   
 1                                              Japan *                Tokyo   
 2                                             DR Congo             Kinshasa   
 3                                             Russia *               Moscow   
 4                                          Indonesia *              Jakarta   
 ..                                                 ...                  ...   
 235                                              Palau            Ngerulmud   
 236                Cocos (Keeling) Islands (Australia)          West Island   
 237                              Pitcairn Islands (UK)            Adamstown   
 238  South Georgia and the South Sandwich Islands (UK)    King Edward Point   
 239                         Norfolk Island (Australia)  Kingston (de facto)   
 
     Population % of country    Source

## Assignment

In [37]:
site = "https://store.steampowered.com/tags/en/Action/"

In [38]:
response = requests.get(site)

In [45]:
htm = response.content

In [48]:
soup = BeautifulSoup(htm,'html.parser')

In [49]:
soup.find_all('table')

[]