In [25]:
# Importing the required libraries
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [26]:
# Downloading contents of the web page.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of letting an
# error page be parsed as if it were the article.
url = "https://en.wikipedia.org/wiki/List_of_London_boroughs"
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

In [27]:
# Show only the beginning of the downloaded HTML; echoing the whole page
# bloats the notebook without adding information.
data[:500]

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>List of London boroughs - Wikipedia</title>\n<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"3d502e76-e6f0-4381-bde8-0f0085b5570f","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_London_boroughs","wgTitle":"List of London boroughs","wgCurRevisionId":1034773518,"wgRevisionId":1034773518,"wgArticleId":28092685,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Use dmy dates from August 2015","Use British Engl

In [28]:
# Parse the downloaded HTML text into a navigable BeautifulSoup tree
soup = BeautifulSoup(markup=data, features='html.parser')

In [29]:
# List the class attribute of every <table> on the page to see which one
# holds the borough data
table_classes = [tbl.get('class') for tbl in soup.find_all('table')]
print('Classes of each table:')
for classes in table_classes:
    print(classes)

Classes of each table:
['wikitable', 'sortable']
['wikitable', 'sortable']
['noprint', 'infobox']
['nowraplinks', 'hlist', 'mw-collapsible', 'mw-collapsed', 'navbox-inner']
['nowraplinks', 'navbox-subgroup']


In [30]:
# Creating list with all tables
tables = soup.find_all('table')

# Pick the first table carrying both the 'wikitable' and 'sortable' classes.
# Comparing class sets (rather than the exact attribute string
# 'wikitable sortable') still matches if the classes are reordered or an
# extra class is added to the table.
wanted_classes = {'wikitable', 'sortable'}
table = next(
    (candidate for candidate in tables
     if wanted_classes <= set(candidate.get('class') or [])),
    None,
)

# Fail here with a clear message instead of a later AttributeError on None.
if table is None:
    raise ValueError("No table with classes 'wikitable' and 'sortable' found on the page")

In [31]:
# Positions of the cells of interest within each data row of the table
BOROUGH_COL = 0
POPULATION_COL = 7

# Collecting data: gather one record per data row and build the DataFrame in
# a single step. DataFrame.append was removed in pandas 2.0, and growing a
# frame row by row copies the whole frame on every iteration.
records = []

# Fall back to the table itself when the parser produced no <tbody> element.
row_container = table.tbody if table.tbody is not None else table
for row in row_container.find_all('tr'):
    # Find all data cells of the row; header rows contain only <th> cells
    cells = row.find_all('td')

    # Skip header rows and rows too short to hold a population cell
    if len(cells) <= POPULATION_COL:
        continue

    records.append({
        'Borough': cells[BOROUGH_COL].text.strip(),
        'Population': cells[POPULATION_COL].text.strip(),
    })

# Defining the dataframe (the columns are kept even if no rows were found)
df = pd.DataFrame(records, columns=['Borough', 'Population'])

In [32]:
# Preview the first rows of the scraped table rather than the whole frame
df.head(10)

Unnamed: 0,Borough,Population
0,Barking and Dagenham[note 1],212906
1,Barnet,395896
2,Bexley,248287
3,Brent,329771
4,Bromley,332336
5,Camden,270029
6,Croydon,386710
7,Ealing,341806
8,Enfield,333794
9,Greenwich [note 2],287942


In [33]:
# Remove bracketed footnote markers such as '[note 1]' (and any whitespace in
# front of them) from every borough name, instead of listing each affected
# borough by hand.
df['Borough'] = (
    df['Borough']
    .str.replace(r'\s*\[[^\]]*\]', '', regex=True)
    .str.strip()
)

In [34]:
# Check the cleaned borough names on the first rows
df.head(10)

Unnamed: 0,Borough,Population
0,Barking and Dagenham,212906
1,Barnet,395896
2,Bexley,248287
3,Brent,329771
4,Bromley,332336
5,Camden,270029
6,Croydon,386710
7,Ealing,341806
8,Enfield,333794
9,Greenwich,287942


In [36]:
# Strip thousands separators from the population figures only. Applying the
# replacement to every column would also remove commas from borough names and
# fails on non-string columns; casting to str first keeps this cell
# re-runnable after the column has been converted to integers.
df['Population'] = df['Population'].astype(str).str.replace(',', '', regex=False)

In [37]:
# Convert the population strings to integers so they can be summed
population_as_int = df['Population'].astype(int)
df['Population'] = population_as_int

In [38]:
# Combined population of all rows currently in the frame
total_borough_population = df['Population'].sum()
total_borough_population

8952295

In [39]:
# Total from which the borough populations are subtracted to obtain the
# City of London figure.
# NOTE(review): presumably the Greater London total; its source and reference
# year are not recorded in this notebook. Confirm it matches the vintage of
# the scraped borough estimates, otherwise the residual is not a meaningful
# population figure.
GREATER_LONDON_POPULATION = 8982000

population_city_of_london = GREATER_LONDON_POPULATION - df['Population'].sum()

# A negative residual means the hard-coded total and the scraped figures are
# inconsistent; stop rather than store a nonsensical population.
if population_city_of_london < 0:
    raise ValueError(
        'Borough populations exceed the assumed Greater London total: '
        f'residual is {population_city_of_london}'
    )

population_city_of_london

29705

In [40]:
# Add the City of London as the last row. The position is derived from the
# frame's length instead of a hard-coded 32, so a borough is never overwritten
# (or a gap left) if the scraped table has a different number of rows.
# Dropping an existing 'City of London' row first keeps the cell idempotent
# when it is re-run.
df = df[df['Borough'] != 'City of London'].reset_index(drop=True)
df.loc[len(df)] = ['City of London', population_city_of_london]

In [41]:
# Preview the first rows of the final table rather than the whole frame
df.head(10)

Unnamed: 0,Borough,Population
0,Barking and Dagenham,212906
1,Barnet,395896
2,Bexley,248287
3,Brent,329771
4,Bromley,332336
5,Camden,270029
6,Croydon,386710
7,Ealing,341806
8,Enfield,333794
9,Greenwich,287942


In [42]:
# Write the final table to disk, leaving out the index column
output_csv = 'population.csv'
df.to_csv(output_csv, index=False)