# Web-scrapping of Covid19 Data from Worldometers' Website

In [1]:
#Importing the required libraries

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
#Requesting text data from the link

In [4]:
url = 'https://www.worldometers.info/coronavirus/'
link = requests.get(url).text

In [5]:
#Calling the BeautifulSoup Library to increase readibility

In [6]:
soup = BeautifulSoup(link, 'lxml')
print(soup.prettify())

<!DOCTYPE html>
<!--[if IE 8]> <html lang="en" class="ie8"> <![endif]-->
<!--[if IE 9]> <html lang="en" class="ie9"> <![endif]-->
<!--[if !IE]><!-->
<html lang="en">
 <!--<![endif]-->
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   COVID Live - Coronavirus Statistics - Worldometer
  </title>
  <meta content="Live statistics and coronavirus news tracking the number of confirmed cases, recovered patients, tests, and death toll due to the COVID-19 coronavirus from Wuhan, China. Coronavirus counter with new cases, deaths, and number of tests per 1 Million population. Historical data and info. Daily charts, graphs, news and updates" name="description"/>
  <link href="/favicon/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
  <link href="/favicon/apple-icon-57x57.png" rel="apple-touch-icon" sizes="57x57"/>
  <link href="/favicon/apple-icon-60x60.png" rel="app

In [7]:
#Looking for the required table

In [8]:
soup.find_all('table')

[<table class="table table-bordered table-hover main_table_countries" id="main_table_countries_today" style="width:100%;margin-top: 0px !important;display:none;">
 <thead>
 <tr>
 <th width="1%">#</th>
 <th width="100">Country,<br/>Other</th>
 <th width="20">Total<br/>Cases</th>
 <th width="30">New<br/>Cases</th>
 <th width="30">Total<br/>Deaths</th>
 <th width="30">New<br/>Deaths</th>
 <th width="30">Total<br/>Recovered</th>
 <th width="30">New<br/>Recovered</th>
 <th width="30">Active<br/>Cases</th>
 <th width="30">Serious,<br/>Critical</th>
 <th width="30">Tot Cases/<br/>1M pop</th>
 <th width="30">Deaths/<br/>1M pop</th>
 <th width="30">Total<br/>Tests</th>
 <th width="30">Tests/<br/>
 <nobr>1M pop</nobr>
 </th>
 <th width="30">Population</th>
 <th style="display:none" width="30">Continent</th>
 <th width="30">1 Case<br/>every X ppl</th><th width="30">1 Death<br/>every X ppl</th><th width="30">1 Test<br/>every X ppl</th>
 <th width="30">New Cases/1M pop</th>
 <th width="30">New Deat

In [9]:
#Finding the required table and extracting the table headers

In [10]:
table = soup.find('table', id = 'main_table_countries_today')

header = []
for th in table.find_all('th'):
    header.append(th.text.replace('\n', ' '))

header

['#',
 'Country,Other',
 'TotalCases',
 'NewCases',
 'TotalDeaths',
 'NewDeaths',
 'TotalRecovered',
 'NewRecovered',
 'ActiveCases',
 'Serious,Critical',
 'Tot\xa0Cases/1M pop',
 'Deaths/1M pop',
 'TotalTests',
 'Tests/ 1M pop ',
 'Population',
 'Continent',
 '1 Caseevery X ppl',
 '1 Deathevery X ppl',
 '1 Testevery X ppl',
 'New Cases/1M pop',
 'New Deaths/1M pop',
 'Active Cases/1M pop']

In [11]:
#Creating the Data Frame using the extracted headers

In [12]:
covid_data = pd.DataFrame(columns = header)

In [13]:
#Filling the Data Frame with data

In [14]:
for i in table.find_all('tr')[1:]:
    row_data = i.find_all('td')
    row = [j.text.replace('\n', ' ') for j in row_data]
    length = len(covid_data)
    covid_data.loc[length] = row

covid_data.shape

(246, 22)

In [15]:
covid_data.head(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/ 1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,,North America,114359956,5654.0,1524164,38.0,108030194,9865.0,4805598,9387,...,,,,North America,,,,,,
1,,Asia,183732353,286464.0,1467045,490.0,175419277,380809.0,6846031,12746,...,,,,Asia,,,,,,
2,,Europe,221792165,62617.0,1903084,137.0,215585036,115006.0,4304045,9182,...,,,,Europe,,,,,,
3,,South America,63644791,,1325301,,61653835,4385.0,665655,10526,...,,,,South America,,,,,,
4,,Oceania,12156291,13798.0,19485,56.0,11924750,2884.0,212056,129,...,,,,Australia/Oceania,,,,,,
5,,Africa,12606024,,257368,,11939669,,408987,1025,...,,,,Africa,,,,,,
6,,,721,,15,,706,,0,0,...,,,,,,,,,,
7,,World,608292301,368533.0,6496462,721.0,584553467,512949.0,17242372,42995,...,,,,All,,,,,,
8,1.0,USA,96347971,,1071420,,92023741,,3252810,4006,...,1101062482.0,,,North America,,,,,,
9,2.0,India,44436339,,527911,,43845680,,62748,698,...,886147613.0,628781.0,1409310426.0,Asia,32.0,2670.0,2.0,,,45.0


In [16]:
covid_data.tail(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/ 1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
236,229.0,Saint Helena,7,,,,2,,5,,...,,,6116.0,Africa,874.0,,,,,818.0
237,230.0,China,243449,368.0,5226.0,,231925,405.0,6298,34.0,...,160000000.0,111163.0,1439323776.0,Asia,5912.0,275416.0,9.0,0.3,,4.0
238,,Total:,114359956,5654.0,1524164.0,38.0,108030194,9865.0,4805598,9387.0,...,,,,North America,,,,,,
239,,Total:,183732353,286464.0,1467045.0,490.0,175419277,380809.0,6846031,12746.0,...,,,,Asia,,,,,,
240,,Total:,221792165,62617.0,1903084.0,137.0,215585036,115006.0,4304045,9182.0,...,,,,Europe,,,,,,
241,,Total:,63644791,,1325301.0,,61653835,,665655,10526.0,...,,,,South America,,,,,,
242,,Total:,12156291,13798.0,19485.0,56.0,11924750,2884.0,212056,129.0,...,,,,Australia/Oceania,,,,,,
243,,Total:,12606024,,257368.0,,11939669,,408987,1025.0,...,,,,Africa,,,,,,
244,,Total:,721,,15.0,,706,,0,0.0,...,,,,,,,,,,
245,,Total:,608292301,368533.0,6496462.0,721.0,584553467,512949.0,17242372,42995.0,...,,,,All,,,,,,


In [17]:
#Cleaning the Data Frame

In [18]:
covid_data = covid_data[covid_data['#'] != '']
covid_data.reset_index(inplace = True, drop = True)
covid_data.shape

(230, 22)

In [19]:
covid_data.head(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/ 1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,1,USA,96347971,,1071420,,92023741,,3252810,4006,...,1101062482,,,North America,,,,,,
1,2,India,44436339,,527911,,43845680,,62748,698,...,886147613,628781.0,1409310426.0,Asia,32.0,2670.0,2.0,,,45.0
2,3,France,34529201,,154093,,34015188,,359920,869,...,271490188,4139492.0,65585393.0,Europe,2.0,426.0,0.0,,,5488.0
3,4,Brazil,34472679,,684029,,33480980,,307670,8318,...,63776166,295502.0,215822801.0,South America,6.0,316.0,3.0,,,1426.0
4,5,Germany,32184553,,147494,,31317600,51600.0,719459,1406,...,122332384,1450081.0,84362457.0,Europe,3.0,572.0,1.0,,,8528.0
5,6,UK,23521792,,188242,,23205649,420.0,127901,146,...,522526476,7610774.0,68656152.0,Europe,3.0,365.0,0.0,,,1863.0
6,7,S. Korea,23327897,81499.0,26876,112.0,21329194,119528.0,1971827,555,...,15804065,307685.0,51364453.0,Asia,2.0,1911.0,3.0,1587.0,2.0,38389.0
7,8,Italy,21867757,,175595,,21046229,,645933,213,...,242418090,4022209.0,60269892.0,Europe,3.0,343.0,0.0,,,10717.0
8,9,Russia,19578730,49761.0,384441,95.0,18635574,38940.0,558715,2300,...,273400000,1871711.0,146069569.0,Europe,7.0,380.0,1.0,341.0,0.7,3825.0
9,10,Japan,18939344,167340.0,39880,316.0,17110166,234203.0,1789298,555,...,71721798,570848.0,125640688.0,Asia,7.0,3150.0,2.0,1332.0,3.0,14241.0


In [20]:
covid_data.tail(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/ 1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
220,221,Macao,793,,6.0,,785.0,,2,,...,7850.0,11743.0,668456.0,Asia,843.0,111409.0,85.0,,,3.0
221,222,Wallis and Futuna,761,,7.0,,438.0,,316,,...,20508.0,1896431.0,10814.0,Australia/Oceania,14.0,1545.0,1.0,,,29221.0
222,223,Diamond Princess,712,,13.0,,699.0,,0,,...,,,,,,,,,,
223,224,Niue,70,,,,60.0,,10,,...,,,1650.0,Australia/Oceania,24.0,,,,,6061.0
224,225,Vatican City,29,,,,29.0,,0,,...,,,805.0,Europe,28.0,,,,,
225,226,Tuvalu,20,,,,,,20,,...,,,12106.0,Australia/Oceania,605.0,,,,,1652.0
226,227,Western Sahara,10,,1.0,,9.0,,0,,...,,,629068.0,Africa,62907.0,629068.0,,,,
227,228,MS Zaandam,9,,2.0,,7.0,,0,,...,,,,,,,,,,
228,229,Saint Helena,7,,,,2.0,,5,,...,,,6116.0,Africa,874.0,,,,,818.0
229,230,China,243449,368.0,5226.0,,231925.0,405.0,6298,34.0,...,160000000.0,111163.0,1439323776.0,Asia,5912.0,275416.0,9.0,0.3,,4.0


In [21]:
covid_data.drop('#', inplace = True, axis = 1)
covid_data.shape

(230, 21)

In [22]:
covid_data.head(10)

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,...,TotalTests,Tests/ 1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,USA,96347971,,1071420,,92023741,,3252810,4006,,...,1101062482,,,North America,,,,,,
1,India,44436339,,527911,,43845680,,62748,698,31531.0,...,886147613,628781.0,1409310426.0,Asia,32.0,2670.0,2.0,,,45.0
2,France,34529201,,154093,,34015188,,359920,869,526477.0,...,271490188,4139492.0,65585393.0,Europe,2.0,426.0,0.0,,,5488.0
3,Brazil,34472679,,684029,,33480980,,307670,8318,159727.0,...,63776166,295502.0,215822801.0,South America,6.0,316.0,3.0,,,1426.0
4,Germany,32184553,,147494,,31317600,51600.0,719459,1406,381503.0,...,122332384,1450081.0,84362457.0,Europe,3.0,572.0,1.0,,,8528.0
5,UK,23521792,,188242,,23205649,420.0,127901,146,342603.0,...,522526476,7610774.0,68656152.0,Europe,3.0,365.0,0.0,,,1863.0
6,S. Korea,23327897,81499.0,26876,112.0,21329194,119528.0,1971827,555,454164.0,...,15804065,307685.0,51364453.0,Asia,2.0,1911.0,3.0,1587.0,2.0,38389.0
7,Italy,21867757,,175595,,21046229,,645933,213,362831.0,...,242418090,4022209.0,60269892.0,Europe,3.0,343.0,0.0,,,10717.0
8,Russia,19578730,49761.0,384441,95.0,18635574,38940.0,558715,2300,134037.0,...,273400000,1871711.0,146069569.0,Europe,7.0,380.0,1.0,341.0,0.7,3825.0
9,Japan,18939344,167340.0,39880,316.0,17110166,234203.0,1789298,555,150742.0,...,71721798,570848.0,125640688.0,Asia,7.0,3150.0,2.0,1332.0,3.0,14241.0


In [23]:
covid_data.tail(10)

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,...,TotalTests,Tests/ 1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
220,Macao,793,,6.0,,785.0,,2,,1186.0,...,7850.0,11743.0,668456.0,Asia,843.0,111409.0,85.0,,,3.0
221,Wallis and Futuna,761,,7.0,,438.0,,316,,70372.0,...,20508.0,1896431.0,10814.0,Australia/Oceania,14.0,1545.0,1.0,,,29221.0
222,Diamond Princess,712,,13.0,,699.0,,0,,,...,,,,,,,,,,
223,Niue,70,,,,60.0,,10,,42424.0,...,,,1650.0,Australia/Oceania,24.0,,,,,6061.0
224,Vatican City,29,,,,29.0,,0,,36025.0,...,,,805.0,Europe,28.0,,,,,
225,Tuvalu,20,,,,,,20,,1652.0,...,,,12106.0,Australia/Oceania,605.0,,,,,1652.0
226,Western Sahara,10,,1.0,,9.0,,0,,16.0,...,,,629068.0,Africa,62907.0,629068.0,,,,
227,MS Zaandam,9,,2.0,,7.0,,0,,,...,,,,,,,,,,
228,Saint Helena,7,,,,2.0,,5,,1145.0,...,,,6116.0,Africa,874.0,,,,,818.0
229,China,243449,368.0,5226.0,,231925.0,405.0,6298,34.0,169.0,...,160000000.0,111163.0,1439323776.0,Asia,5912.0,275416.0,9.0,0.3,,4.0


In [24]:
#Exporting the Data Frame to Excel

In [25]:
covid_data.to_excel('Covid_Data.xlsx')