# Summary
A simple web scraper for a Coronavirus tracker. It grabs data from a table and loads it into a pandas DataFrame, ready to be exported or analyzed.

In [1]:
# imports
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

# pandas display options: raise both limits so wide/long frames are shown untruncated
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

In [2]:
# url -> html
url = 'https://www.worldometers.info/coronavirus/'
# A timeout keeps this cell from hanging indefinitely if the site does not respond.
r = requests.get(url, timeout=30)
# Stop on an HTTP error status instead of silently parsing an error page.
r.raise_for_status()
html = r.text
# Name the parser explicitly: without it, bs4 guesses from whatever is installed
# (and emits a warning), so the parse tree can differ between machines.
soup = BeautifulSoup(html, 'html.parser')

In [3]:
# find table
table = soup.find('table', id='main_table_countries_today')
# soup.find returns None when nothing matches; fail here with a clear message
# rather than later with an attribute lookup on None.
if table is None:
    raise ValueError("table 'main_table_countries_today' not found in the page")

In [4]:
# get headers: the text of every <th> element in the table, in document order
th = table.find_all('th')
headers = [header_cell.text for header_cell in th]

In [5]:
# create array from each row
rows = table.tbody.find_all('tr')
# One list of cell texts per <tr>, converted to a numpy array so it can be
# passed to pd.DataFrame together with the headers.
# (The previous per-cell lookup of <a> tags was never used and has been dropped.)
array = np.array([[cell.text for cell in tr.find_all('td')] for tr in rows])

In [6]:
# put into dataframe
# Number of leading table rows dropped before the rows that are kept.
# NOTE(review): presumably these are continent/world aggregate rows that precede
# the per-country rows — confirm against the live table, the count may change.
N_LEADING_ROWS = 8
# Built as one chain (no inplace mutation) so re-running the cell always starts
# from the freshly scraped `array` and yields the same frame.
df = (
    pd.DataFrame(array, columns=headers)
    .iloc[N_LEADING_ROWS:]
    .set_index('#')
)
df

Unnamed: 0_level_0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,USA,106524168,,1158842.0,,104423700.0,,941626.0,1517.0,318168.0,3461.0,1176315905.0,3513433.0,334805269.0,North America,3.0,289.0,0.0,,,2812.0
2,India,44869684,,531258.0,,44272256.0,,66170.0,,31899.0,378.0,924617164.0,657327.0,1406631776.0,Asia,31.0,2648.0,2.0,,,47.0
3,France,39931043,,166164.0,,39625636.0,,139243.0,869.0,608849.0,2534.0,271490188.0,4139547.0,65584518.0,Europe,2.0,395.0,0.0,,,2123.0
4,Germany,38392366,,172428.0,,38173500.0,2900.0,46438.0,,457686.0,2056.0,122332384.0,1458359.0,83883596.0,Europe,2.0,486.0,1.0,,,554.0
5,Brazil,37407232,,701215.0,,36249161.0,,456856.0,,173701.0,3256.0,63776166.0,296146.0,215353593.0,South America,6.0,307.0,3.0,,,2121.0
6,Japan,33628545,10074.0,74338.0,24.0,21725273.0,,11828934.0,59.0,267776.0,592.0,99493194.0,792239.0,125584838.0,Asia,4.0,1689.0,1.0,80.0,0.2,94191.0
7,S. Korea,31053459,13596.0,34408.0,7.0,30809499.0,4340.0,209552.0,142.0,604978.0,670.0,15804065.0,307892.0,51329899.0,Asia,2.0,1492.0,3.0,265.0,0.1,4082.0
8,Italy,25737170,,189391.0,,25413901.0,,133878.0,83.0,427082.0,3143.0,270717229.0,4492280.0,60262770.0,Europe,2.0,318.0,0.0,,,2222.0
9,UK,24555629,,221943.0,,24239333.0,,94353.0,,358487.0,3240.0,522526476.0,7628357.0,68497907.0,Europe,3.0,309.0,0.0,,,1377.0
10,Russia,22796845,7116.0,398007.0,33.0,22189176.0,7409.0,209662.0,,156351.0,2730.0,273400000.0,1875095.0,145805947.0,Europe,6.0,366.0,1.0,49.0,0.2,1438.0


In [7]:
# optional export: uncomment the line below to write the dataframe to 'covid.csv'
#df.to_csv('covid.csv')

In [12]:
# split the dataframe by continent
# Look the column up once and reuse it for every filter below.
continent_col = df['Continent']
northamerica_df = df[continent_col == 'North America']
southamerica_df = df[continent_col == 'South America']
asia_df = df[continent_col == 'Asia']
africa_df = df[continent_col == 'Africa']
europe_df = df[continent_col == 'Europe']
# NOTE(review): the site is believed to label this continent 'Australia/Oceania',
# in which case an exact match on 'Australia' selects nothing. A prefix match
# accepts both spellings — confirm against df['Continent'].unique().
australia_df = df[continent_col.str.startswith('Australia')]

In [13]:
# show example: display the North America subset as this cell's output
northamerica_df

Unnamed: 0_level_0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,USA,106524168,,1158842,,104423700.0,,941626.0,1517.0,318168,3461,1176315905.0,3513433.0,334805269,North America,3,289,0.0,,,2812.0
19,Mexico,7572705,1949.0,333732,14.0,6817972.0,871.0,421001.0,,57560,2537,19870676.0,151036.0,131562772,North America,17,394,7.0,15.0,0.1,3200.0
33,Canada,4641301,,52247,,4569105.0,1098.0,19949.0,99.0,120904,1361,66343123.0,1728207.0,38388419,North America,8,735,1.0,,,520.0
62,Guatemala,1247237,,20189,,1226129.0,,919.0,5.0,67113,1086,7179223.0,386311.0,18584039,North America,15,921,3.0,,,49.0
64,Costa Rica,1226315,,9326,,860711.0,,356278.0,52.0,236633,1800,4659757.0,899158.0,5182354,North America,4,556,1.0,,,68748.0
67,Cuba,1113074,,8530,,1104413.0,,131.0,2.0,98453,754,14344078.0,1268753.0,11305652,North America,10,1325,1.0,,,12.0
71,Panama,1036100,,8618,,1026868.0,,614.0,16.0,232990,1938,7636393.0,1717215.0,4446964,North America,4,516,1.0,,,138.0
82,Dominican Republic,660961,,4384,,644785.0,,11792.0,4.0,59781,397,3740928.0,338350.0,11056370,North America,17,2522,3.0,,,1067.0
94,Honduras,472533,,11112,,,,,105.0,46230,1087,1652947.0,161717.0,10221247,North America,22,920,6.0,,,32176.0
111,Martinique,229807,,1098,,,,,12.0,614314,2935,828928.0,2215870.0,374087,North America,2,341,0.0,,,611101.0
