In [3]:
import bs4 as bs
import urllib.request
import pandas as pd
import country_converter as coco

# Columns kept from the scraped table. The names must match the text of the
# page's <th> cells exactly, hence the non-breaking space (\xa0) and the
# embedded newlines in some entries.
ref_headers = [
    'Country,Other',
    'TotalCases',
    'NewCases',
    'TotalDeaths',
    'NewDeaths',
    'TotalRecovered',
    'NewRecovered',
    'ActiveCases',
    'Serious,Critical',
    'Tot\xa0Cases/1M pop',
    'Deaths/1M pop',
    'TotalTests',
    'Tests/\n1M pop\n',
    'Population',
]

url = 'https://www.worldometers.info/coronavirus/'

# A browser-like User-Agent is sent explicitly.
# NOTE(review): presumably the site rejects urllib's default agent - confirm.
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})

# The context manager closes the HTTP response once the body has been read;
# the timeout keeps the cell from hanging forever on a stalled connection.
with urllib.request.urlopen(req, timeout=30) as response:
    source = response.read()
soup = bs.BeautifulSoup(source, 'lxml')

table = soup.find('table', attrs={'id': 'main_table_countries_today'})
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError(
        "Table 'main_table_countries_today' not found at " + url)

# One list of cell texts per <tr>. Rows without any <td> cell yield an empty
# list and are skipped.
table_rows = table.find_all('tr')
list_rows = []
for tr in table_rows:
    row = [cell.text for cell in tr.find_all('td')]
    if row:
        list_rows.append(row)

# Column names are the text of every <th> cell found in the table.
table_header = table.find_all('th')
columns = [cell.text for cell in table_header]
df = pd.DataFrame(list_rows, columns=columns)

# Keep only the reference columns, in the reference order.
df = df[ref_headers]

# Keep the first occurrence of each 'Country,Other' value.
df = df.drop_duplicates(subset='Country,Other', keep='first')

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/\n1M pop\n,Population
0,\nNorth America\n,7450519,+40679,274302,+1639,4325468,+39999,2850749,19446,,,,,
1,\nSouth America\n,6400791,+12178,205053,+458,5030288,+10939,1165450,15224,,,,,
2,\nAsia\n,7296376,+107887,145640,+1553,5854882,+86772,1295854,18842,,,,,
3,\nEurope\n,3645760,+34445,208559,+385,2097475,+10717,1339726,5841,,,,,
4,\nAfrica\n,1270944,+6527,30303,+199,1008204,+5650,232437,1278,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,MS Zaandam,9,,2,,,,7,,,,,,
220,Saint Pierre Miquelon,5,,,,4,,1,,864,,1051,181551,5789
221,Anguilla,3,,,,3,,0,,200,,808,53773,15026
222,China,85066,+8,4634,,80234,+26,198,4,59,3,160000000,111163,1439323776


In [4]:
# Convert every column except the first (the country name) to numeric.
# Commas are stripped before parsing; anything that still fails to parse
# becomes NaN (errors='coerce'). The .str accessor passes missing cells
# through as NaN instead of raising, unlike calling .replace on each value.
for col_name in df.columns[1:]:
    df[col_name] = pd.to_numeric(
        df[col_name].str.replace(",", "", regex=False),
        errors='coerce',
    )

# Convert country names to ISO3 codes. 'UK' and 'UAE' are expanded to full
# names before the lookup.
countries_list = (
    df["Country,Other"]
    .str.strip()
    .replace({'UK': 'Great Britain', 'UAE': 'United Arab Emirates'})
    .tolist()
)
iso_codes = coco.convert(names=countries_list, to='ISO3', not_found=None)
# Reuse df's own index: a Series with a default 0..n-1 index would be aligned
# by label on assignment and land on the wrong rows whenever df's index has
# gaps (e.g. after drop_duplicates).
df["iso_alpha"] = pd.Series(iso_codes, index=df.index)

# Rename comma-separated column names
df = df.rename(columns={'Country,Other': 'Country',
                        'Serious,Critical': 'Critical'})

# Replace every remaining missing value with 0 so the integer casts below work.
df = df.fillna(0)

# Create the text that will be displayed on hover
df["text"] = (
    df['Country'].str.strip()
    + '<br>' + 'Active Cases ' + df['ActiveCases'].astype(int).astype(str)
    + '<br>' + 'Total Deaths ' + df['TotalDeaths'].astype(int).astype(str)
)



In [5]:
# Write the DataFrame to a comma-separated file, omitting the index column.
df.to_csv(
    "static/data/corona.csv",
    sep=",",
    index=False,
)

### Visualize df using Plotly (Optional)

In [6]:
# import plotly.express as px
# import datetime
# today_date = datetime.datetime.today().date().strftime("%d-%m-%Y")
# fig = px.choropleth(df, locations="iso_alpha",
#                     color="TotalCases",
#                     hover_name="Country",
#                     color_continuous_scale=px.colors.diverging.Portland,
#                    title='Daily Coronavirus Cases in the World [{}]'.format(today_date)\
#                     +' Source: <a href="https://www.worldometers.info/coronavirus/">Worldometers</a>',
#                    height=600,
#                    range_color=[0,1000],
#                    labels={'TotalCases':'Min Number of cases'})
# fig.show()