In [20]:
import requests
import lxml.html as lh
import pandas as pd
import numpy as np
import country_converter as coco

### Build df from Url

In [21]:
url='https://www.worldometers.info/coronavirus/'

# Scraping the Url
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as data.
page = requests.get(url, timeout=30)
page.raise_for_status()
doc = lh.fromstring(page.content)
todaydoc = doc.get_element_by_id("main_table_countries_today")

# Parse data
# The leading '.' matters: a bare '//th' is evaluated from the document root
# even when called on an element, so it would also return cells that live
# outside of `todaydoc`. './/' restricts the search to this table only.
th_elements = todaydoc.xpath('.//th') # header
td_elements = todaydoc.xpath('.//td') # cells content

headers = [th_element.text_content() for th_element in th_elements]
content = [td_element.text_content() for td_element in td_elements]

# Fail loudly if the page layout changed instead of building a misaligned table.
n_cols = len(headers)
if n_cols == 0 or len(content) % n_cols != 0:
    raise ValueError(
        "Unexpected table layout: {} cells cannot be split into rows of {} columns".format(
            len(content), n_cols))

# Cut the flat list of cell texts into rows of `n_cols` cells each.
rows_content = [content[start:start + n_cols] for start in range(0, len(content), n_cols)]
rows_content = rows_content[:-1] #rm Today row

df = pd.DataFrame(rows_content, columns=headers)

In [24]:
# Inspect the flat list of cell texts collected from the <td> elements above
content

['World',
 '1,253,066',
 '+51,613',
 '68,154',
 '+3,466',
 '257,199',
 '927,713',
 '45,379',
 '161',
 '8.7',
 '',
 '',
 'USA',
 '327,871',
 '+16,514',
 '9,325 ',
 '+873',
 '16,700',
 '301,846',
 '8,519',
 '991',
 '28',
 '1,725,812',
 '5,214',
 'Spain',
 '130,759',
 '+4,591',
 '12,418 ',
 '+471',
 '38,080',
 '80,261',
 '6,861',
 '2,797',
 '266',
 '355,000',
 '7,593',
 'Italy',
 '128,948',
 '+4,316',
 '15,887 ',
 '+525',
 '21,815',
 '91,246',
 '3,977',
 '2,133',
 '263',
 '691,461',
 '11,436',
 'Germany',
 '98,765',
 '+2,673',
 '1,524 ',
 '+80',
 '26,400',
 '70,841',
 '3,936',
 '1,179',
 '18',
 '918,460',
 '10,962',
 'France',
 '89,953',
 '',
 '7,560 ',
 '',
 '15,438',
 '66,955',
 '6,838',
 '1,378',
 '116',
 '224,254',
 '3,436',
 'Iran',
 '58,226',
 '+2,483',
 '3,603 ',
 '+151',
 '19,736',
 '34,887',
 '4,103',
 '693',
 '43',
 '186,000',
 '2,214',
 'UK',
 '47,806',
 '+5,903',
 '4,934 ',
 '+621',
 '135',
 '42,737',
 '1,559',
 '704',
 '73',
 '195,524',
 '2,880',
 'Turkey',
 '27,069',
 '+3,13

In [3]:
# Convert values to float
# Every column except the first one is parsed as a number; the thousands
# separators are removed first and anything unparsable becomes NaN.
for col_name in df.columns[1:]:
    without_commas = df[col_name].str.replace(",", "", regex=False)
    df[col_name] = pd.to_numeric(without_commas, errors='coerce')

# Convert country to ISO codes
# Two abbreviations are spelled out before being handed to country_converter.
name_overrides = {'UK': 'Great Britain', 'UAE': 'United Arab Emirates'}
countries_list = df["Country,Other"].str.strip().replace(name_overrides).values.tolist()
iso_codes = coco.convert(names=countries_list, to='ISO3', not_found=None)
df["iso_alpha"] = pd.Series(iso_codes)

# Rename comma separated cols
renamed_columns = {'Country,Other': 'Country',
                   'Serious,Critical': 'Critical'}
df = df.rename(columns=renamed_columns)

# Missing values (e.g. empty cells coerced to NaN above) are exported as 0
df = df.fillna(0)

# Create text that will be displayed on hover
active_cases_text = 'Active Cases ' + df['ActiveCases'].astype(int).astype(str)
total_deaths_text = 'Total Deaths ' + df['TotalDeaths'].astype(int).astype(str)
df["text"] = df['Country'].str.strip() + '<br>' + active_cases_text + '<br>' + total_deaths_text



In [63]:
# Export Dataframe
# Written without the index column, comma-separated.
csv_path = "static/data/corona.csv"
df.to_csv(csv_path, sep=",", index=False)

### Visualize df using Plotly (Optional)

In [65]:
# import plotly.express as px
# import datetime
# today_date = datetime.datetime.today().date().strftime("%d-%m-%Y")
# fig = px.choropleth(df, locations="iso_alpha",
#                     color="TotalCases",
#                     hover_name="Country",
#                     color_continuous_scale=px.colors.diverging.Portland,
#                    title='Daily Coronavirus Cases in the World [{}]'.format(today_date)\
#                     +' Source: <a href="https://www.worldometers.info/coronavirus/">Worldometers</a>',
#                    height=600,
#                    range_color=[0,1000],
#                    labels={'TotalCases':'Min Number of cases'})
# fig.show()