In [299]:
#importing the libraries
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [300]:
# Fetch the Wikipedia page that holds the per-country COVID-19 table.
wikiurl = "https://en.wikipedia.org/wiki/COVID-19_pandemic_by_country_and_territory"
table_class = "wikitable sortable jquery-tablesorter"  # NOTE(review): not used in the visible cells
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(wikiurl, timeout=30)
print(response.status_code)  # 200 means the page was fetched and can be scraped
# Fail here with a clear HTTP error instead of parsing an error page further down.
response.raise_for_status()

200


In [301]:
# Parse the downloaded HTML, then pick the first <table> that carries the
# "wikitable" CSS class.
page_html = response.text
soup = BeautifulSoup(page_html, 'html.parser')
covid_table = soup.find('table', class_="wikitable")

In [302]:
# Show the matched <table> element to confirm the right table was found.
covid_table

<table class="wikitable plainrowheaders sortable" id="thetable" style="text-align:right; margin:0 0 0.5em 1em;width:97%;">
<caption><div class="covid-show-table" style="font-size:80%;font-weight:500;"><a href="#covid-19-pandemic-data">[show all]</a></div><div class="covid-collapse-table" style="font-size:80%;font-weight:500;float: right;"><a href="#void">[collapse]</a></div><style data-mw-deduplicate="TemplateStyles:r992953826">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decor

In [306]:
# Turn the <table> markup into a DataFrame. read_html returns a list of frames,
# so the list gets its own name instead of being stored in `df` first.
# Wrapping the markup in StringIO avoids the deprecated literal-HTML-string input.
tables = pd.read_html(StringIO(str(covid_table)))
df = tables[0]  # take the first parsed frame, as before
print(df.head())

         Location[a]                    ...  Recov.[d]      Ref.
  Unnamed: 0_level_1          World[e]  ... 47,289,349       [4]
0                NaN  United States[f]  ...    9170094      [15]
1                NaN             India  ...    9883461      [16]
2                NaN            Brazil  ...    6756284  [17][18]
3                NaN         Russia[g]  ...    2599035      [19]
4                NaN         France[h]  ...     194901  [20][21]

[5 rows x 6 columns]


In [307]:
# The parsed table has a two-level header; keep only the second level.
flat_header = df.columns.droplevel(0)
df.columns = flat_header
# Drop the columns at positions 0 and 5 (the first and the sixth column).
unwanted_columns = df.columns[[0, 5]]
df = df.drop(columns=unwanted_columns)

In [308]:
# Inspect the frame after flattening the header and dropping two columns.
df 

Unnamed: 0,World[e],"83,963,941","1,827,544","47,289,349"
0,United States[f],20331785,350853,9170094
1,India,10286709,148994,9883461
2,Brazil,7700578,195441,6756284
3,Russia[g],3212637,58002,2599035
4,France[h],2639773,64765,194901
...,...,...,...,...
234,Samoa,2,0,0
235,Vanuatu,1,0,1
236,Tanzania[be],No data,No data,No data
237,As of 1 January 2021 (UTC) · History of cases ...,As of 1 January 2021 (UTC) · History of cases ...,As of 1 January 2021 (UTC) · History of cases ...,As of 1 January 2021 (UTC) · History of cases ...


In [309]:
# Some trailing rows are not country data, so they are removed by index label.
# NOTE(review): the labels are hard-coded and depend on the table length at
# scrape time — confirm them whenever the page is re-fetched.
footer_rows = [237, 238]
df = df.drop(index=footer_rows)

In [311]:
# Give the four remaining columns short, readable names.
column_names = ['country', 'cases', 'deaths', 'recovered']
df.columns = column_names

In [312]:
# Inspect the frame with its new column names.
df

Unnamed: 0,country,cases,deaths,recovered
0,United States[f],20331785,350853,9170094
1,India,10286709,148994,9883461
2,Brazil,7700578,195441,6756284
3,Russia[g],3212637,58002,2599035
4,France[h],2639773,64765,194901
...,...,...,...,...
232,Wallis and Futuna,4,0,1
233,American Samoa,3,0,0
234,Samoa,2,0,0
235,Vanuatu,1,0,1


In [313]:
# Remove bracketed footnote markers such as "[f]" from the country names.
# regex=True is required: without it newer pandas treats the pattern as literal
# text and nothing is replaced. The non-greedy ".*?" removes each marker
# separately instead of everything between the first "[" and the last "]".
df['country'] = df['country'].str.replace(r"\[.*?\]", "", regex=True)

In [314]:
# Check that the bracketed footnote markers are gone from the country column.
df

Unnamed: 0,country,cases,deaths,recovered
0,United States,20331785,350853,9170094
1,India,10286709,148994,9883461
2,Brazil,7700578,195441,6756284
3,Russia,3212637,58002,2599035
4,France,2639773,64765,194901
...,...,...,...,...
232,Wallis and Futuna,4,0,1
233,American Samoa,3,0,0
234,Samoa,2,0,0
235,Vanuatu,1,0,1


In [315]:
# Keep only the rows whose 'recovered' value parses as a number; anything else
# (for example "No data") is coerced to NaN and filtered out.
# .copy() makes `new` an independent frame, so the column assignments in later
# cells modify `new` itself and do not raise SettingWithCopyWarning.
has_numeric_recovered = pd.to_numeric(df['recovered'], errors='coerce').notna()
new = df[has_numeric_recovered].copy()

In [317]:
# Inspect the rows that have a numeric 'recovered' value.
new

Unnamed: 0,country,cases,deaths,recovered
0,United States,20331785,350853,9170094
1,India,10286709,148994,9883461
2,Brazil,7700578,195441,6756284
3,Russia,3212637,58002,2599035
4,France,2639773,64765,194901
...,...,...,...,...
231,Marshall Islands,4,0,2
232,Wallis and Futuna,4,0,1
233,American Samoa,3,0,0
234,Samoa,2,0,0


In [318]:
# Convert the three count columns to int64 in one step.
# assign() returns a new frame, so nothing is written into a filtered slice
# (no SettingWithCopyWarning) and the conversion is not copy-pasted per column.
count_columns = ['cases', 'deaths', 'recovered']
new = new.assign(
    **{col: new[col].astype(str).astype('int64') for col in count_columns}
)

In [319]:
# Keep only the rows with at least one recorded death; the next step divides
# by 'deaths'. Filtering into a fresh copy replaces the in-place drop, so the
# cell can be re-run safely and later assignments target an independent frame.
new = new[new['deaths'] != 0].copy()

In [320]:
# Inspect the frame after the zero-death rows were dropped.
new

Unnamed: 0,country,cases,deaths,recovered
0,United States,20331785,350853,9170094
1,India,10286709,148994,9883461
2,Brazil,7700578,195441,6756284
3,Russia,3212637,58002,2599035
4,France,2639773,64765,194901
...,...,...,...,...
205,Northern Mariana Islands,122,2,32
209,British Virgin Islands,72,1,70
211,Fiji,49,2,44
216,Sahrawi Arab DR,31,3,27


In [325]:
# Ratio of confirmed cases to deaths for every row, stored as a new column.
k = new["cases"].div(new["deaths"])
new["cases_per_deaths"] = k

In [284]:
# Inspect the frame with the added cases_per_deaths column.
new

Unnamed: 0,country,cases,deaths,recovered,cases_per_deaths
0,United States,20331785,350853,9170094,57.949583
1,India,10286709,148994,9883461,69.041096
2,Brazil,7700578,195441,6756284,39.401037
3,Russia,3212637,58002,2599035,55.388383
4,France,2639773,64765,194901,40.759253
...,...,...,...,...,...
205,Northern Mariana Islands,122,2,32,61.000000
209,British Virgin Islands,72,1,70,72.000000
211,Fiji,49,2,44,24.500000
216,Sahrawi Arab DR,31,3,27,10.333333


In [332]:
# Order the rows from the highest to the lowest cases-per-death ratio and
# renumber them from 0.
# NOTE(review): the name `sorted` shadows Python's built-in sorted(); it is
# kept because the following cell refers to it.
sorted = (
    new.sort_values(by='cases_per_deaths', ascending=False)
    .reset_index(drop=True)
)

In [333]:
# Show the 20 rows with the highest cases_per_deaths.
sorted.head(20)

Unnamed: 0,country,cases,deaths,recovered,cases_per_deaths
0,Singapore,58629,29,58449,2021.689655
1,Mongolia,1215,1,830,1215.0
2,USS Theodore Roosevelt,1102,1,751,1102.0
3,Eritrea,877,1,599,877.0
4,Qatar,144042,245,141556,587.926531
5,Burundi,760,2,687,380.0
6,Gibraltar,2212,6,1249,368.666667
7,United Arab Emirates,209678,671,186019,312.485842
8,Botswana,12340,40,11627,308.5
9,Curaçao,4260,14,2925,304.285714
