### Import all necessary libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Send a request to the website

In [2]:
# requests has no default timeout, so without one a stalled connection would
# block this cell forever. A descriptive User-Agent is sent because Wikimedia
# asks automated clients to identify themselves instead of using a library default.
response = requests.get(
    "https://en.wikipedia.org/wiki/COVID-19_pandemic",
    headers={"User-Agent": "covid-table-scraper-notebook/1.0 (educational use)"},
    timeout=30,
)

In [3]:
# Sanity check before parsing: HTTP 200 means the page was fetched successfully.
response.status_code

200

### Create a BeautifulSoup object

In [4]:
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.content, features='html.parser')

In [5]:
#soup.prettify()

### Search for required data and extract it

In [6]:
# Show the page <title> to confirm the expected article was downloaded.
print(soup.title.get_text())

COVID-19 pandemic - Wikipedia


In [7]:
# Grab the first <table> on the page that carries the "wikitable" CSS class.
table = soup.find('table', class_='wikitable')

In [8]:
# Collect every row inside the table body, then keep rows 2..229 only.
# NOTE(review): the first two rows are presumably header/summary rows and
# 230 presumably marks the end of the per-country rows at scrape time -
# these offsets depend on the live page layout, so confirm before re-running.
body_rows = table.select("tbody tr")
trs = body_rows[2:230]

#### Extracting data for one row/ country..

In [9]:
# Use one sample row to explore the structure before looping over all rows.
tr1 = trs[3]
# Row-header cells: per the output below, the first holds the flag image and
# the second holds the country link.
th1 = tr1.find_all("th", scope="row")
th1

[<th scope="row"><img alt="" class="thumbborder" data-file-height="900" data-file-width="1350" decoding="async" height="15" src="//upload.wikimedia.org/wikipedia/en/thumb/4/41/Flag_of_India.svg/23px-Flag_of_India.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/4/41/Flag_of_India.svg/35px-Flag_of_India.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/4/41/Flag_of_India.svg/45px-Flag_of_India.svg.png 2x" width="23"/>
 </th>,
 <th scope="row"><a href="/wiki/COVID-19_pandemic_in_India" title="COVID-19 pandemic in India">India</a>
 </th>]

In [10]:
# The first header cell contains the flag <img>; print its source URL.
print(th1[0].img['src'])

//upload.wikimedia.org/wikipedia/en/thumb/4/41/Flag_of_India.svg/23px-Flag_of_India.svg.png


We need the text of the 'a' tag inside the second 'th' tag of tr1.

In [11]:
# The second header cell wraps the country name in a link; print the link text.
print(th1[1].a.get_text())

India


#### Extracting number of cases, deaths and recoveries for one country..

In [12]:
# All data cells of the sample row (cases, deaths, recoveries, reference).
td1 = tr1.find_all(name="td")
print(td1)

[<td>440,215
</td>, <td>14,011
</td>, <td>248,189
</td>, <td><sup class="reference" id="cite_ref-59"><a href="#cite_note-59">[48]</a></sup>
</td>]


In [13]:
# Print the raw text of the first three data cells (cases, deaths, recoveries).
print(*[td1[position].text for position in range(3)])

440,215
 14,011
 248,189



Each value is followed by a newline character ('\n') that comes from the line breaks in the HTML source.
The replace() function is used to get rid of it.

#### Write the same code in a loop to extract values for all the countries.

In [14]:
# One independent, initially empty list per output column.
Country, Cases, Deaths, Recoveries = [], [], [], []

In [15]:
# Start from empty lists every time this cell runs; otherwise re-running it
# would append a second copy of every row to whatever the lists already hold.
Country, Cases, Deaths, Recoveries = [], [], [], []

for tr in trs:
    # The second row-header cell holds the country name (the first is the flag).
    name_cell = tr.find_all("th", attrs={'scope': 'row'})[1]
    name_link = name_cell.find('a')
    # Prefer the link text; fall back to the cell's own text when the name is
    # not hyperlinked, instead of failing with AttributeError on None.
    if name_link is not None:
        Country.append(name_link.text)
    else:
        Country.append(name_cell.get_text(strip=True))

    # Data cells, in order: cases, deaths, recoveries. Each cell's text ends
    # with a newline from the HTML source, which is removed here.
    tds = tr.find_all("td")
    Cases.append(tds[0].text.replace("\n", "").strip())
    Deaths.append(tds[1].text.replace("\n", "").strip())
    Recoveries.append(tds[2].text.replace("\n", "").strip())


### Store extracted data in a dataframe

In [16]:
# Combine the four parallel lists into one (country, cases, deaths, recoveries)
# tuple per row.
data = [record for record in zip(Country, Cases, Deaths, Recoveries)]

In [17]:
# Build the DataFrame from the row tuples, naming the columns explicitly.
column_names = ['Country', 'Cases', 'Deaths', 'Recoveries']
COVID_data = pd.DataFrame(data=data, columns=column_names)

In [18]:
# Preview the first ten rows to sanity-check the scraped values.
COVID_data.head(10)

Unnamed: 0,Country,Cases,Deaths,Recoveries
0,United States,2357440,122162,733472
1,Brazil,1113606,51406,579226
2,Russia,599705,8359,356429
3,India,440215,14011,248189
4,United Kingdom,305289,42647,No data
5,Peru,257447,8223,145320
6,Chile,250767,4505,No data
7,Spain,246504,28324,150376
8,Italy,238720,34657,183426
9,Iran,207525,9742,166427


In [19]:
# (rows, columns) - the row count should match the number of table rows kept.
COVID_data.shape

(228, 4)

In [20]:
# Write the table to a CSV file in the current working directory.
# NOTE(review): with the default index=True the 0..n-1 row index is written as
# an unnamed first column; pass index=False if that column is not wanted.
COVID_data.to_csv("world_covid.csv")