In [None]:
# Install BeautifulSoup4 (bs4) for web scraping.
pip install bs4

# Install the 'requests' library for making HTTP requests.
pip install requests

# Install 'texttable' for creating text-based tables to display data.
pip install texttable

In [9]:
#Import libraries

import requests
from bs4 import BeautifulSoup
import texttable as texttable
import pandas as pd
import numbers as np

In [10]:
# Define the URL.
url = 'https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/'

In [11]:
# Get and store the webpage content.
covidWeb = requests.get(url)

# Parse the webpage content.
soup = BeautifulSoup(covidWeb.text, 'html.parser')

# Create an empty list to store the data extracted from the webpage.
covidData = []


In [12]:
# Find all 'td' (table cell) elements found in the webpage.
iteratedData = iter(soup.findAll('td'))


In [13]:
# Iterate through the data in the HTML elements and extract information.
while True:
    try:
        # Extract data for each category and append it to the list.
        country = next(iteratedData).text
        confirmedCases = next(iteratedData).text
        deaths = next(iteratedData).text
        continents = next(iteratedData).text

        covidData.append((
            country,
            int(confirmedCases.replace(',', '')),
            int(deaths.replace(',', '')),
            continents
        ))
        
    # Stop the loop when thera are no more elements to process.
    except StopIteration:
        break

In [14]:
# Sort the  list based on the first element of each tuple.
covidData.sort(key=lambda x: x[0])

covidData

[('Afghanistan', 227214, 7952, 'Asia'),
 ('Albania', 334726, 3602, 'Europe'),
 ('Algeria', 271945, 6881, 'Africa'),
 ('Andorra', 48015, 165, 'Europe'),
 ('Angola', 105972, 1936, 'Africa'),
 ('Anguilla', 3904, 12, 'North America'),
 ('Antigua and Barbuda', 9106, 146, 'North America'),
 ('Argentina', 10075418, 130644, 'South America'),
 ('Armenia', 450334, 8762, 'Asia'),
 ('Aruba', 44224, 292, 'North America'),
 ('Australia', 11787077, 22869, 'Australia/Oceania'),
 ('Austria', 6081287, 22542, 'Europe'),
 ('Azerbaijan', 833955, 10353, 'Asia'),
 ('Bahamas', 38084, 844, 'North America'),
 ('Bahrain', 729191, 1574, 'Asia'),
 ('Bangladesh', 2045843, 29477, 'Asia'),
 ('Barbados', 110235, 641, 'North America'),
 ('Belarus', 994037, 7118, 'Europe'),
 ('Belgium', 4823665, 34376, 'Europe'),
 ('Belize', 71060, 688, 'North America'),
 ('Benin', 28036, 163, 'Africa'),
 ('Bermuda', 18860, 165, 'North America'),
 ('Bhutan', 62697, 21, 'Asia'),
 ('Bolivia', 1209081, 22404, 'South America'),
 ('Bosnia an

In [15]:
# Se crea el objeto de tabla de texto
covidDataTable = texttable.Texttable()
 
# Se agregue una fila vacía al principio para los encabezados
covidDataTable.add_rows([(None, None, None, None)] + covidData)
 
# Para formatear la tabla, usamos: 
# 'l' alineado a la izquierda
# 'c' centrado
# 'r' alineado a la derecha

covidDataTable.set_cols_align(('c', 'c', 'c', 'c')) 
covidDataTable.header((' Country ', ' Nº of cases ', ' Nº of deaths ', ' Continent '))
 
print(covidDataTable.draw())


+-------------------------+---------------+----------------+-------------------+
|         Country         |  Nº of cases  |  Nº of deaths  |     Continent     |
|       Afghanistan       |    227214     |      7952      |       Asia        |
+-------------------------+---------------+----------------+-------------------+
|         Albania         |    334726     |      3602      |      Europe       |
+-------------------------+---------------+----------------+-------------------+
|         Algeria         |    271945     |      6881      |      Africa       |
+-------------------------+---------------+----------------+-------------------+
|         Andorra         |     48015     |      165       |      Europe       |
+-------------------------+---------------+----------------+-------------------+
|         Angola          |    105972     |      1936      |      Africa       |
+-------------------------+---------------+----------------+-------------------+
|        Anguilla         | 

##### If we wanted to dump this information into a Dataframe, how would we go about it?

In [16]:
# Get the table as a string from the Texttable object.
tabla_string = covidDataTable.draw()

# Split the table string into lines and store the non-empty lines.
tabla_data = [line.strip() for line in tabla_string.split('\n') if line.strip()]

# Extract column names from the second line of the table.
column_names = [name.strip() for name in tabla_data[1].split('|') if name.strip()]

# Extract data rows starting from the fourth line.
data_rows = [row.strip() for row in tabla_data[3:-1]]

# Split each data row using '|' as a separator .
data_rows = [row.split('|')[1:-1] for row in data_rows]

# Create a Pandas DataFrame.
df = pd.DataFrame(data_rows, columns=column_names)

df


Unnamed: 0,Country,Nº of cases,Nº of deaths,Continent
0,Afghanistan,227214,7952,Asia
1,,,,
2,Albania,334726,3602,Europe
3,,,,
4,Algeria,271945,6881,Africa
...,...,...,...,...
458,Yemen,11945,2159,Asia
459,,,,
460,Zambia,349287,4069,Africa
461,,,,
