In [31]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import csv

## List of central Africa countries to scrape hospitals for

In [19]:
countries = [
    "Angola",
    "Burundi",
    "Cameroon",
    "Central African Republic",
    "Chad",
    "Democratic Republic of the Congo",
    "Equatorial Guinea",
    "Gabon",
    "Republic of the Congo",
    "Rwanda",
    "São Tomé and Príncipe"
]

## Scrap list of hospitals for each central African country 

In [51]:
def getHospitalsListCountry(country_name):
    """
    Fetches a list of hospitals from the Wikipedia page for the specified country.
    
    Parameters:
    - country_name (str): The name of the country to fetch hospitals for.
    
    Returns:
    - list: A list of hospital names.
    """
    # URL
    country_formatted = country_name.replace(' ', '_')
    url = f"https://en.wikipedia.org/wiki/List_of_hospitals_in_{country_formatted}"

    hospitals = []  
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        content_blocks = soup.findAll(['table', 'ul'], {'class': ['wikitable', 'toc']})

        for block in content_blocks:
            if block.name == 'table':
                rows = block.findAll('tr')
                for row in rows[1:]:  # Skip the header row
                    first_column = row.find('td')
                    if first_column:
                        hospitals.append(first_column.text.strip())
            elif block.name == 'ul':
                items = block.findAll('li')
                for item in items:
                    hospitals.append(item.text.strip())

    except Exception as e:
        print(f"An error occurred while processing {country_name}: {e}")

    return hospitals

# Loop through the countries and print out the hospitals
for country in countries:
    print(f"Hospitals in {country}:")
    hospitals = get_hospitals_for_country(country)
    if hospitals:
        for hospital in hospitals:
            print(f"- {hospital}")
    else:
        print("No hospitals found or page structure differs.")
    print("\n---\n")

Hospitals in Angola:
- ('Angola', 'Bongo Mission Hospital')
- ('Angola', 'Cajueiros General Hospital')
- ('Angola', 'Clínica Multiperfil')
- ('Angola', 'Hospital Our Lady of Peace (Nossa Senhora da Paz) (Spanish Wikipedia)')
- ('Angola', 'Josina Machel Hospital')
- ('Angola', 'Lucrécia Paím Maternity Hospital')
- ('Angola', 'Hospital Américo Boavida')
- ('Angola', 'Hospital Do Prenda')
- ('Angola', 'Hospital Geral Camama')
- ('Angola', 'Hospital Geral Kilamba Kiaxi')
- ('Angola', 'Hospital Municipal de Luanda')
- ('Angola', 'Hospital Neves Bendinha')
- ('Angola', 'Hospital Sanatório')
- ('Angola', 'Consol Provincial de Kuango Kubango')
- ('Angola', 'Hospital Municipal do Kuvango Municipal')
- ('Angola', 'Hospital Municipal de Cafunfo')
- ('Angola', 'Hospital Municipal de Lucapa')
- ('Angola', 'Hospital Municipal de Nzagi Cambulo')
- ('Angola', 'Hospital Provincial da Luanda Norte')
- ('Angola', 'Consol Provincial de Luanda Sul')
- ('Angola', 'Hospital Barra Do Dande')
- ('Angola', 'Hos

## Scrap countries and combine them with list of hospitals scraped into a csv

In [47]:
def getHospitalsPerCountry(country_name):
    """
    Fetches a list of hospitals from the Wikipedia page for the specified country.
    
    Parameters:
    - country_name (str): The name of the country to fetch hospitals for.
    
    Returns:
    - list of tuples: A list of tuples containing the country name and a hospital name.
    """
    country_formatted = country_name.replace(' ', '_')
    url = f"https://en.wikipedia.org/wiki/List_of_hospitals_in_{country_formatted}"
    hospitals = []

    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        content_blocks = soup.findAll(['table', 'ul'], {'class': ['wikitable', 'toc']})

        for block in content_blocks:
            if block.name == 'table':
                rows = block.findAll('tr')
                for row in rows[1:]:
                    first_column = row.find('td')
                    if first_column:
                        hospitals.append((country_name, first_column.text.strip()))
            elif block.name == 'ul':
                items = block.findAll('li')
                for item in items:
                    hospitals.append((country_name, item.text.strip()))
    except Exception as e:
        print(f"An error occurred while processing {country_name}: {e}")

    return hospitals

# Aggregate hospitals for countries
all_hospitals = []
for country in countries:
    all_hospitals.extend(get_hospitals_for_country(country))

# Write results to a CSV file
csv_hospitals = "hospitals_list.csv"
with open(csv_hospitals, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Country", "Hospital Name"])
    writer.writerows(all_hospitals)

print(f"Scraping complete. Results saved to {csv_hospitals}.")

Scraping complete. Results saved to hospitals_list.csv.


In [48]:
df_hospitals = pd.read_csv(csv_hospitals)
print(df_hospitals)

    Country                                      Hospital Name
0    Angola                             Bongo Mission Hospital
1    Angola                         Cajueiros General Hospital
2    Angola                                Clínica Multiperfil
3    Angola  Hospital Our Lady of Peace (Nossa Senhora da P...
4    Angola                             Josina Machel Hospital
..      ...                                                ...
341   Gabon              Benjamin Ngoubou Hospitalier Regional
342   Gabon                     Tchibanga Hospitalier Regional
343   Gabon                  Amissa Bongo Hospitalier Regional
344   Gabon          Sino Gabonaise Hopital Cooperation Mpassa
345   Gabon                          Hôpital Albert Schweitzer

[346 rows x 2 columns]


## Scrap countries per medical facilities and population

In [41]:
url = "https://en.wikipedia.org/wiki/Lists_of_hospitals_in_Africa"

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# find the first wikitable on the url
table = soup.find('table', {'class': 'wikitable'})

if table is None:
    print("Table not found. Check the class name or structure of the webpage.")
else:
    data = []

    # Iterate over each row in the table, skipping the header
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        if cols:
            country_name = cols[0].text.strip()
            # medical facilities is located in the fourth column
            facilities = cols[3].text.strip().replace(',', '')
            # Population
            population = cols[4].text.strip().replace(',', '')
            
            if country_name in countries:
                data.append([country_name, facilities, population])

    # Write to CSV
    csv_mfp = 'medical_facilities_population.csv'
    with open(csv_mfp, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Country', 'Medical Facilities', 'Population (1,000)'])
        writer.writerows(data)

    print(f'Data successfully saved to {csv_mfp}.')

Data successfully saved to medical_facilities_population.csv.


In [52]:
df_mfp = pd.read_csv(csv_mfp)
print(df_mfp)

                             Country  Medical Facilities  Population (1,000)
0                             Angola                1575               24383
1                            Burundi                 665                9824
2                           Cameroon                3061               21918
3           Central African Republic                 555                3859
4                               Chad                1283               11040
5   Democratic Republic of the Congo               14586              102561
6              Republic of the Congo                 328                3697
7                  Equatorial Guinea                  47                1222
8                              Gabon                 542                1802
9                             Rwanda                 572               10516
10             São Tomé and Príncipe                  50                 202
