In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

BASE_URL = "https://www.caribank.org"
START_URL = BASE_URL + "/our-work/projects-map/list-projects"
PROJECTS = []

def get_soup(url):
    resp = requests.get(url)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, 'html.parser')

def extract_project_rows(soup):
    rows = soup.select("tbody tr")
    data = []
    for row in rows:
        try:
            title_tag = row.find('a', href=True)
            title = title_tag.text.strip()
            link = BASE_URL + title_tag['href']
        except Exception:
            title = link = ""
        try:
            status = row.select_one('div span[class^="icon-status"]').find_next_sibling(text=True)
            if status:
                status = status.strip()
            else:
                status = row.find('div').text.strip()
        except Exception:
            status = ""
        try:
            country = row.select_one('[headers="view-field-cdb-bg-country-tag-table-column"]').get_text(strip=True)
        except Exception:
            country = ""
        try:
            sector = row.select_one('[headers="view-field-sector-tag-table-column"]').get_text(strip=True)
        except Exception:
            sector = ""
        try:
            project_total = row.select_one('[headers="view-field-cdb-approved-budget-table-column"]').get_text(strip=True)
        except Exception:
            project_total = ""
        try:
            approved = row.select_one('[headers="view-field-date-of-approval-table-column"]').get_text(strip=True)
        except Exception:
            approved = ""
        data.append({
            "Project Title": title,
            "Project URL": link,
            "Status": status,
            "Country": country,
            "Sector & Theme": sector,
            "Project Total": project_total,
            "Approved Date": approved
        })
    return data

def get_total_pages():
    soup = get_soup(START_URL)
    pager = soup.select_one('.pager__items')
    if not pager:
        return 1
    pages = [int(a.text) for a in pager.select('a') if a.text.isdigit()]
    return max(pages) if pages else 1

if __name__ == '__main__':
    total_pages = get_total_pages()
    print(f"Total pages found: {total_pages}")
    for page in tqdm(range(total_pages)):
        url = START_URL + f"?page={page}" if page > 0 else START_URL
        soup = get_soup(url)
        page_data = extract_project_rows(soup)
        PROJECTS.extend(page_data)

    df = pd.DataFrame(PROJECTS)
    df.to_excel("cdb_projects.xlsx", index=False)
    print("Done. Data saved to cdb_projects.xlsx")


Total pages found: 1


  status = row.select_one('div span[class^="icon-status"]').find_next_sibling(text=True)
100%|██████████| 1/1 [00:00<00:00,  2.23it/s]


Done. Data saved to cdb_projects.xlsx
