## Web scraping for APAC tenders


In [17]:
import os
import requests
import json
from bs4 import BeautifulSoup

In [18]:
# Request headers sent with every fetch; the User-Agent presents the client
# as a desktop Chrome browser on macOS.
headers = {
    "User-Agent": " ".join((
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0 Safari/537.36",
    )),
}

# Tender listing page that the script downloads and parses.
LIST_URL = "https://apactenders.com/tenders/"

In [19]:
def get_page(url, timeout=30):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    The request is sent with the module-level ``headers``. No delay or
    rate limiting is applied here; callers that fetch several pages in a
    row should throttle themselves.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so a stalled
            connection would hang the whole script.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error statuses instead of parsing an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")

def scrape_tenders(soup):
    """Build one record per tender card found in a parsed listing page.

    Every ``<article class="ee-post">`` element becomes a dict with the
    keys ``no`` (1-based position on the page), ``title``,
    ``organization``, ``country``, ``publish_date``, ``deadline_date`` and
    ``detail_url``. Any piece of information missing from a card is left
    as an empty string.

    Args:
        soup: Parsed listing page (BeautifulSoup tree).

    Returns:
        List of tender dicts in page order.
    """
    closing_prefix = 'Closing Date:'
    # The icon-list spans are read positionally, in this order.
    positional_keys = ("organization", "country", "publish_date", "deadline_date")

    records = []
    cards = soup.find_all('article', class_='ee-post')
    for position, card in enumerate(cards, start=1):
        record = {
            "no": position,
            "title": "",
            "organization": "",
            "country": "",
            "publish_date": "",
            "deadline_date": "",
            "detail_url": "",
        }

        # Title link: prefer the nested <h5> text, else the link's own text.
        link = card.find('a', class_='bde-text-link-111-107')
        if link:
            heading = link.find('h5')
            text_source = heading if heading else link
            record["title"] = text_source.get_text(strip=True)
            record["detail_url"] = link.get('href', "")

        # Only the first four icon-list entries carry the fields we keep.
        labels = card.find_all('span', class_='bde-icon-list__text')
        values = [label.get_text(strip=True) for label in labels[:4]]
        for key, value in zip(positional_keys, values):
            record[key] = value

        # The deadline entry may be prefixed with a 'Closing Date:' label.
        deadline = record["deadline_date"]
        if closing_prefix in deadline:
            record["deadline_date"] = deadline.replace(closing_prefix, '').strip()

        records.append(record)
    return records

In [20]:
if __name__ == "__main__":
    # All artefacts of a run are written under ./output.
    os.makedirs("output", exist_ok=True)

    # Download the listing page and keep a raw copy so the markup can be
    # inspected when selectors need adjusting.
    soup = get_page(LIST_URL)
    with open("output/listing_page.html", "w", encoding="utf-8") as html_file:
        html_file.write(str(soup))

    # Extract the tenders shown on that page and store them as
    # pretty-printed JSON, keeping non-ASCII characters readable.
    tenders = scrape_tenders(soup)
    with open("output/scrap_output.json", "w", encoding="utf-8") as json_file:
        json_file.write(json.dumps(tenders, ensure_ascii=False, indent=2))