In [2]:
from selenium import webdriver
import pandas as pd
import numpy as np

In [15]:
import json
from urllib.parse import urljoin

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [4]:
# Filesystem location of a chromedriver binary. Not referenced by any code visible
# in this notebook; scrape_grant_details obtains its driver via ChromeDriverManager.
PATH = "/usr/local/bin/chromedriver"

# STEP 1: EXTRACT BASE INFO (ICON, TITLE, DESCRIPTION, DETAIL LINK) FOR EACH GRANT CARD

In [9]:
# Grant listing page: loaded by scrape_grants and used there as the base for
# resolving relative icon and detail links.
URL = "https://oursggrants.gov.sg/grants/new"

def scrape_grants():
    """Scrape the grant cards shown on the listing page at ``URL``.

    Opens a browser via ``build_driver(headless=False)``, waits up to 25
    seconds for the first card container to be present, then reads every
    card. The browser is always closed, including when scraping fails.

    Returns:
        list[dict]: one dict per card with the keys ``icon``, ``title``,
        ``description`` and ``detail_link``. ``icon`` and ``detail_link`` are
        resolved against ``URL``. A value is ``None`` when the corresponding
        element could not be read; containers without a card element are
        skipped entirely.

    Raises:
        selenium.common.exceptions.TimeoutException: if no card container
            appears within the wait window.

    NOTE(review): ``build_driver`` is not defined in the visible part of this
    notebook -- confirm it is defined before this cell runs on a fresh kernel.
    """
    # Only containers with the base class and NOT the hashed addon
    container_xpath = (
        "//*[contains(@class,'GrantCard_itemsContainer__4lJ6m') "
        "and not(contains(@class,'TOZGeLkvmpCMIdh__'))]"
    )
    # Preferred selector first (inner div), then the title container itself.
    title_selectors = (
        ".GrantCard_itemTitle__1vIcu.GrantCard_grantType__RaZ2H > div",
        ".GrantCard_itemTitle__1vIcu.GrantCard_grantType__RaZ2H",
    )

    def _optional_text(parent, selector):
        """Return the stripped innerText of the first match, or None if unreadable."""
        try:
            text = parent.find_element(By.CSS_SELECTOR, selector).get_property("innerText")
        except WebDriverException:
            return None
        return text.strip() if isinstance(text, str) else None

    def _optional_url(parent, by, locator, attribute):
        """Return the matched element's attribute as an absolute URL, or None."""
        try:
            element = parent.find_element(by, locator)
            raw = element.get_dom_attribute(attribute) or element.get_attribute(attribute)
        except WebDriverException:
            return None
        return urljoin(URL, raw) if raw else None

    driver = build_driver(headless=False)
    try:
        driver.get(URL)
        WebDriverWait(driver, 25).until(
            EC.presence_of_element_located((By.XPATH, container_xpath))
        )

        results = []
        for box in driver.find_elements(By.XPATH, container_xpath):
            # The actual card element inside the container
            try:
                card = box.find_element(By.CSS_SELECTOR, ".GrantCard_item__1FyqJ.grantItem")
            except WebDriverException:
                # Skip this container if no card found
                continue

            title_text = None
            for selector in title_selectors:
                title_text = _optional_text(card, selector)
                if title_text is not None:
                    break

            results.append({
                "icon": _optional_url(
                    card, By.CSS_SELECTOR, ".GrantCard_agencyIcon__2Evu3 img", "src"
                ),
                "title": title_text,
                "description": _optional_text(card, ".GrantCard_itemDescription__2szWZ"),
                # Anchor wrapping the card lives on the container, not the card.
                "detail_link": _optional_url(box, By.XPATH, ".//a[@href]", "href"),
            })
        return results
    finally:
        # Close the browser even when the wait times out or scraping raises.
        driver.quit()

# Scrape the listing page once; the cells below reuse `data`.
data = scrape_grants()
print(f"Fetched {len(data)} cards")

# Preview only the first five rows to keep the cell output short.
preview_rows = data[:5]
for row in preview_rows:
    print(row)

Fetched 41 cards
{'icon': 'https://oursggrants.gov.sg/AgencyIcon/sportsg/6268ff1eef22a80677d9.svg', 'title': 'Active Citizen Grant', 'description': 'The Active Citizen Grant encourages ground-up initiatives through innovating and organising activities in any of the following three domains: sports volunteerism, physical activity or health & wellness initiatives.', 'detail_link': 'https://oursggrants.gov.sg/grants/ssgacg/instruction'}
{'icon': 'https://oursggrants.gov.sg/AgencyIcon/nyc/nyc.png', 'title': 'Asia-Ready Exposure Programme (AEP)', 'description': 'Supports projects that provide youth with industry and cultural exposure to ASEAN member states, China and India, up to $1,000 per eligible youth.', 'detail_link': 'https://oursggrants.gov.sg/grants/nycaep/instruction'}
{'icon': 'https://oursggrants.gov.sg/AgencyIcon/ncss/NCSS logo.svg', 'title': 'Charities Capability Fund (CCF)', 'description': 'Supports exempt/registered charities and Institutions of a Public Character to enhance g

# STEP 2: EXTRACT DETAILS FOR EACH GRANT

In [17]:
import os

# Persist the listing rows exactly as scraped. Serialising the plain dicts
# directly avoids a DataFrame round-trip, which can replace missing values with
# NaN -- a token json.dump would write but strict JSON parsers reject.
records = [dict(row) for row in data]
with open("grants_list.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(records, f, ensure_ascii=False, indent=2)

In [11]:
def scrape_grant_details(base_rows, headless=False):
    """Enrich each listing row with fields read from its detail page.

    A single Chrome instance is reused for all pages. It is started only when
    the first row with a ``detail_link`` is reached, and it is always closed,
    including when navigation or scraping raises.

    Args:
        base_rows: iterable of dicts; a row's ``detail_link`` (if truthy) is
            the page to visit. The input rows are not modified.
        headless: when true, Chrome is started with ``--headless=new``.

    Returns:
        list[dict]: one new dict per input row, in input order, holding the
        row's own keys plus ``guideline``, ``apply_method`` and ``documents``.
        Each of the three is ``"NA"`` when it could not be read; otherwise
        ``guideline`` and ``apply_method`` are strings and ``documents`` is a
        list of ``{"text": ..., "href": ...}`` dicts.
    """
    defaults = {"guideline": "NA", "apply_method": "NA", "documents": "NA"}

    # "How to apply?" header followed by the next .text block
    apply_xpath = (
        "//*[contains(@class,'Title_title__') and normalize-space()='How to apply?']"
        "/following-sibling::*[contains(@class,'text')][1]"
    )

    def _driver():
        """Start Chrome with the options used for every detail page."""
        options = Options()
        if headless:
            options.add_argument("--headless=new")
        for flag in (
            "--window-size=1920,1080",
            "--disable-gpu",
            "--no-sandbox",
            "--disable-dev-shm-usage",
        ):
            options.add_argument(flag)
        return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    def _text(element):
        """Return the element's stripped innerText, or None when it has none."""
        text = element.get_property("innerText")
        return text.strip() if isinstance(text, str) else None

    def _scrape_page(driver, wait, link):
        """Visit one detail page and return only the fields that could be read."""
        found = {}
        driver.get(link)
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".card-body")))
        except WebDriverException:
            # Page body never appeared: leave every field at its default.
            return found

        # Guideline: id="guideline"
        try:
            guideline = _text(driver.find_element(By.ID, "guideline"))
        except WebDriverException:
            guideline = None
        if guideline is not None:
            found["guideline"] = guideline

        # Apply method: text block that follows the "How to apply?" header.
        try:
            apply_method = _text(driver.find_element(By.XPATH, apply_xpath))
        except WebDriverException:
            apply_method = None
        if apply_method is None:
            # Fallback: the LAST .text block inside .card-body.
            try:
                text_blocks = driver.find_element(By.CSS_SELECTOR, ".card-body").find_elements(
                    By.CSS_SELECTOR, ".text"
                )
                if text_blocks:
                    apply_method = _text(text_blocks[-1])
            except WebDriverException:
                apply_method = None
        if apply_method is not None:
            found["apply_method"] = apply_method

        # Documents: id="template" -> collect anchors
        try:
            docs = []
            for anchor in driver.find_element(By.ID, "template").find_elements(
                By.CSS_SELECTOR, "a[href]"
            ):
                href_raw = anchor.get_dom_attribute("href") or anchor.get_attribute("href")
                docs.append({
                    "text": _text(anchor) or "",
                    "href": urljoin(link, href_raw) if href_raw else None,
                })
        except WebDriverException:
            docs = []
        if docs:
            found["documents"] = docs

        return found

    driver = None
    wait = None
    detailed_rows = []
    try:
        for row in base_rows:
            detail = dict(defaults)
            link = row.get("detail_link")
            if link:
                if driver is None:
                    # Start the browser lazily so rows without links cost nothing.
                    driver = _driver()
                    wait = WebDriverWait(driver, 20)
                detail.update(_scrape_page(driver, wait, link))
            detailed_rows.append({**row, **detail})
    finally:
        if driver is not None:
            driver.quit()
    return detailed_rows

# Visit every detail page, then summarise the first three merged rows.
details = scrape_grant_details(data, headless=False)
print(f"Detailed pages scraped: {len(details)}")

for row in details[:3]:
    documents = row["documents"]
    # "documents" is either a list of links or the "NA" placeholder string.
    documents_count = len(documents) if isinstance(documents, list) else 0
    summary = {
        "title": row.get("title"),
        "detail_link": row.get("detail_link"),
        "guideline_len": len(row.get("guideline") or ""),
        "apply_method_len": len(row.get("apply_method") or ""),
        "documents_count": documents_count,
    }
    print(summary)

Detailed pages scraped: 41
{'title': 'Active Citizen Grant', 'detail_link': 'https://oursggrants.gov.sg/grants/ssgacg/instruction', 'guideline_len': 1103, 'apply_method_len': 142, 'documents_count': 3}
{'title': 'Asia-Ready Exposure Programme (AEP)', 'detail_link': 'https://oursggrants.gov.sg/grants/nycaep/instruction', 'guideline_len': 2092, 'apply_method_len': 142, 'documents_count': 13}
{'title': 'Charities Capability Fund (CCF)', 'detail_link': 'https://oursggrants.gov.sg/grants/ccf/instruction', 'guideline_len': 1742, 'apply_method_len': 181, 'documents_count': 0}


In [12]:
# Display the complete list of merged detail rows as the cell output.
details

[{'icon': 'https://oursggrants.gov.sg/AgencyIcon/sportsg/6268ff1eef22a80677d9.svg',
  'title': 'Active Citizen Grant',
  'description': 'The Active Citizen Grant encourages ground-up initiatives through innovating and organising activities in any of the following three domains: sports volunteerism, physical activity or health & wellness initiatives.',
  'detail_link': 'https://oursggrants.gov.sg/grants/ssgacg/instruction',
  'guideline': 'We support community sporting events/projects that fulfil any of the following objectives:\nEngage the community with meaningful and impactful volunteering opportunities.\nDemonstrate innovative ways of engaging and inspiring the community through sport, physical activity or health & wellness initiatives.\nIncorporate elements of “Care” that contribute towards building a caring society e.g. by providing opportunities for social mixing among different groups, offering volunteering platforms for the community to give back to society, rallying participat

In [16]:
# Persist the detailed rows exactly as scraped. Serialising the plain dicts
# directly avoids a DataFrame round-trip, which can replace missing values with
# NaN -- a token json.dump would write but strict JSON parsers reject.
records = [dict(row) for row in details]
with open("grants_detailed_list.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(records, f, ensure_ascii=False, indent=2)