In [1]:
%pip install selenium pandas

Defaulting to user installation because normal site-packages is not writeable
Collecting selenium
  Downloading selenium-4.35.0-py3-none-any.whl (9.6 MB)
[K     |████████████████████████████████| 9.6 MB 5.2 MB/s eta 0:00:01
Collecting trio~=0.30.0
  Downloading trio-0.30.0-py3-none-any.whl (499 kB)
[K     |████████████████████████████████| 499 kB 19.8 MB/s eta 0:00:01
Collecting websocket-client~=1.8.0
  Downloading websocket_client-1.8.0-py3-none-any.whl (58 kB)
[K     |████████████████████████████████| 58 kB 11.3 MB/s eta 0:00:01
[?25hCollecting trio-websocket~=0.12.2
  Downloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Collecting sortedcontainers
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)
Collecting sniffio>=1.3.0
  Downloading sniffio-1.3.1-py3-none-any.whl (10 kB)
Collecting attrs>=23.2.0
  Downloading attrs-25.3.0-py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 11.9 MB/s eta 0:00:01
Collecting outcome
  Downloading outc

In [52]:
import json
import time

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# ---- Browser session ----
# Chrome runs without a visible window; `driver` and `wait` are reused by the
# per-campaign scraping code later in this cell.
options = Options()
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

search_url = "https://www.gofundme.com/s?q=cancer"
driver.get(search_url)

# Each explicit wait (e.g. for the "Show more" button) gives up after 20 seconds.
wait = WebDriverWait(driver, 20)

# ---- Collect campaign links ----
TARGET_LINK_COUNT = 1200  # stop once this many distinct campaign URLs are known
MAX_STALLED_ROUNDS = 3    # give up after this many rounds that add no new links

# A dict de-duplicates while keeping first-seen (page) order; a set's iteration
# order is arbitrary, which would make the scrape order unpredictable.
seen_links = {}
stalled_rounds = 0

# Keep clicking "Show more" until we have enough links, the button disappears,
# or clicking it stops producing new results.
while len(seen_links) < TARGET_LINK_COUNT:
    count_before = len(seen_links)

    for anchor in driver.find_elements(By.CSS_SELECTOR, "a[href*='/f/']"):
        href = anchor.get_attribute("href")
        if href and "/f/" in href:
            seen_links.setdefault(href, None)
        if len(seen_links) >= TARGET_LINK_COUNT:
            break

    print(f"Currently collected: {len(seen_links)} links")

    # Target reached: skip the pointless extra click (and its up-to-20 s wait).
    if len(seen_links) >= TARGET_LINK_COUNT:
        break

    # Guard against looping forever when the button stays clickable but the
    # result list has stopped growing.
    if len(seen_links) == count_before:
        stalled_rounds += 1
        if stalled_rounds >= MAX_STALLED_ROUNDS:
            print("⚠️ 'Show more' is no longer adding new links; stopping.")
            break
    else:
        stalled_rounds = 0

    try:
        show_more = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-element-id='btn_show_more']"))
        )
        # Click through JavaScript so an overlay cannot intercept the click.
        driver.execute_script("arguments[0].click();", show_more)
    except TimeoutException:
        # The button never became clickable: the result list is exhausted.
        print("⚠️ No more 'Show more' button found.")
        break
    except WebDriverException as exc:
        # Any other browser failure is reported as such instead of being
        # mislabelled as "no more results"; links gathered so far are kept.
        print(f"⚠️ Stopped paging after a browser error: {exc}")
        break
    else:
        # Give the next batch of results a moment to render before re-reading.
        time.sleep(2)

links = list(seen_links)
print(f"\nFound {len(links)} campaign links in total")

data = []

# ========== scrape each campaign ==========

# Scraped fields, in the column order of the resulting records ("url" is added last).
_CAMPAIGN_FIELDS = ("title", "description", "amount", "goal", "created", "cover_image")


def _read_element(url, by, selector, attribute=None):
    """Return an element's text, or the value of `attribute` when one is given.

    Returns None when the element cannot be read. A missing element is expected
    on some page layouts and stays silent; any other WebDriver error is logged
    (with `url` for context) so it is not mistaken for absent data.
    """
    try:
        element = driver.find_element(by, selector)
        if attribute is None:
            return element.text
        return element.get_attribute(attribute)
    except NoSuchElementException:
        return None
    except WebDriverException as exc:
        print(f"HTML fallback failed at {url}: {exc}")
        return None


def _fill_from_json(url, record):
    """Populate `record` from the page's embedded `__NEXT_DATA__` JSON, if usable."""
    try:
        raw = driver.find_element(By.ID, "__NEXT_DATA__").get_attribute("innerHTML")
        page_props = json.loads(raw)["props"]["pageProps"]
        fundraiser = page_props.get("fundraiser") or page_props.get("fundraiserDto")
        if not fundraiser:
            return

        record["title"] = fundraiser.get("fundName")
        record["description"] = fundraiser.get("description")
        record["amount"] = fundraiser.get("currentAmount")
        record["goal"] = fundraiser.get("goalAmount")
        record["created"] = fundraiser.get("createdAt")
        media = fundraiser.get("media")
        if media:
            record["cover_image"] = media[0]["url"]
    except Exception as e:
        # Deliberately broad: a missing script tag, invalid JSON or an unexpected
        # payload shape all mean "fall back to the rendered HTML". Fields that
        # were already filled in before the failure are kept.
        print(f"JSON failed at {url}: {e}")


def _fill_from_html(url, record):
    """Fill fields still missing from `record` using the rendered page.

    Every field is looked up independently, so one absent element (e.g. the
    <h1>) no longer prevents the remaining fallbacks from running.
    """
    if not record["title"]:
        record["title"] = _read_element(url, By.TAG_NAME, "h1")

    if not record["description"]:
        record["description"] = _read_element(
            url, By.CSS_SELECTOR, "div[class*='campaign-description_content']"
        )

    if not record["cover_image"]:
        record["cover_image"] = _read_element(
            url, By.CSS_SELECTOR, "img[class*='hero-media-viewer_coverImage']", attribute="src"
        )

    if not record["created"]:
        record["created"] = _read_element(
            url, By.CSS_SELECTOR, "span.m-campaign-byline-created"
        )

    # `is None` rather than truthiness: an amount of 0 is a real value, and a
    # value already obtained from JSON must not be overwritten or reset.
    if record["amount"] is None or record["goal"] is None:
        overview = _read_element(
            url, By.XPATH, "//div[contains(@class,'donation-overview')]//h2"
        )
        if overview is not None:
            parts = overview.split("raised of")
            if record["amount"] is None:
                record["amount"] = parts[0].strip()
            if record["goal"] is None and len(parts) > 1:
                record["goal"] = parts[1].strip()


def scrape_campaign(url):
    """Load one campaign page and return its record (missing fields are None)."""
    record = dict.fromkeys(_CAMPAIGN_FIELDS)
    try:
        driver.get(url)
    except WebDriverException as exc:
        # One unreachable page should not abort the whole run; the row is kept
        # with empty fields so the failure stays visible in the output.
        print(f"Could not load {url}: {exc}")
    else:
        time.sleep(2)  # let client-side rendering settle before reading the page
        _fill_from_json(url, record)
        _fill_from_html(url, record)
    record["url"] = url
    return record


try:
    for i, url in enumerate(links, start=1):
        campaign_data = scrape_campaign(url)
        data.append(campaign_data)
        print(f"[{i}/{len(links)}] Scraped: {campaign_data['title']}")
finally:
    # Always shut the browser down, including on interruption or error, so no
    # headless Chrome process is left running; `data` keeps the rows scraped so far.
    driver.quit()

print(f"\nFinished scraping {len(data)} campaigns.")


Currently collected: 48 links
Currently collected: 96 links
Currently collected: 144 links
Currently collected: 192 links
Currently collected: 240 links
Currently collected: 288 links
Currently collected: 336 links
Currently collected: 384 links
Currently collected: 432 links
Currently collected: 480 links
Currently collected: 528 links
Currently collected: 576 links
Currently collected: 624 links
Currently collected: 672 links
Currently collected: 720 links
Currently collected: 768 links
Currently collected: 816 links
Currently collected: 864 links
Currently collected: 912 links
Currently collected: 960 links
Currently collected: 1000 links
⚠️ No more 'Show more' button found.

Found 1000 campaign links in total
[1/1000] Scraped: Help Baby Jenson Fight a Rare Brain Cancer
[2/1000] Scraped: Dennis’s Fight Against Stage 4 Esophageal Cancer
[3/1000] Scraped: Support Daniel DeMeza's Fight Against Cancer
[4/1000] Scraped: Stand with Alexis Gleason in Her Fight Against Brain Cancer
[5/1000]

In [55]:
import csv

import pandas as pd

# Tabulate the scraped campaign records (one dict per campaign) and write them out.
df = pd.DataFrame(data)
# csv.QUOTE_ALL (the named form of the former literal 1) quotes every field, so
# commas and line breaks inside descriptions cannot break the CSV structure.
df.to_csv("campaigns.csv", index=False, quoting=csv.QUOTE_ALL)