In [4]:
import requests, pandas as pd
from urllib.parse import quote

# Therapy search terms used to build the query expressions.
therapies = [
    "acupuncture",
    "yoga",
    "tai chi",
    "meditation",
    "herbal",
]
# Condition search terms used to build the query expressions.
conditions = [
    "fibromyalgia",
    "chronic pain",
]

import requests

def ctg_count(cond, therapy):
    """
    Count ClinicalTrials.gov (API v2) studies matching ``cond AND therapy``.

    Strategy:
      1) Ask the API for the total (``countTotal=true``) and read it from
         the first response if one is returned.
      2) If not present, paginate using nextPageToken and count.

    Args:
        cond: Condition text placed on the left of the ``AND`` expression.
        therapy: Therapy text placed on the right of the ``AND`` expression.

    Returns:
        The number of matching studies as an int.

    Raises:
        requests.HTTPError: If any request comes back with an error status
            (raised by ``raise_for_status``).
    """
    base = "https://clinicaltrials.gov/api/v2/studies"
    headers = {"Accept": "application/json"}
    expr = f"{cond} AND {therapy}"

    def _is_count(value):
        # bool is a subclass of int; a True/False flag is not a usable total.
        return isinstance(value, int) and not isinstance(value, bool)

    # Small first page: only the total is wanted. Without countTotal the
    # response carries no total and every call would fall through to full
    # pagination. The value is sent as the lowercase string "true" because
    # requests would serialize a Python True as "True".
    params = {
        "query.term": expr,
        "pageSize": 1,
        "countTotal": "true",
        "format": "json",
    }
    r = requests.get(base, params=params, timeout=30, headers=headers)
    r.raise_for_status()
    js = r.json()

    # 1) Try common total fields, top level first, then under "metadata"
    for key in ("totalCount", "total", "count", "countStudies"):
        if _is_count(js.get(key)):
            return int(js[key])
    metadata = js.get("metadata")
    if isinstance(metadata, dict):
        for key in ("totalCount", "total", "count"):
            if _is_count(metadata.get(key)):
                return int(metadata[key])

    # 2) No total? Count the studies on the first page, then walk the
    #    remaining pages via nextPageToken.
    total = len(js.get("studies") or [])
    page_token = js.get("nextPageToken")

    while page_token:
        params = {
            "query.term": expr,
            "pageSize": 200,     # larger pages mean fewer round trips
            "pageToken": page_token,
            "format": "json",
        }
        r = requests.get(base, params=params, timeout=60, headers=headers)
        r.raise_for_status()
        js = r.json()
        total += len(js.get("studies") or [])
        page_token = js.get("nextPageToken")

    return total


def pubmed_count(cond, therapy):
    """
    Return the PubMed hit count for ``cond AND therapy``.

    Sends one ESearch request (JSON mode) and converts the ``count`` field
    of ``esearchresult`` to an int.

    Raises:
        requests.HTTPError: If the request comes back with an error status
            (raised by ``raise_for_status``).
    """
    endpoint = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    query = {
        "db": "pubmed",
        "retmode": "json",
        "term": f"{cond} AND {therapy}",
    }
    response = requests.get(endpoint, params=query, timeout=30)
    response.raise_for_status()
    search_result = response.json()["esearchresult"]
    return int(search_result["count"])

# Collect one record per (condition, therapy) pair; for each pair the
# ClinicalTrials.gov lookup runs before the PubMed lookup.
rows = []
for cond in conditions:
    for th in therapies:
        trials_n = ctg_count(cond, th)
        papers_n = pubmed_count(cond, th)
        rows.append(
            dict(
                condition=cond,
                therapy=th,
                clinicaltrials_n=trials_n,
                pubmed_n=papers_n,
            )
        )

# Build the table, then order it by condition and therapy.
df = pd.DataFrame(rows)
df = df.sort_values(by=["condition", "therapy"])
df

# This gives the Streamlit app a simple source to load.
csv_path = "../data/evidence_counts.csv"
df.to_csv(csv_path, index=False)
print("Saved to data/evidence_counts.csv")


Saved to data/evidence_counts.csv
