# Gates Foundation


In [1]:
import polars as pl
import pandas as pd
from oic_scrape.items import AwardItem
from datetime import datetime
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from io import StringIO
from attrs import asdict

# Funder identity stamped onto every award record built in this notebook.
FUNDER_ORG_NAME = "Bill & Melinda Gates Foundation"
# ROR (ror.org) identifier URL recorded alongside the funder name.
FUNDER_ORG_ROR_ID = "https://ror.org/0456r8d26"

In [2]:
# Path the award records are exported to as newline-delimited JSON.
OUTPUT_LOCATION = "data/gatesfoundation.org.jsonl"

In [3]:
# Download the foundation's static grants CSV export.
session = requests.Session()
static_grants_csv_url = "https://www.gatesfoundation.org/-/media/files/bmgf-grants.csv"
# requests applies no timeout by default, so a stalled server would hang the
# notebook forever; bound the connect/read wait explicitly.
csv = session.get(static_grants_csv_url, timeout=60)
# Timestamp recorded on every award as its crawl time.
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12; switching to
# a timezone-aware value may change what downstream consumers receive, so
# confirm before replacing it.
crawled_at = datetime.utcnow()
# Fail fast on HTTP error statuses instead of parsing an error page as CSV.
csv.raise_for_status()

In [4]:
# Parse the downloaded CSV text, skipping its first line (presumably a
# title/banner row above the real header -- confirm against the file).
df = pd.read_csv(StringIO(csv.text), skiprows=1)
# Rows without a grant ID cannot be turned into award records; drop them.
df = df.dropna(subset=["GRANT ID"])

In [5]:
# Convert every grant row of the CSV into an AwardItem and collect the items
# as plain dicts in `awards`.
awards = []
location_columns = ("GRANTEE CITY", "GRANTEE STATE", "GRANTEE COUNTRY")
for _, row in df.iterrows():
    # pandas parses empty CSV cells as NaN, which is truthy and stringifies to
    # "nan"; keep only the location parts that actually hold a value.
    location = ", ".join(
        str(row[column])
        for column in location_columns
        if pd.notna(row[column]) and str(row[column]).strip()
    )

    # "DATE COMMITTED" is split into exactly two "-"-separated parts, used as
    # the year and month segments of the grant's public page URL.
    year, month = row["DATE COMMITTED"].split("-")
    source_url = f"https://www.gatesfoundation.org/about/committed-grants/{year}/{month}/{row['GRANT ID']}"

    award = AwardItem(
        grant_id=f"gatesfoundation.org::{row['GRANT ID']}",
        funder_org_name=FUNDER_ORG_NAME,
        funder_org_ror_id=FUNDER_ORG_ROR_ID,
        recipient_org_name=str(row["GRANTEE"]),
        recipient_org_location=location,
        grant_year=int(year),
        grant_duration=f"{row['DURATION (MONTHS)']} months",
        award_amount=float(row["AMOUNT COMMITTED"]),
        award_currency="USD",
        # The amount is recorded as USD above, so the USD figure is the same.
        award_amount_usd=float(row["AMOUNT COMMITTED"]),
        grant_description=row["PURPOSE"],
        # Fixed: the original f-string appended a stray trailing "'" here.
        program_of_funder=f"{row['DIVISION']} > {row['TOPIC']}",
        source="gatesfoundation.org",
        source_url=source_url,
        # Keep the textual form of the whole row for traceability.
        raw_source_data=str(row),
        _crawled_at=crawled_at,
    )
    awards.append(asdict(award))

In [6]:
from oic_scrape.validation import validate_all

# Validate the collected awards and export them only when validation succeeds.
# Only the validation call sits inside the `try`, so a failure while writing
# the file is no longer reported (and swallowed) as a validation failure.
try:
    validate_all(awards)
except Exception as e:
    # The exception types raised by validate_all are not visible from this
    # notebook, so catch broadly here, report the problem and skip the export.
    print("Validation failed:")
    print(str(e))
else:
    print("All validations passed!")

    # Write to file
    pl.DataFrame(awards).write_ndjson(OUTPUT_LOCATION)

All validations passed!


In [7]:
# export_df = pl.DataFrame(awards)
# export_df.write_ndjson(OUTPUT_LOCATION)