# Arcadia Fund: 360Giving Bulk Data

This notebook is the crawler for the Arcadia Fund's grants. 360Giving defines a [standard tabular format](https://standard.threesixtygiving.org/en/latest/#) for publishing UK philanthropic grant data. The data is published as CSV, XLS, or XLSX, depending on the funder.

### Notes:
- [Bulk Grant Reporting for Arcadia Fund](https://grantnav.threesixtygiving.org/org/360G-ArcadiaFund) includes funded grants both from Arcadia Fund and its sibling funder, [Lund Trust](https://lundtrust.org.uk/), a separate organization that funds environmental initiatives. For the sake of IOI's reporting (and until they get a valid ROR ID), we will only include grants from Arcadia Fund and will actively discard grants from the Lund Trust in this dataset.
- Arcadia Fund's [Grants Directory](https://www.arcadiafund.org.uk/grant-directory) has both web listings and 360giving bulk data. This tool will check for the latest version of the 360giving bulk data and use that as the source of truth.
- We **DO NOT** use the 360Giving portal data for Arcadia Fund; its indexer has not been updated since 2022.



In [10]:
from oic_scrape.items import AwardItem
import pandas as pd
from datetime import datetime
import polars as pl
from bs4 import BeautifulSoup
from attrs import asdict
import requests
from io import StringIO

## Parameters and Configuration

In [11]:
# Pinned location of one specific 360Giving CSV export.
THREESIXTY_G_DATA_URL = "https://www.arcadiafund.org.uk/uploads/360G-March-2024-DATA.csv"
# NOTE(review): presumably chooses between discovering the newest CSV and the
# pinned URL above -- confirm it is consulted where the data is loaded.
USE_LATEST_BULK_DATA = True

# Funder identity stamped onto every award record.
FUNDER_ORG_NAME = "Arcadia Fund"
FUNDER_ORG_ROR_ID = "https://ror.org/051z6e826"

# Destination file and its format ('json', 'jsonl' or 'jsonlines').
OUTPUT_LOCATION = "data/arcadia-fund--giving360_grants.jsonl"
OUTPUT_FORMAT = "jsonl"

In [12]:
def validate_output_format(format):
    """
    Report whether a string names a supported output file format.

    The comparison is case-insensitive.

    Args:
        format (str): The output format to be checked.

    Returns:
        bool: True for 'json', 'jsonl' or 'jsonlines'; False for anything else.
    """
    normalized = format.lower()
    return normalized in ("json", "jsonl", "jsonlines")


# Reject unsupported formats up front, then record whether the configured
# format is one of the line-delimited variants ('jsonl'/'jsonlines').
if not validate_output_format(OUTPUT_FORMAT):
    raise ValueError("Output format should be either 'json' or 'jsonl'/'jsonlines'.")
output_format_lines = OUTPUT_FORMAT.lower() in ("jsonl", "jsonlines")

In [13]:
import requests


def find_latest_results() -> str:
    """Find the bulk-data CSV link on the Arcadia Fund grant directory page.

    Downloads the grant directory page and returns the target of the first
    anchor whose path (ignoring any query string or fragment) ends in
    ``.csv``, compared case-insensitively. Relative links are resolved
    against the directory page URL so the result can be fetched directly.

    Returns:
        str: The absolute URL of the CSV file, including any query string
            carried by the link.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            a timeout or a non-success HTTP status).
        ValueError: If the page contains no link to a CSV file.
    """
    from urllib.parse import urljoin, urlsplit

    grants_directory_url = "https://www.arcadiafund.org.uk/grant-directory"

    # Without a timeout a stalled server would hang the crawl indefinitely.
    response = requests.get(grants_directory_url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        href = link.get("href")
        if not href:
            continue
        # Only the path decides the file type; query string and fragment
        # (e.g. cache-busting parameters) are ignored for the check.
        if urlsplit(href).path.lower().endswith(".csv"):
            # Absolute hrefs pass through urljoin unchanged; relative ones
            # are resolved against the page they were found on.
            return urljoin(grants_directory_url, href)

    raise ValueError(
        "No CSV link found in the Arcadia Fund's grant directory page."
    )


# Honour the USE_LATEST_BULK_DATA switch: discover the newest CSV from the
# grant directory when it is set, otherwise fall back to the pinned export.
url = find_latest_results() if USE_LATEST_BULK_DATA else THREESIXTY_G_DATA_URL
df = pd.read_csv(url)
# Single timestamp shared by every record produced from this download.
crawl_ts = datetime.now()
# dropna() with no arguments removes every row that has a missing value in
# ANY column, so only fully populated grants continue to the next step.
# NOTE(review): the notebook notes say Lund Trust grants are discarded, but
# no such filter is applied here -- confirm whether the CSV already excludes them.
df = df.dropna()


In [14]:
# Turn every CSV row into an AwardItem and collect the plain-dict form.
awards = []
for _, row in df.iterrows():
    # Programme label: the funding area, refined by the priority when the
    # priority is present and differs from the funding area.
    funding_area = row["Funding area"]
    priority = row["Priority"]
    program_of_funder = str(funding_area) if pd.notna(funding_area) else ""
    if pd.notna(priority) and str(priority) != str(funding_area):
        program_of_funder += f" > {priority}"

    # Falsy amounts (e.g. 0) are recorded as None. The same value is used for
    # the USD amount because the currency is recorded as USD.
    amount = row["Amount Awarded"] or None

    award = AwardItem(
        _crawled_at=crawl_ts,
        source="arcadia.org.uk__360giving-export",
        grant_id=f"360g::{row['Identifier']}",
        funder_org_name=FUNDER_ORG_NAME,
        funder_org_ror_id=FUNDER_ORG_ROR_ID,
        recipient_org_name=row["Grant recipient"],
        grant_year=int(row["Award year"]),
        grant_duration=f"{row['Duration']} Years",
        award_amount=amount,
        award_currency="USD",
        award_amount_usd=amount,
        source_url=url,  # the CSV the row was read from
        grant_title=row["Title"],
        grant_description=str(row["Description"]),
        program_of_funder=program_of_funder,
        raw_source_data=row.to_json(),  # untouched source row, as JSON text
        _award_schema_version="0.1.1",
    )
    awards.append(asdict(award))


In [15]:
# After creating the awards list but before writing to file:
from attrs import validate
from typing import List


def validate_awards(awards_list: List[dict]) -> bool:
    """
    Validates a list of award dictionaries against the AwardItem schema.

    Each dictionary is passed to the AwardItem constructor as keyword
    arguments; the first one that cannot be constructed aborts the check.

    Args:
        awards_list: List of award dictionaries to validate

    Returns:
        bool: True if all awards are valid (also True for an empty list)

    Raises:
        Exception: If any award fails validation. The message carries the
            index of the failing award, and the original error is attached
            as the exception's cause.
    """
    for i, award_dict in enumerate(awards_list):
        try:
            # Convert dict back to AwardItem to trigger validation
            AwardItem(**award_dict)
        except Exception as e:
            # Chain explicitly so the underlying validation error and its
            # traceback stay attached to the exception that is reported.
            raise Exception(f"Validation failed for award {i}: {e}") from e
    return True


# Re-check every record first; a failure raises and nothing is written.
validate_awards(awards)

# All records passed: dump them as newline-delimited JSON.
awards_frame = pl.DataFrame(awards)
awards_frame.write_ndjson(OUTPUT_LOCATION)

In [16]:
# Final write of the output file, honouring the configured format:
# newline-delimited JSON for 'jsonl'/'jsonlines', regular JSON for 'json'.
awards_frame = pl.DataFrame(awards)
if output_format_lines:
    awards_frame.write_ndjson(OUTPUT_LOCATION)
else:
    awards_frame.write_json(OUTPUT_LOCATION)

In [17]:
# Bare expression: shows the collected award records as the cell output.
awards

[{'_crawled_at': datetime.datetime(2024, 11, 13, 20, 27, 30, 540890),
  'source': 'arcadia.org.uk__360giving-export',
  'grant_id': '360g::360G-ArcadiaFund-1152',
  'funder_org_name': 'Arcadia Fund',
  'recipient_org_name': 'Stockholm School of Economics',
  'funder_org_ror_id': 'https://ror.org/051z6e826',
  'recipient_org_ror_id': None,
  'recipient_org_location': None,
  'pi_name': None,
  'named_participants': None,
  'grant_year': 2004,
  'grant_duration': '2.0 Years',
  'grant_start_date': None,
  'grant_end_date': None,
  'award_amount': 1238300.0,
  'award_currency': 'USD',
  'award_amount_usd': 1238300.0,
  'source_url': 'https://arcadia-fund.files.svdcdn.com/production/WebData-08-2024.csv?dm=1724398894',
  'grant_title': 'Centre for Economics and Financial Research',
  'grant_description': 'To develop a new Russian economics think tank.',
  'program_of_funder': 'Discretionary',
  'comments': None,
  'raw_source_data': '{"Identifier":"360G-ArcadiaFund-1152","Title":"Centre for