### Description

This notebook downloads agricultural yield data from the USDA National Agricultural Statistics Service (NASS) QuickStats API.


In [66]:
# Imports
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
from tqdm import tqdm


In [67]:
# Config
#
# Secrets and machine-specific locations are taken from environment variables
# so they never have to be written into the notebook itself:
#   NASS_API_KEY   - USDA NASS QuickStats API key (required)
#   YIELD_DATA_DIR - directory for the interim CSV output (optional)

API_CONFIG = {
    # Never embed the key in the notebook: it ends up in version control and in
    # saved cell outputs. Any key that was previously committed here should be
    # treated as compromised and rotated.
    "key": os.environ.get("NASS_API_KEY", "").strip(),
    "base_url": "https://quickstats.nass.usda.gov/api/api_GET/",
    "format": "JSON"
}

# The default keeps the original location so existing setups keep working;
# set YIELD_DATA_DIR to run the notebook on any other machine.
INTERIM_DIR = Path(
    os.environ.get(
        "YIELD_DATA_DIR",
        "C:/Users/Arnold/OneDrive/Desktop/CAPSTONE PROJECT/farming_risk_regions/data/interim/yield_data",
    )
)
INTERIM_DIR.mkdir(parents=True, exist_ok=True)

CROP_CONFIG = {
    "name": "CORN",
    # Both bounds are inclusive. A year for which the API has no data still
    # produces a row, with its area/yield fields left as None.
    "start_year": 2015,
    "end_year": 2025
}

# State postal abbreviation -> list of county names to download.
STATE_COUNTY_CONFIG = {
    'IA': ['Story'],
    'IL': ['McLean']
}

# Fail fast with an actionable message instead of sending unauthenticated requests.
if not API_CONFIG["key"]:
    raise ValueError(
        "API key not found. Please provide a valid USDA NASS API key "
        "via the NASS_API_KEY environment variable."
    )

In [68]:
# Helper Functions
def query_nass(params, timeout=30):
    """Send one GET request to the QuickStats endpoint and return its records.

    Args:
        params: Dict of query filters. It is copied, so the caller's dict is
            not modified; the API key and response format are added to the copy.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook indefinitely.
            Defaults to 30, so existing single-argument calls are unaffected.

    Returns:
        The list stored under the "data" key of the JSON response, or an empty
        list when that key is missing, the request fails (network error,
        timeout, non-2xx status) or the body is not valid JSON.
    """
    params_full = params.copy()
    params_full['key'] = API_CONFIG['key']
    params_full['format'] = API_CONFIG['format']
    try:
        resp = requests.get(API_CONFIG['base_url'], params=params_full, timeout=timeout)
        resp.raise_for_status()
        return resp.json().get("data", [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        message = str(e)
        # HTTP error messages contain the full request URL, which includes the
        # API key as a query parameter; mask it before it reaches cell output.
        if API_CONFIG['key']:
            message = message.replace(API_CONFIG['key'], "***")
        print(f"API request failed: {message}")
        return []

def safe_float(s):
    """Convert a reported value to float, returning None when that is not possible.

    Args:
        s: Usually a string that may contain thousands separators
            (e.g. "161,100"). Real numbers (int/float) are also accepted and
            converted directly instead of being discarded.

    Returns:
        The parsed float, or None for None, booleans, empty/blank strings,
        non-numeric text (e.g. "(D)") and any other unsupported type.
    """
    # bool is a subclass of int, but a True/False flag is not a measurement.
    if s is None or isinstance(s, bool):
        return None
    try:
        if isinstance(s, (int, float)):
            return float(s)
        # Drop thousands separators; surrounding whitespace is irrelevant.
        text = s.replace(",", "").strip()
        return float(text) if text else None
    except (ValueError, AttributeError, TypeError, OverflowError):
        # ValueError: non-numeric text; AttributeError/TypeError: objects that
        # are not str-like (e.g. lists, bytes); OverflowError: int too large.
        return None

def fetch_crop_data(crop, year, state_alpha, county_name):
    """Collect harvested area and yield for one crop/year/county combination.

    Two separate queries are issued through ``query_nass`` (first
    "AREA HARVESTED", then "YIELD"), both restricted to county-level
    aggregation and the "TOTAL" domain.

    Args:
        crop: Value sent as ``commodity_desc``.
        year: Value sent as ``year``.
        state_alpha: Value sent as ``state_alpha``.
        county_name: Value sent as ``county_name``.

    Returns:
        A dict with the keys crop, year, state, county, area_harvested,
        area_harvested_unit, yield_value and yield_unit. The four measurement
        fields stay None when the corresponding query returns no records.
    """
    record = {
        "crop": crop,
        "year": year,
        "state": state_alpha,
        "county": county_name,
        "area_harvested": None,
        "area_harvested_unit": None,
        "yield_value": None,
        "yield_unit": None
    }
    shared_filters = {
        "commodity_desc": crop,
        "year": year,
        "agg_level_desc": "COUNTY",
        "state_alpha": state_alpha,
        "county_name": county_name,
        "domain_desc": "TOTAL"
    }
    # (statistic category to request, record key for the value, record key for the unit)
    statistic_specs = (
        ("AREA HARVESTED", "area_harvested", "area_harvested_unit"),
        ("YIELD", "yield_value", "yield_unit"),
    )
    for category, value_key, unit_key in statistic_specs:
        rows = query_nass({**shared_filters, "statisticcat_desc": category})
        if not rows:
            continue
        # Only the first returned record is used.
        # NOTE(review): if a query can match several records, which one comes
        # first is not controlled here - confirm the filters are specific enough.
        first_row = rows[0]
        record[value_key] = safe_float(first_row.get("Value"))
        record[unit_key] = first_row.get("unit_desc")
    return record

In [69]:
# Data Collection
# Build one record per (state, county, year) and accumulate them in all_records.
years = [*range(CROP_CONFIG['start_year'], CROP_CONFIG['end_year'] + 1)]  # end year inclusive
all_records = []

print(f"Starting data collection for {CROP_CONFIG['name']}")
print(f"Time period: {CROP_CONFIG['start_year']} - {CROP_CONFIG['end_year']}")
print(f"States: {', '.join(STATE_COUNTY_CONFIG)}")

# Flatten the state -> counties mapping into (state, county) pairs, preserving order.
targets = [
    (state_code, county_label)
    for state_code, county_list in STATE_COUNTY_CONFIG.items()
    for county_label in county_list
]

for state, county in targets:
    print(f"Processing {county}, {state}")
    # One progress bar per county, advancing once per year fetched.
    for year in tqdm(years, desc=f"{county}, {state}"):
        record = fetch_crop_data(
            crop=CROP_CONFIG['name'],
            year=year,
            state_alpha=state,
            county_name=county,
        )
        all_records.append(record)


Starting data collection for CORN
Time period: 2015 - 2025
States: IA, IL
Processing Story, IA


Story, IA:  91%|█████████ | 10/11 [00:16<00:01,  1.52s/it]

API request failed: 400 Client Error: Bad Request for url: https://quickstats.nass.usda.gov/api/api_GET/?commodity_desc=CORN&year=2025&agg_level_desc=COUNTY&state_alpha=IA&county_name=Story&domain_desc=TOTAL&statisticcat_desc=AREA+HARVESTED&key=CDA43551-0A5A-31F4-A033-7DD4ED6A9A17&format=JSON


Story, IA: 100%|██████████| 11/11 [00:17<00:00,  1.61s/it]
Story, IA: 100%|██████████| 11/11 [00:17<00:00,  1.61s/it]


API request failed: 400 Client Error: Bad Request for url: https://quickstats.nass.usda.gov/api/api_GET/?commodity_desc=CORN&year=2025&agg_level_desc=COUNTY&state_alpha=IA&county_name=Story&domain_desc=TOTAL&statisticcat_desc=YIELD&key=CDA43551-0A5A-31F4-A033-7DD4ED6A9A17&format=JSON
Processing McLean, IL


McLean, IL:  91%|█████████ | 10/11 [00:15<00:01,  1.47s/it]

API request failed: 400 Client Error: Bad Request for url: https://quickstats.nass.usda.gov/api/api_GET/?commodity_desc=CORN&year=2025&agg_level_desc=COUNTY&state_alpha=IL&county_name=McLean&domain_desc=TOTAL&statisticcat_desc=AREA+HARVESTED&key=CDA43551-0A5A-31F4-A033-7DD4ED6A9A17&format=JSON


McLean, IL: 100%|██████████| 11/11 [00:16<00:00,  1.50s/it]

API request failed: 400 Client Error: Bad Request for url: https://quickstats.nass.usda.gov/api/api_GET/?commodity_desc=CORN&year=2025&agg_level_desc=COUNTY&state_alpha=IL&county_name=McLean&domain_desc=TOTAL&statisticcat_desc=YIELD&key=CDA43551-0A5A-31F4-A033-7DD4ED6A9A17&format=JSON





In [70]:

# Save results
# Write all collected records to a CSV named after the crop and today's date.
date_stamp = datetime.now().strftime("%Y%m%d")
output_file = INTERIM_DIR / f"{CROP_CONFIG['name'].lower()}_yields_{date_stamp}.csv"
df = pd.DataFrame(all_records)
df.to_csv(output_file, index=False)  # index=False: the row index carries no information
print(f"Data saved to {output_file}")

Data saved to C:\Users\Arnold\OneDrive\Desktop\CAPSTONE PROJECT\farming_risk_regions\data\interim\yield_data\corn_yields_20250930.csv


In [71]:
# Data Summary
# Print record count, covered years and states, then the first rows of the frame.
earliest_year = df['year'].min()
latest_year = df['year'].max()
states_covered = ', '.join(df['state'].unique())

summary_lines = [
    "\nData Collection Summary",
    "-" * 50,
    f"Total records: {len(df)}",
    f"Year range: {earliest_year} - {latest_year}",
    f"States covered: {states_covered}",
    "\nSample of collected data:",
]
print("\n".join(summary_lines))
print(df.head())


Data Collection Summary
--------------------------------------------------
Total records: 22
Year range: 2015 - 2025
States covered: IA, IL

Sample of collected data:
   crop  year state county  area_harvested area_harvested_unit  yield_value  \
0  CORN  2015    IA  Story        161100.0               ACRES        188.0   
1  CORN  2016    IA  Story        171800.0               ACRES        211.9   
2  CORN  2017    IA  Story        158256.0               ACRES        200.3   
3  CORN  2018    IA  Story        157800.0               ACRES        192.3   
4  CORN  2019    IA  Story        166700.0               ACRES        189.3   

  yield_unit  
0  BU / ACRE  
1  BU / ACRE  
2  BU / ACRE  
3  BU / ACRE  
4  BU / ACRE  
