Notebook to obtain state-level almond bearing-acreage (area bearing) data for 2022 from NASS Quick Stats.

In [None]:
import pandas as pd
import requests
from dotenv import load_dotenv
import sys 
from pathlib import Path
import os

# ------------------------------ #
# Run these lines as one unit: the NASS API key import at the bottom
# only resolves after <current working directory>/src is on sys.path.
# ------------------------------ #
src_path = Path.cwd().joinpath('src')
sys.path.insert(0, os.fspath(src_path))
from utils.nass_api import NASS_API_KEY
# ------------------------------ #

### Almonds

In [None]:
base_url = "https://quickstats.nass.usda.gov/api/api_GET/"

# Query for the 2022 Census, state-level "ALMONDS - ACRES BEARING" totals.
params = {'key': NASS_API_KEY,
          'source_desc': 'CENSUS',
          'sector_desc': 'CROPS',
          'group_desc': 'FRUIT & TREE NUTS',
          'commodity_desc': 'ALMONDS',
          'statisticcat_desc': 'AREA BEARING',
          'short_desc': 'ALMONDS - ACRES BEARING',
          'domain_desc': 'TOTAL',
          'agg_level_desc': 'STATE',
          'year': '2022'
         }

# Bound the wait so a stalled connection cannot hang the kernel indefinitely.
response = requests.get(base_url, params=params, timeout=60)

# Stop with a readable error on a non-2xx reply. The message is built by hand
# rather than with response.raise_for_status() because that exception text
# includes the request URL, which carries the API key.
if not response.ok:
    raise RuntimeError(
        f"NASS Quick Stats request failed with HTTP {response.status_code}: "
        f"{response.text[:500]}"
    )

payload = response.json()

# Only build the table when the reply actually contains a 'data' field;
# otherwise show what came back instead of a bare KeyError.
if 'data' not in payload:
    raise KeyError(f"NASS Quick Stats response has no 'data' field: {payload}")

almonds = pd.DataFrame(payload['data'])

### Select relevant columns and clean `Value` column

In [None]:
# Columns kept from the Quick Stats response, in output order.
select_vars = [
    'year',
    'state_name',
    'state_fips_code',
    'unit_desc',
    'sector_desc',
    'class_desc',
    'commodity_desc',
    'short_desc',
    'statisticcat_desc',
    'Value',
]

### Prepare almond data

In [None]:
# Keep every row but only the columns listed in select_vars.
almonds = almonds.loc[:, select_vars]


Convert any non-numeric values (like "(D)" for suppressed data) to 0. 

In [None]:
# True when at least one Value entry is exactly "(D)" once surrounding
# whitespace is removed.
stripped_values = almonds['Value'].str.strip()
(stripped_values == "(D)").any()

In [None]:
# Convert the raw `Value` text to floats.
# Thousands separators are removed first. Every entry that still is not a
# number afterwards (e.g. "(D)", "(Z)", an empty string, or any other text
# code) becomes 0 instead of making the float conversion fail. Entries that
# were missing to begin with stay NaN so the missing-value check that follows
# can still count them.
value_text = (almonds['Value']
              .str.strip()
              .str.replace(",", "", regex=False))
value_num = pd.to_numeric(value_text, errors='coerce').astype(float)
# Had text, but the text was not numeric -> 0.
value_num[value_text.notna() & value_num.isna()] = 0.0
almonds['Value'] = value_num

Check for missing values in the `Value` column. 

In [None]:
# Count the NaN entries left in `Value` and report them.
n_missing = almonds['Value'].isna().sum()
print(f"There are {n_missing} missing values in the 'Value' column.")
# Optional, currently disabled: drop the rows whose Value is missing.
# Suppressed "(D)" entries were already converted to 0 during cleaning, so
# this would only remove rows that had no value at all.
# almonds = almonds.dropna(subset=['Value'])

### Save to CSV

In [None]:
# Write the cleaned table to disk without the DataFrame index column.
csv_path = 'data/almonds_acres_bearing_nass_2022.csv'
almonds.to_csv(csv_path, index=False)