This notebook downloads 2022 state-level cattle inventory data (cattle on feed and cows) from the USDA NASS Quick Stats API, cleans the `Value` column, and saves the result to CSV.

In [None]:
import pandas as pd
import requests
from dotenv import load_dotenv
import sys 
from pathlib import Path
import os

# ------------------------------ #
# Keep these three statements together and in this order: the project's
# `src` folder (resolved from the current working directory) has to be on
# sys.path before `utils.nass_api` can be imported to obtain the API key.
# ------------------------------ #
src_path = Path.cwd().joinpath('src')
sys.path.insert(0, str(src_path))
from utils.nass_api import NASS_API_KEY
# ------------------------------ #

### Cattle, on Feed

In [None]:
# Download the 2022 Census, state-level "CATTLE, ON FEED - INVENTORY" records
# from the NASS Quick Stats API and load them into a DataFrame.
base_url = "https://quickstats.nass.usda.gov/api/api_GET/"

# Filters sent to the API as query-string parameters.
params = {
    'key': NASS_API_KEY,
    'source_desc': 'CENSUS',
    'sector_desc': 'ANIMALS & PRODUCTS',
    'group_desc': 'LIVESTOCK',
    'commodity_desc': 'CATTLE',
    'statisticcat_desc': 'INVENTORY',
    'short_desc': 'CATTLE, ON FEED - INVENTORY',
    'domain_desc': 'TOTAL',
    'agg_level_desc': 'STATE',
    'year': '2022',
}

# The timeout keeps "Run All" from hanging indefinitely if the service does
# not answer.
response = requests.get(base_url, params=params, timeout=60)

# Stop here with the HTTP status if the request was rejected, instead of
# failing later with a less informative KeyError on 'data'.
response.raise_for_status()

# The records are read from the 'data' key of the JSON payload.
cattle_on_feed = pd.DataFrame(response.json()['data'])

### Cattle, Cows

In [None]:
# Download the 2022 Census, state-level "CATTLE, COWS - INVENTORY" records
# from the NASS Quick Stats API and load them into a DataFrame.
base_url = "https://quickstats.nass.usda.gov/api/api_GET/"

# Filters sent to the API as query-string parameters.
params = {
    'key': NASS_API_KEY,
    'source_desc': 'CENSUS',
    'sector_desc': 'ANIMALS & PRODUCTS',
    'group_desc': 'LIVESTOCK',
    'commodity_desc': 'CATTLE',
    'statisticcat_desc': 'INVENTORY',
    'short_desc': 'CATTLE, COWS - INVENTORY',
    'domain_desc': 'TOTAL',
    'agg_level_desc': 'STATE',
    'year': '2022',
}

# The timeout keeps "Run All" from hanging indefinitely if the service does
# not answer.
response = requests.get(base_url, params=params, timeout=60)

# Stop here with the HTTP status if the request was rejected, instead of
# failing later with a less informative KeyError on 'data'.
response.raise_for_status()

# The records are read from the 'data' key of the JSON payload.
cattle_cows = pd.DataFrame(response.json()['data'])

### Combine the two datasets

In [None]:
# Stack the on-feed and cow records into one frame with a fresh 0..n-1 index.
inventory_frames = [cattle_on_feed, cattle_cows]
cattle_by_type = pd.concat(inventory_frames, ignore_index=True)

### Select relevant columns, insert `year` column, and clean `Value` column

In [None]:
# Columns to keep from the Quick Stats records, in output order.
select_vars = [
    'state_name',
    'state_fips_code',
    'unit_desc',
    'sector_desc',
    'class_desc',
    'commodity_desc',
    'short_desc',
    'statisticcat_desc',
    'Value',
]

### Prepare cow and fed-cattle data

In [None]:
# Keep only the columns named in select_vars (all rows), in that order.
cattle_by_type = cattle_by_type.loc[:, select_vars]


In [None]:
# Add a constant 'year' column as the first column.
# DataFrame.insert raises ValueError when the column already exists, so the
# guard lets this cell be re-run without failing.
if 'year' not in cattle_by_type.columns:
    cattle_by_type.insert(0, 'year', 2022)

Convert any non-numeric values (like "(D)" for suppressed data) to 0. 

In [None]:
# True if at least one 'Value' entry is exactly "(D)" once surrounding
# whitespace is removed.
stripped_values = cattle_by_type['Value'].str.strip()
(stripped_values == "(D)").any()

In [None]:
# Convert the text 'Value' column to floats.
# The raw entries are strings: numbers written with thousands separators, or
# a code such as "(D)" in place of a number. The notebook's stated intent is
# to turn any non-numeric entry into 0.
raw_value = cattle_by_type['Value']

# Skip the conversion when the column is already numeric, so re-running this
# cell does not fail on the .str accessor.
if not pd.api.types.is_numeric_dtype(raw_value):
    value_text = raw_value.str.strip().str.replace(",", "", regex=False)

    # errors='coerce' yields NaN for anything that is not a number instead of
    # raising, which covers "(D)", empty strings, and any other code.
    value_number = pd.to_numeric(value_text, errors='coerce')

    # Only text that failed to parse becomes 0; entries that were missing to
    # begin with stay NaN so they can still be counted as missing afterwards.
    is_non_numeric_text = value_text.notna() & value_number.isna()
    cattle_by_type['Value'] = (value_number
                               .mask(is_non_numeric_text, 0.0)
                               .astype(float))

Check for missing values in the `Value` column. 

In [None]:
# Count the NaN entries remaining in 'Value' and report the total.
missing_count = cattle_by_type['Value'].isna().sum()
print(f'''There are {missing_count} missing values in the 'Value' column.''')
# Optional follow-up, left disabled: drop the rows whose value (# of head)
# is missing.
# cattle_by_type = cattle_by_type.dropna(subset=['Value'])

### Save to CSV

In [None]:
# Write the cleaned inventory table to disk without the row index.
# The path is relative to the current working directory.
output_csv = 'data/cattle_inventories_2022_nass.csv'
cattle_by_type.to_csv(output_csv, index=False)