In [42]:
import pandas as pd
import requests
from dotenv import load_dotenv
import sys 
from pathlib import Path
import os

# ------------------------------ #
# Make the local `src` directory importable, then import the NASS Quick Stats
# API key from it. The two path statements must execute before the import,
# so keep these three lines together in one cell.
# NOTE(review): `src` is resolved relative to the current working directory —
# presumably the notebook is launched from the project root; confirm.
# ------------------------------ #
src_path = Path.cwd() / 'src'
sys.path.insert(0, str(src_path))  # prepend so `utils` is looked up in ./src first
from utils.nass_api import NASS_API_KEY
# ------------------------------ #

In [43]:
# NASS Quick Stats endpoint; the records come back as JSON.
base_url = "https://quickstats.nass.usda.gov/api/api_GET/"

# Query filters: national-level survey records of prices received for cattle.
params = {
    'key': NASS_API_KEY,
    'source_desc': 'SURVEY',
    'sector_desc': 'ANIMALS & PRODUCTS',
    'group_desc': 'LIVESTOCK',
    'commodity_desc': 'CATTLE',
    'statisticcat_desc': 'PRICE RECEIVED',
    'agg_level_desc': 'NATIONAL',
}

# The timeout (seconds) keeps the cell from hanging indefinitely if the service
# stalls. raise_for_status() turns an HTTP error response into an exception
# here, instead of a confusing KeyError on 'data' further down.
response = requests.get(base_url, params=params, timeout=60)
response.raise_for_status()

# The records sit under the top-level 'data' key of the JSON payload.
cattle_price = pd.DataFrame(response.json()['data'])

### Prepare annual steer price data

In [44]:
# Columns kept for the price analysis, in the order they should appear.
select_vars = [
    'year',
    'reference_period_desc',
    'begin_code',
    'freq_desc',
    'unit_desc',
    'sector_desc',
    'class_desc',
    'commodity_desc',
    'short_desc',
    'statisticcat_desc',
    'Value',
]

# Narrow the raw API frame down to just those columns.
cattle_price_sel = cattle_price[select_vars]

Filter cattle data to only include records with "STEERS" in the description. 

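This uses `str.contains()` with `regex=True` to match any `short_desc` value containing "STEERS". As the output below shows, the only matching series is the combined steers-and-heifers (GE 500 lbs) price.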

In [45]:
# Boolean mask: True where the series description mentions steers.
is_steer_row = cattle_price_sel["short_desc"].str.contains('STEERS', regex=True)
steer_prices = cattle_price_sel[is_steer_row]

# Show which distinct series descriptions were matched.
print(steer_prices['short_desc'].unique())

['CATTLE, STEERS & HEIFERS, GE 500 LBS - PRICE RECEIVED, MEASURED IN $ / CWT']


Filter to annual frequency only (exclude monthly, quarterly, etc.).

In [46]:
# Keep only the annual records. .copy() yields an independent frame, so the
# column assignments in later cells do not act on a slice of steer_prices.
steer_prices_annual = steer_prices[steer_prices['freq_desc'].eq('ANNUAL')].copy()

Check that we are not missing any years. 

In [47]:
# Distinct years present in the annual data, as integers.
actual_years = set(steer_prices_annual['year'].astype(int).unique())

# Every year from the earliest to the latest observed one, inclusive
# (range() excludes its stop value, hence the +1).
expected_years = set(range(min(actual_years), max(actual_years) + 1))

# Years inside that span for which there is no record.
missing_years = expected_years.difference(actual_years)

# Report the gaps first, then the span and count of years actually covered.
print(
    f"Missing years: {sorted(missing_years)}",
    f"Total missing years: {len(missing_years)}",
    f"Years available: {min(actual_years)} to {max(actual_years)}",
    f"Total years available: {len(actual_years)}",
    sep="\n",
)

Missing years: []
Total missing years: 0
Years available: 2003 to 2024
Total years available: 22


Convert the `year` column to integer type for merging with CPI data. The `.copy()` call made when filtering to annual records lets us modify the frame without triggering a `SettingWithCopyWarning`.

In [48]:
# Cast `year` to integer; every other column keeps its current dtype.
steer_prices_annual = steer_prices_annual.astype({'year': int})

Convert the 'Value' column from string to numeric. `errors='coerce'` will convert any non-numeric values (like "(D)" for suppressed data) to `NaN`. 

In [49]:
# Does any price carry the suppression marker "(D)"? Compare against the bare
# marker: a literal that embeds quote characters ("'(D)'") can never equal it,
# which would make this check always False. Whitespace is trimmed first in
# case the field is padded.
steer_prices['Value'].astype(str).str.strip().eq('(D)').any()

np.False_

In [50]:
# Convert the price strings to numbers. Thousands separators are stripped
# first, because pd.to_numeric cannot parse "1,234.00" and errors='coerce'
# would silently turn such a valid price into NaN. astype(str) keeps the cell
# safe to re-run after the column is already numeric. Anything still
# non-numeric (e.g. the "(D)" suppression marker) becomes NaN.
steer_prices_annual['Value'] = pd.to_numeric(
    steer_prices_annual['Value'].astype(str).str.replace(',', '', regex=False).str.strip(),
    errors='coerce',
)

Check for missing values in the `Value` column. 

In [51]:
# Count the prices that are NaN after the numeric conversion.
n_missing = steer_prices_annual['Value'].isna().sum()
print(f'''There are {n_missing} missing values in the 'Value' column.''')

# Optional clean-up, currently disabled: drop rows that have no price.
# steer_prices_annual = steer_prices_annual.dropna(subset=['Value'])

There are 0 missing values in the 'Value' column.


Rename `Value` column to `nominal_price` for clarity. This makes it explicit that these are nominal (not inflation-adjusted) prices. 

In [52]:
# Label the price column as nominal, i.e. not adjusted for inflation.
steer_prices_annual = steer_prices_annual.rename({'Value': 'nominal_price'}, axis='columns')

### Save to CSV

In [53]:
# Write the annual steer prices to CSV. The target directory is created first
# because to_csv raises OSError when the parent folder does not exist (e.g. on
# a fresh checkout). The path is relative to the current working directory.
output_path = Path('data/steer_prices_annual_nass.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
steer_prices_annual.to_csv(output_path, index=False)