In [None]:
import pandas as pd
import requests
from dotenv import load_dotenv
import sys 
from pathlib import Path
import os

# ------------------------------ #
# Make the project's local `src` folder importable, then load the NASS API key.
# These three statements must run together and in this order: the import
# below only resolves after `src` has been placed on sys.path.
# NOTE(review): Path.cwd() means this assumes the notebook is launched from
# the directory that contains `src/` -- confirm.
# ------------------------------ #
src_path = Path.cwd() / 'src'
# Insert at position 0 so this folder is searched before other sys.path entries
sys.path.insert(0, str(src_path))
from utils.nass_api import NASS_API_KEY
# ------------------------------ #

In [None]:
# NASS Quick Stats GET endpoint; the response body is parsed as JSON below
base_url = "https://quickstats.nass.usda.gov/api/api_GET/"

# Query filters: national-level survey records of prices received for cattle
params = {'key': NASS_API_KEY,
          'source_desc': 'SURVEY',
          'sector_desc': 'ANIMALS & PRODUCTS',
          'group_desc': 'LIVESTOCK',
          'commodity_desc': 'CATTLE',
          'statisticcat_desc': 'PRICE RECEIVED',
          'agg_level_desc': 'NATIONAL'
         }

# A timeout keeps the cell from hanging indefinitely if the service stalls
response = requests.get(base_url, params=params, timeout=60)

# Fail loudly on an HTTP error status instead of running into a confusing
# KeyError on 'data' when the request was rejected
response.raise_for_status()

# The records are stored under the top-level 'data' key of the JSON payload
cattle_price = pd.DataFrame(response.json()['data'])

### Prepare monthly steer price data

In [None]:
# Columns carried forward from the raw API response
select_vars = [
    'year',
    'reference_period_desc',
    'begin_code',
    'freq_desc',
    'unit_desc',
    'sector_desc',
    'class_desc',
    'commodity_desc',
    'short_desc',
    'statisticcat_desc',
    'Value',
]

# Keep every row, but only the selected columns
cattle_price_sel = cattle_price.loc[:, select_vars]

Filter cattle data to only include records with "STEERS" in the description. 

This uses str.contains() with regex=True to find any description containing "STEERS"

In [None]:
# Boolean mask: True where the item description mentions STEERS
is_steer_record = cattle_price_sel["short_desc"].str.contains('STEERS', regex=True)
steer_prices = cattle_price_sel[is_steer_record]

# List the distinct descriptions that matched, to confirm the filter
print(steer_prices['short_desc'].unique())

Filter to monthly frequency only.

In [None]:
# Keep only the monthly series; .copy() gives an independent frame to modify later
is_monthly = steer_prices['freq_desc'].eq('MONTHLY')
steer_prices_monthly = steer_prices[is_monthly].copy()

In [None]:
# Distinct years present in the monthly data
steer_prices_monthly['year'].unique()

Check that we are not missing any years. 

In [None]:
# Years that actually appear in the monthly data
actual_years = set(steer_prices_monthly['year'].astype(int).unique())

# Every year between the earliest and latest observed year, inclusive
# (the +1 is needed because range() excludes its upper bound)
first_year, last_year = min(actual_years), max(actual_years)
expected_years = set(range(first_year, last_year + 1))

# Years inside that span that have no monthly records at all
missing_years = expected_years - actual_years

print(f"Missing years: {sorted(missing_years)}")
print(f"Total missing years: {len(missing_years)}")

# Also report the span and the number of years that are present
print(f"Years available: {first_year} to {last_year}")
print(f"Total years available: {len(actual_years)}")

Check that each year contains a full 12 months. 

In [None]:
# Number of monthly records per year (a complete year is expected to have 12)
records_per_year = steer_prices_monthly.groupby('year')['reference_period_desc'].size()

# Flatten to a two-column frame: 'year' and 'count'
count_by_month = records_per_year.reset_index(name='count')

Only the most recent year, 2025, does not contain a full set of 12 months of price data. 

In [None]:
# Flag the years whose record count differs from 12, then display those rows
not_eq_12 = count_by_month['count'].ne(12)
idx = count_by_month.loc[not_eq_12].index
count_by_month.loc[idx]


In [None]:
# Drop the incomplete year 2025. The year is cast to int for the comparison
# so the filter also works if 'year' is still stored as strings at this point:
# a string '2025' never equals the integer 2025, which would silently leave
# the partial year in the data.
is_partial_year = steer_prices_monthly['year'].astype(int) == 2025
steer_prices_monthly = steer_prices_monthly[~is_partial_year].copy()

Convert the `year` column to integer type for merging with CPI data. Because `steer_prices_monthly` was created with `.copy()` above, it can be modified without triggering a `SettingWithCopyWarning`. Also rename the `reference_period_desc` column to `month` and the `begin_code` column to `month_id`.

In [None]:
# Cast 'year' to int and give the period columns shorter names in one chain
steer_prices_monthly = (
    steer_prices_monthly
    .assign(year=lambda frame: frame['year'].astype(int))
    .rename(columns={'reference_period_desc': 'month',
                     'begin_code': 'month_id'})
)


Convert the 'Value' column from string to numeric. `errors='coerce'` will convert any non-numeric values (like "(D)" for suppressed data) to `NaN`. 

In [None]:
# Check whether any price is the suppression code (D) before converting to
# numeric. The comparison string must not include quote characters, and the
# values are stripped first in case the code is padded with whitespace.
steer_prices_monthly['Value'].astype(str).str.strip().eq('(D)').any()

In [None]:
# Convert prices to numbers. Thousands separators and surrounding whitespace
# are removed first so a value such as "1,234.50" is parsed rather than being
# coerced to NaN; anything still non-numeric (e.g. "(D)") becomes NaN.
# astype(str) keeps the cell safe to re-run after the column is already numeric.
value_as_text = (
    steer_prices_monthly['Value']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.strip()
)
steer_prices_monthly['Value'] = pd.to_numeric(value_as_text, errors='coerce')

Check for missing values in the `Value` column. 

In [None]:
# Count the prices that are NaN after the numeric conversion
n_missing = steer_prices_monthly['Value'].isna().sum()
print(f'''There are {n_missing} missing values in the 'Value' column.''')
# The drop below is currently disabled, so rows with a missing price are
# only reported here and stay in the data.
# steer_prices_monthly = steer_prices_monthly.dropna(subset=['Value'])

Rename `Value` column to `nominal_price` for clarity. This makes it explicit that these are nominal (not inflation-adjusted) prices. 

In [None]:
# Make it explicit that the price column holds nominal values
price_column_names = {'Value': 'nominal_price'}
steer_prices_monthly = steer_prices_monthly.rename(columns=price_column_names)

### Save to CSV

In [None]:
# Save csv. The output folder is created first so the write does not fail
# (after the API call has already run) when 'data/' is missing.
output_path = Path('data/steer_prices_monthly_nass.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
steer_prices_monthly.to_csv(output_path, index=False)