In [None]:
import geopandas as gpd
import folium as folium
import pygris

import pandas as pd
import numpy as np

# Show plain decimal numbers instead of scientific notation
# (the pandas/numpy counterpart of options(scipen=999) in R)
pd.set_option('display.float_format', '{:.2f}'.format)  # floats rendered with 2 decimals
np.set_printoptions(suppress=True)  # numpy arrays printed without scientific notation

# Inputs for the maps: the cattle inventory table and the 2022 state geometries from pygris
cattle_inv = pd.read_csv('data/cattle_inventories_2022_nass.csv')
states = pygris.states(cb=True, year=2022)

In [None]:
# Normalise the FIPS codes to text, left-padded with zeros to two characters
# (e.g. 1 -> "01"), so they can be joined against the states' 'STATEFP' column
fips_as_text = cattle_inv['state_fips_code'].astype(str)
cattle_inv['state_fips_code'] = fips_as_text.str.zfill(2)

In [None]:
# Attach the cattle inventory rows to the state geometries.
# `states` is the left-hand frame, so the merge is called on the GeoDataFrame
# and every state is kept; states without cattle rows get NaN in the cattle columns.
cattle_inv_gpd = states.merge(
    right=cattle_inv,
    how='left',
    left_on='STATEFP',
    right_on='state_fips_code',
)

### Create Maps Using GeoPandas

We'll create two simple choropleth maps, one per cattle category, showing cattle inventories by state.

In [None]:
# Inspect the distribution of 'Value' for each cattle category on its own


def _print_value_summary(values):
    """Print descriptive statistics, the maximum, and the non-null count of a Series."""
    print(values.describe())
    print(f"\nMax value: {values.max():,.0f}")
    print(f"States with data: {values.notna().sum()}")


is_on_feed = cattle_inv_gpd['short_desc'] == 'CATTLE, ON FEED - INVENTORY'
on_feed = cattle_inv_gpd.loc[is_on_feed, 'Value']
print("CATTLE, ON FEED - INVENTORY:")
_print_value_summary(on_feed)

# Visual separator between the two reports
print("\n" + "="*60 + "\n")

is_cows = cattle_inv_gpd['short_desc'] == 'CATTLE, COWS - INVENTORY'
cows = cattle_inv_gpd.loc[is_cows, 'Value']
print("CATTLE, COWS - INVENTORY:")
_print_value_summary(cows)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# Draw one choropleth panel per cattle category, side by side, for the
# contiguous United States. Values are shown in thousands of head.

# Non-contiguous states and territories are excluded from every layer.
# These names are matched against the 'NAME' column of the state geometries,
# so the gray background and the coloured layer are filtered the same way.
remove_states = ['Alaska',
                 'American Samoa',
                 'Puerto Rico',
                 'United States Virgin Islands',
                 'Hawaii',
                 'Guam',
                 'Commonwealth of the Northern Mariana Islands']
contiguous = cattle_inv_gpd[~cattle_inv_gpd['NAME'].isin(remove_states)]

# One row per state, used as a background so states without data still appear
all_states = contiguous.drop_duplicates(subset='STATEFP')

# Cattle categories present in the data (NaN rows are states with no cattle record)
categories = cattle_inv_gpd['short_desc'].dropna().unique()
if len(categories) == 0:
    raise ValueError(
        "No cattle categories found in 'short_desc'; "
        "check that the merge on state FIPS codes matched any rows."
    )

# 'YlOrRd' = Yellow-Orange-Red, good for showing intensity/magnitude
cmap = 'YlOrRd'

# figsize is (width, height) in inches; 9 inches per panel gives the
# 18 x 8 layout when there are two categories
fig, axes = plt.subplots(1, len(categories), figsize=(9 * len(categories), 8))
# plt.subplots returns a bare Axes for a single panel; always work with an array
axes = np.atleast_1d(axes)

for ax, category in zip(axes, categories):
    # Background layer: every contiguous state in light gray with black borders
    all_states.plot(
        ax=ax,
        color='lightgray',
        edgecolor='black',
        linewidth=0.8
    )

    # Rows for this category only, with 'Value' rescaled to thousands.
    # .assign works on a copy, so the filtered slice is never written to in place.
    category_data = contiguous[contiguous['short_desc'] == category].assign(
        Value=lambda frame: frame['Value'] / 1000
    )

    # Shorter panel title: drop the "CATTLE, " prefix and " - INVENTORY" suffix
    title = category.replace('CATTLE, ', '').replace(' - INVENTORY', '')

    # Coloured layer on top of the background. The colorbar tick format is
    # passed through legend_kwds, so there is no need to look the colorbar
    # axes up afterwards. A fresh formatter is created per panel because a
    # matplotlib Formatter should not be shared between axes.
    category_data.plot(
        column='Value',
        cmap=cmap,
        legend=True,
        ax=ax,
        edgecolor='black',
        linewidth=0.8,
        legend_kwds={
            'label': 'Head of Cattle (thousands)',  # values were divided by 1000 above
            'orientation': 'horizontal',            # colorbar below the map
            'shrink': 0.8,
            'pad': 0.05,
            # Integer tick labels with thousands separators (no scientific notation)
            'format': ticker.FuncFormatter(lambda x, pos: f'{int(x):,}'),
        }
    )

    ax.set_title(
        f'{title}\n(2022)',
        fontsize=14,
        fontweight='bold',
        pad=15  # Space between title and map
    )

    # Hide the frame, ticks and axis labels (longitude/latitude) in one call
    ax.set_axis_off()

# Overall title for the entire figure
fig.suptitle(
    'US Cattle Inventories by State (2022)',
    fontsize=16,
    fontweight='bold',
    y=0.98  # Position at top of figure
)

# Data source note at the bottom right
fig.text(
    0.99, 0.02,
    'Data Source: USDA National Agricultural Statistics Service (NASS)',
    ha='right',
    fontsize=9,
    style='italic',
    alpha=0.7
)

# Leave room for the suptitle (top) and the source note (bottom)
plt.tight_layout(rect=[0, 0.03, 1, 0.96])

# Display the figure
plt.show()