In [1]:
##### Cleans US agricultural labor data (BLS)
# Combines crop and animal employment by county and reshapes to one column per year

import os
import pandas as pd

In [2]:
##### Load data

# Base directory: the PARENT of the current working directory
# (every Data/ path below is resolved under it)
cd = os.path.dirname(os.getcwd())

# Folder holding the raw crop / animal labor extracts
raw_dir = f"{cd}/Data/Raw/US_BLS_ag_labor"

# Set save path
save_path = f"{cd}/Data/Clean/Labor/US_county_labor_BLS.csv"

# Import data
crop = pd.read_csv(f"{raw_dir}/crop_labor_2020_01152026.csv")
animal = pd.read_csv(f"{raw_dir}/animal_labor_2020_01152026.csv")

# County correspondence table
# NOTE(review): not referenced by the cleaning or saving cells visible here - confirm it is still needed
US_county_codes = pd.read_csv(f"{cd}/Data/Correspondence_tables/US_counties.csv")

In [3]:
##### Clean

def _county_employment(raw, jobs_col):
    """Collapse a raw labor table to one row per (Year, Full_ANSI).

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame with 'year', 'area_fips' and 'annual_avg_emplvl' columns.
    jobs_col : str
        Name given to the employment column in the result.

    Returns
    -------
    pandas.DataFrame
        Columns 'Year', 'Full_ANSI' and `jobs_col`, one row per
        (Year, Full_ANSI). `raw` itself is not modified, so the cell can be
        re-run without reloading the data.
    """
    # create combined ANSI code (State ANSI 00 + County ANSI 000);
    # a missing area_fips becomes '00000', which the '000' suffix filter
    # at the end of this cell removes
    full_ansi = raw['area_fips'].fillna(0).astype(int).astype(str).str.zfill(5)

    tidy = pd.DataFrame({
        'Year': raw['year'],
        'Full_ANSI': full_ansi,
        jobs_col: raw['annual_avg_emplvl'],
    })

    # where there are duplicate entries of Year and Full_ANSI take the max
    # (duplicates are due to different reporting entities)
    return tidy.groupby(['Year', 'Full_ANSI'], as_index=False).max()


# de-duplicate each dataset BEFORE merging so duplicate keys cannot
# multiply rows in the merge
crop_jobs = _county_employment(crop, 'Crop_Jobs')
animal_jobs = _county_employment(animal, 'Animal_Jobs')

# merge datasets; outer join keeps areas that report only crop or only
# animal employment (the default inner join would silently drop them)
labor = pd.merge(crop_jobs, animal_jobs, on=['Year', 'Full_ANSI'], how='outer')

# calculate total employment; a side with no record counts as 0 jobs,
# and the total is NaN only when both sides are missing
labor['Total_Jobs'] = labor['Crop_Jobs'].add(labor['Animal_Jobs'], fill_value=0)

# convert to wide (one row per Full_ANSI, one column per Year)
labor_wide = labor.pivot(
    index='Full_ANSI',
    columns='Year',
    values='Total_Jobs'
).reset_index()

# add units
labor_wide['Units'] = 'Ag employment - jobs'

# drop if 3 right digits of Full_ANSI are 000 (state level data)
labor_wide = labor_wide[~labor_wide['Full_ANSI'].str.endswith('000')]

In [4]:
##### Save cleaned data

# make sure the output folder exists so a fresh checkout can run end to end
out_dir = os.path.dirname(save_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

# write the wide table without the positional index
labor_wide.to_csv(save_path, index=False)