In [1]:
##### Cleans US labor data
# sums hired and unpaid workers per ANSI code and year, then reshapes to wide format (one column per year)

import os
import pandas as pd

In [2]:
##### Load data

# Base directory for all paths below. NOTE: this is the PARENT of the current
# working directory (os.path.dirname drops the last path component), so the
# notebook has to be run from a folder that sits directly inside that base.
cd = os.path.dirname(os.getcwd())

# Import data: the two raw labor tables (hired workers, unpaid workers)
hired_labor = pd.read_csv(f"{cd}/Data/Raw/Sub_National/USDA_ag_census/ag_workers_011526.csv")
unpaid_labor = pd.read_csv(f"{cd}/Data/Raw/Sub_National/USDA_ag_census/ag_workers_unpaid_011526.csv")

# County correspondence table.
# NOTE(review): not referenced by any of the cells visible here — confirm it is still needed.
US_county_codes = pd.read_csv(f"{cd}/Data/Correspondence_tables/US_counties.csv")

# Set save path (destination of the cleaned wide table written in the last cell)
save_path = f"{cd}/Data/Clean/Labor/US_labor.csv"

In [3]:
##### Clean

# Build the combined 5-character ANSI key on both raw tables:
# 2-digit state code followed by 3-digit county code, each zero-padded.
def _full_ansi(census_table):
    """Return the state+county ANSI key for every row of `census_table`.

    Missing state or county codes are treated as 0 before padding, so a row
    without a county code ends in '000'.
    """
    state_part = census_table['State ANSI'].fillna(0).astype(int).astype(str).str.zfill(2)
    county_part = census_table['County ANSI'].fillna(0).astype(int).astype(str).str.zfill(3)
    return state_part + county_part


hired_labor['Full_ANSI'] = _full_ansi(hired_labor)
unpaid_labor['Full_ANSI'] = _full_ansi(unpaid_labor)

# keep only the merge keys and the reported figure
columns_to_keep = ['Year', 'Full_ANSI', 'Value']
hired_labor = hired_labor[columns_to_keep]
unpaid_labor = unpaid_labor[columns_to_keep]

# Join the two tables on year + ANSI code, then give each table's 'Value'
# column a descriptive name ('_x' = left/hired, '_y' = right/unpaid).
# The merge uses pandas' default inner join, so only (Year, Full_ANSI) pairs
# present in BOTH tables are kept.
labor = (
    hired_labor
    .merge(unpaid_labor, on=['Year', 'Full_ANSI'])
    .rename(columns={'Value_x': 'Hired_Workers', 'Value_y': 'Unpaid_Family_Workers'})
)

# convert columns to numeric
cols_to_numeric = [
    'Hired_Workers',
    'Unpaid_Family_Workers'
]


def _usda_counts_to_float(raw_counts):
    """Convert a column of reported counts to floats, with non-reported values as 0.

    Parameters
    ----------
    raw_counts : pd.Series
        Values as loaded from the CSV: numbers, number strings with thousands
        separators (e.g. '1,234'), the USDA symbols '(D)' / '(Z)', or missing.

    Returns
    -------
    pd.Series
        Float series with no missing values.

    Raises
    ------
    ValueError
        If a value is neither numeric nor one of the handled symbols.
    """
    return (
        raw_counts
        .astype(str)
        .str.replace(',', '', regex=True)  # remove commas from strings
        .replace({'\\(D\\)': '0', '\\(Z\\)': '0', 'NA': '0'}, regex=True)  # replace USDA symbols with 0's
        .astype(float)
        # Cells that read_csv already parsed as missing (it treats 'NA' and
        # empty fields as NaN by default) never match the 'NA' pattern above
        # and come through as NaN. Fill them here so that one missing
        # component does not turn the whole Employment sum into NaN.
        .fillna(0.0)
    )


for col in cols_to_numeric:
    labor[col] = _usda_counts_to_float(labor[col])

# calculate total employment (hired + unpaid family workers)
labor['Employment'] = (
    labor['Hired_Workers'] +
    labor['Unpaid_Family_Workers']
)

# Reshape long -> wide: one row per ANSI code, one column per year holding
# the Employment total. pivot() raises if a (Full_ANSI, Year) pair occurs
# more than once in `labor`.
labor_wide = (
    labor
    .pivot(index='Full_ANSI', columns='Year', values='Employment')
    .reset_index()
    .assign(Units='Ag labor - jobs')  # add units
    .fillna(0)                        # replace all missing with 0
)

# make sure the ANSI key is a zero-padded 5-character string
labor_wide['Full_ANSI'] = labor_wide['Full_ANSI'].astype(str).str.zfill(5)

In [4]:
##### Save cleaned data
# to_csv does not create missing folders, so make sure the output directory
# exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(save_path), exist_ok=True)
labor_wide.to_csv(save_path, index=False)