In [1]:
##### Cleans ILO agricultural labor data
# aligns with core ISO codes and reformats data

import os
import pandas as pd

In [2]:
##### Load data

# Project root: the PARENT of the directory the notebook is run from
# (os.getcwd() is the working directory; dirname() steps one level up).
cd = os.path.dirname(os.getcwd())

# Raw FAO/ILO agricultural labor extracts
ILO_estimate = pd.read_csv(
    f"{cd}/Data/Raw/FAO_ILO_labor/FAO_ILO_ag_estimates_01142026.csv"
)
FAO_crop_animal = pd.read_csv(
    f"{cd}/Data/Raw/FAO_ILO_labor/FAO_ILO_crop_animal_labor_01142026.csv"
)

# Country correspondence table; the file is decoded as cp1252 rather than
# pandas' default UTF-8.
country_codes = pd.read_csv(f"{cd}/Data/Correspondence_tables/country_names.csv", encoding="cp1252")

# Destinations for the two cleaned tables
save_path_1 = f"{cd}/Data/Clean/Labor/ILO_ag_labor_estimate.csv"
save_path_2 = f"{cd}/Data/Clean/Labor/ILO_crop_animal_labor.csv"

In [3]:
##### Clean data

# Raw column name -> name used in the rest of this notebook.
COLUMN_RENAMES = {
    'Area Code (M49)': 'FAO_code',
    'Year': 'Year',
    'Value': 'Values'
}
# Label written to the 'Units' column of both output tables.
UNITS_LABEL = 'Ag labor - thousand jobs'
# Identifier columns placed before the per-year value columns.
LEADING_COLUMNS = ['ISO3', 'Country_Name', 'Units']


def _standardize_columns(labor_long):
    """Keep only the area-code, year and value columns, under their notebook names.

    Parameters
    ----------
    labor_long : pd.DataFrame
        Either a raw table containing the keys of COLUMN_RENAMES, or a table
        that has already been standardized by this function.

    Returns
    -------
    pd.DataFrame
        A new frame with columns 'FAO_code', 'Year', 'Values'.

    Raises
    ------
    KeyError
        If neither the raw nor the standardized column names are all present.
    """
    raw_names = list(COLUMN_RENAMES)
    if all(name in labor_long.columns for name in raw_names):
        return labor_long[raw_names].rename(columns=COLUMN_RENAMES)
    # Already standardized (e.g. this cell is being re-run): selecting the
    # renamed columns keeps the cell idempotent instead of raising KeyError
    # on 'Area Code (M49)'.
    return labor_long[list(COLUMN_RENAMES.values())]


def _to_wide_iso3(labor_long, codes, units):
    """Reshape a standardized long table to one row per area code with ISO3 labels.

    Parameters
    ----------
    labor_long : pd.DataFrame
        Frame with columns 'FAO_code', 'Year', 'Values'.
    codes : pd.DataFrame
        Correspondence table with columns 'FAO_code', 'ISO3', 'Country_Name'.
    units : str
        Text stored in the 'Units' column of every row.

    Returns
    -------
    pd.DataFrame
        Columns 'ISO3', 'Country_Name', 'Units' followed by one column per
        year; rows whose area code has no ISO3 match are dropped.

    Raises
    ------
    ValueError
        From DataFrame.pivot if an ('FAO_code', 'Year') pair occurs more than once.
    """
    wide = labor_long.pivot(
        index='FAO_code',
        columns='Year',
        values='Values'
    ).reset_index()
    wide['Units'] = units

    # NOTE(review): the join key originates from 'Area Code (M49)' but is
    # matched against the correspondence table's 'FAO_code' column - confirm
    # that column holds M49 codes of the same dtype. Unmatched rows get a
    # missing ISO3 and are silently dropped below.
    wide = wide.merge(
        codes[['FAO_code', 'ISO3', 'Country_Name']],
        on='FAO_code',
        how='left'
    )

    # Identifier columns first, then the year columns in pivot order; the
    # numeric area code is not part of the output.
    value_columns = [
        col for col in wide.columns
        if col not in LEADING_COLUMNS + ['FAO_code']
    ]
    return wide[LEADING_COLUMNS + value_columns].dropna(subset=['ISO3'])


# drop unnecessary columns and rename the rest
ILO_estimate = _standardize_columns(ILO_estimate)
FAO_crop_animal = _standardize_columns(FAO_crop_animal)

# reformat to wide format, add units, attach ISO3 codes, drop unmatched areas
ILO_estimate_wide = _to_wide_iso3(ILO_estimate, country_codes, UNITS_LABEL)
FAO_crop_animal_wide = _to_wide_iso3(FAO_crop_animal, country_codes, UNITS_LABEL)

In [4]:
##### Save cleaned data
# Write each cleaned table to its destination without the row index.
# The output folder is created first because to_csv does not create missing
# parent directories and would otherwise fail on a fresh checkout.
for cleaned_table, destination in (
    (ILO_estimate_wide, save_path_1),
    (FAO_crop_animal_wide, save_path_2),
):
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    cleaned_table.to_csv(destination, index=False)