In [1]:
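##### Cleans EU FADN capital stock and labor data
# Converts per-farm values to regional totals (value per farm x farms represented),
# converts capital from EUR to USD, and reshapes to one row per region with one column per year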

import os
import pandas as pd

In [2]:
##### Load data

# Base directory for all paths below: the PARENT of the current working directory
# (despite its name, `cd` is not the working directory itself).
working_dir = os.getcwd()
cd = os.path.dirname(working_dir)

# Destinations for the cleaned outputs
save_path_capital = f"{cd}/Data/Clean/Capital_stock/EU_capital_stock.csv"
save_path_labor = f"{cd}/Data/Clean/Labor/EU_labor.csv"

# Inputs: raw capital/labor table, region correspondence table, exchange-rate table
EU_data = pd.read_csv(f"{cd}/Data/Raw/Sub_National/EU_FADN/capital_labor_01152026.csv")
EU_geo = pd.read_csv(f"{cd}/Data/Correspondence_tables/EU_FADN.csv")
USD_EUR = pd.read_csv(f"{cd}/Data/Exchange_rates/USD_EUR.csv")

In [3]:
##### Clean data

# Raw column names used below
FIXED_ASSETS_COL = '(SE441) Total fixed assets (€/farm)'
LABOR_INPUT_COL = '(SE010) Total labour input (AWU/farm)'
FARMS_COL = '(SYS02) Farms represented (nb)'

# Output layout: region code, units label, then one column per year 2004-2023
columns_to_keep = ['FADN_code_2020', 'Units', *range(2004, 2024)]


def _to_region_by_year(long_df, value_col, units_label, geo, keep_cols):
    """Reshape a long (Year, Region, value) table into the wide output layout.

    Parameters
    ----------
    long_df : pandas.DataFrame
        Long table containing 'Year', 'Region' and `value_col`.
    value_col : str
        Column of `long_df` whose values fill the year columns.
    units_label : str
        Text written to the 'Units' column of every row.
    geo : pandas.DataFrame
        Region table; matched to `long_df['Region']` through its
        'FADN_ID_2020' column.
    keep_cols : list
        Columns to return, in output order.

    Returns
    -------
    pandas.DataFrame
        One row per row of `geo`, restricted to `keep_cols`.

    Raises
    ------
    ValueError
        From `pivot`, if a (Region, Year) pair occurs more than once.
    KeyError
        If any entry of `keep_cols` is missing after the merge
        (e.g. a year with no data at all).
    """
    wide = long_df.pivot(index='Region', columns='Year', values=value_col).reset_index()
    # how='right' keeps every row of `geo`: regions without data get NaN,
    # and regions that are not listed in `geo` are dropped.
    wide = wide.merge(geo, left_on='Region', right_on='FADN_ID_2020', how='right')
    wide['Units'] = units_label
    return wide[keep_cols]


# replace '-' with 0 so the per-farm columns can be cast to float
# NOTE(review): '-' is presumably a missing/suppressed-value marker; confirm that
# counting those cells as 0 (rather than NaN) is intended.
for per_farm_col in (FIXED_ASSETS_COL, LABOR_INPUT_COL):
    EU_data[per_farm_col] = EU_data[per_farm_col].replace('-', 0)

# calculate totals: per-farm value x number of farms represented
farms_represented = EU_data[FARMS_COL]
EU_data['Total_fixed_assets_EUR'] = EU_data[FIXED_ASSETS_COL].astype('float64') * farms_represented
EU_data['Total_labor_input_AWU'] = EU_data[LABOR_INPUT_COL].astype('float64') * farms_represented

# split into labor and capital
EU_capital = EU_data[['Year', 'Region', 'Total_fixed_assets_EUR']]
EU_labor = EU_data[['Year', 'Region', 'Total_labor_input_AWU']]

# convert capital to USD
# validate='many_to_one' makes a duplicated Year in the exchange-rate table fail
# here with a clear MergeError instead of duplicating rows and breaking the pivot.
EU_capital = EU_capital.merge(USD_EUR, on='Year', how='left', validate='many_to_one')
EU_capital['Total_fixed_assets_USD'] = (
    EU_capital['Total_fixed_assets_EUR'] / EU_capital['EUR_per_USD_nominal']
)

# convert to wide, merge with the rest of the region data, add units,
# then re-order columns and drop un-needed ones
EU_capital_wide = _to_region_by_year(
    EU_capital,
    'Total_fixed_assets_USD',
    'Ag capital stock - USD (nominal)',
    EU_geo,
    columns_to_keep,
)
# NOTE(review): the values are AWU totals while the label says 'jobs'; the label is
# left unchanged here — confirm which wording downstream consumers expect.
EU_labor_wide = _to_region_by_year(
    EU_labor,
    'Total_labor_input_AWU',
    'Ag labor - jobs',
    EU_geo,
    columns_to_keep,
)

In [4]:
# Save cleaned data
# to_csv does not create missing folders, so make sure each output directory
# exists first (a no-op when it is already there).
for cleaned_table, out_path in (
    (EU_capital_wide, save_path_capital),
    (EU_labor_wide, save_path_labor),
):
    out_dir = os.path.dirname(out_path)
    if out_dir:  # makedirs('') would raise; an empty dirname means "current directory"
        os.makedirs(out_dir, exist_ok=True)
    cleaned_table.to_csv(out_path, index=False)