In [1]:
##### Cleans US capital stock data
# combines machinery, land, and buildings and reformats

import os
import pandas as pd

In [2]:
##### Load data
# Reads the two raw asset tables (machinery; land & buildings) and a county
# correspondence table, and defines where the cleaned output will be written.

# Base directory: the PARENT of the current working directory (not the cwd itself).
# All Data/ paths below are resolved against it, so the notebook has to be run
# from a folder that sits one level below the directory containing Data/.
cd = os.path.dirname(os.getcwd())

# Import data
# NOTE: read_csv is called with its default NA handling, so cells such as 'NA'
# or empty strings arrive as NaN rather than as text.
machinery = pd.read_csv(f"{cd}/Data/Raw/Sub_National/USDA_ag_census/assets_machinery_011526.csv")
land_buildings = pd.read_csv(f"{cd}/Data/Raw/Sub_National/USDA_ag_census/assets_land_buildings_011526.csv")

# NOTE(review): US_county_codes is loaded here but not referenced by any of the
# cells visible in this notebook -- confirm whether it is still needed.
US_county_codes = pd.read_csv(f"{cd}/Data/Correspondence_tables/US_counties.csv")

# Set save path (destination of the cleaned, wide-format capital stock table)
save_path = f"{cd}/Data/Clean/Capital_stock/US_capital_stock.csv"

In [3]:
##### Clean

# create combined ANSI code: 2-digit state code followed by 3-digit county code
def _combine_ansi(frame):
    """Return the 5-character state+county ANSI code for every row of `frame`.

    Reads the 'State ANSI' and 'County ANSI' columns. Missing codes are filled
    with 0 before formatting, so a row without a county code ends in '000'
    and a row without a state code starts with '00'.
    """
    def _padded(column, width):
        # numeric code -> integer -> zero-padded string of fixed width
        return frame[column].fillna(0).astype(int).astype(str).str.zfill(width)

    return _padded('State ANSI', 2) + _padded('County ANSI', 3)


machinery['Full_ANSI'] = _combine_ansi(machinery)
land_buildings['Full_ANSI'] = _combine_ansi(land_buildings)

# keep only the merge keys and the reported value in each table
columns_to_keep = ['Year', 'Full_ANSI', 'Value']
machinery = machinery.loc[:, columns_to_keep]
land_buildings = land_buildings.loc[:, columns_to_keep]

# Give each 'Value' column its final name up front, then join the two tables on
# year and county code. The join is an inner join (the pandas default), so a
# Year/Full_ANSI pair that is present in only one table is not carried over.
capital_stock = machinery.rename(
    columns={'Value': 'Machinery_Value_USD_nominal'}
).merge(
    land_buildings.rename(columns={'Value': 'Land_Buildings_Value_USD_nominal'}),
    how='inner',
    on=['Year', 'Full_ANSI'],
)

# convert the value columns from USDA-formatted text to floats
cols_to_numeric = [
    'Machinery_Value_USD_nominal',
    'Land_Buildings_Value_USD_nominal'
]

# Placeholders that are counted as zero: (D), (Z), and NA written either as
# '(NA)' or as bare 'NA'. The parenthesised forms are listed first so that
# '(NA)' is consumed as a whole instead of being turned into '(0)', which
# cannot be parsed as a number.
usda_zero_symbols = r'\((?:D|Z|NA)\)|NA'

for col in cols_to_numeric:

    raw_values = capital_stock[col]

    cleaned = (
        raw_values
        .astype(str)
        .str.strip()                                      # drop padding around symbols/numbers
        .str.replace(',', '', regex=False)                # remove thousands separators
        .str.replace(usda_zero_symbols, '0', regex=True)  # replace USDA symbols with 0's
    )

    # Cells that were already missing when the CSV was read (read_csv parses
    # 'NA' and empty cells as NaN by default) never contain the text 'NA', so
    # the symbol replacement above cannot reach them. Zero them explicitly;
    # otherwise they stay NaN and make the summed total NaN as well.
    # Any other unexpected token still raises in astype(float) instead of
    # being silently converted.
    capital_stock[col] = cleaned.mask(raw_values.isna(), '0').astype(float)

# total capital stock = machinery + land & buildings (row by row)
capital_stock['Total_Capital_Stock_Value_USD_nominal'] = capital_stock[
    'Machinery_Value_USD_nominal'
].add(capital_stock['Land_Buildings_Value_USD_nominal'])

# Reshape to wide format: one row per county code, one column per year.
# NOTE(review): pivot needs every (Full_ANSI, Year) pair to be unique.
wide_totals = pd.pivot(
    capital_stock,
    index='Full_ANSI',
    columns='Year',
    values='Total_Capital_Stock_Value_USD_nominal',
).reset_index()

# Append the units label as the last column and re-apply the 5-character zero
# padding to the county code (Full_ANSI stays in its original first position).
capital_stock_wide = wide_totals.assign(
    Units='Ag capital stock - USD (nominal)',
    Full_ANSI=lambda table: table['Full_ANSI'].astype(str).str.zfill(5),
)

In [4]:
##### Save cleaned data
# to_csv does not create missing folders, so make sure the output directory
# exists before writing (no effect when it is already there).
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# index=False: the row index carries no information after reset_index, so it is not written
capital_stock_wide.to_csv(save_path, index=False)