In [16]:
##### Cleans China capital stock data
# does basic calculation and reformats

import os
import pandas as pd

In [17]:
##### Load data

# Base folder for all paths: the PARENT of the current working directory
# (os.getcwd() is the folder the kernel runs in; dirname steps one level up)
cd = os.path.dirname(os.getcwd())

# Raw input tables (wide format: one row per Region, one column per year)
raw_dir = f"{cd}/Data/Raw/CHN_stats"
households = pd.read_csv(f"{raw_dir}/Rural_households_10000.csv")
asset_value = pd.read_csv(f"{raw_dir}/Value_fixed_assets_agriculture_yuan_per_household.csv")
ag_power = pd.read_csv(f"{raw_dir}/Total_Power_Agricultural_Machinery_10000kw.csv")

# Lookup tables: region name -> admin code, and yuan-per-USD rate by year
CHN_codes = pd.read_csv(f"{cd}/Data/Correspondence_tables/CHN_provinces.csv")
USD_YUAN = pd.read_csv(f"{cd}/Data/Exchange_rates/USD_YUAN.csv")

# Where the cleaned table is written
save_path = f"{cd}/Data/Clean/Capital_stock/CHN_capital_stock_NBS.csv"

In [18]:
##### Do calculation
# value data only runs until 2012 and is per household
# first multiply by households to get total value in each province
# then assume value follows same growth rate as machinery power from 2012-2024

# convert to long: one row per (Region, Year)
households_long = households.melt(
    id_vars=["Region"],
    var_name="Year",
    value_name="rural_households_10000"
)

asset_value_long = asset_value.melt(
    id_vars=["Region"],
    var_name="Year",
    value_name="ag_asset_value_yuan_per_household"
)

# merge; outer keeps region-years that appear in only one of the two tables
capital_stock = households_long.merge(asset_value_long, on=['Region', 'Year'], how='outer')

# calculate total value
# (scaling the household count by 10,000 leads to too high a number, so the
# counts are treated as plain units despite the column name)
capital_stock['ag_asset_value_yuan'] = capital_stock['rural_households_10000'] * capital_stock['ag_asset_value_yuan_per_household']

# convert to USD
# Year is cast to integer so it can be joined against the exchange-rate table
capital_stock['Year'] = capital_stock['Year'].astype('int64')
# Left join: the exchange-rate table is only a lookup. An outer join would add
# a row with no Region for every rate year that has no data here, and those
# rows would carry through the later merge and pivot.
capital_stock = capital_stock.merge(USD_YUAN, on='Year', how='left')

capital_stock['ag_asset_value_USD'] = capital_stock['ag_asset_value_yuan'] / capital_stock['yuan_per_USD']

### Get annual growth rate from 2012 - 2024
# Sort the year headers so the row-wise pct_change compares each year with the
# one before it (assumes the headers are same-width year strings - TODO confirm)
year_cols = sorted(ag_power.columns.drop("Region"))
power_growth_rates = ag_power[year_cols].pct_change(axis=1)

# Same layout as ag_power, but holding growth rates instead of levels
growth = ag_power.copy()
growth[year_cols] = power_growth_rates

# make long: one row per (Region, Year)
growth_long = growth.melt(
    value_name="annual_growth_rate_ag_machinery_power",
    var_name="Year",
    id_vars=["Region"],
)

# integer years so the keys match capital_stock
growth_long['Year'] = growth_long['Year'].astype('int64')

# merge; outer so region-years present only in the growth table are kept
capital_stock = capital_stock.merge(growth_long, how='outer', on=['Region', 'Year'])

# get value of capital stock from year before
# Sorting by Region then Year makes each region's rows consecutive and in year
# order; one sort (with a copy) is enough for both steps below.
capital_stock = capital_stock.sort_values(["Region", "Year"]).copy()

# NOTE(review): this column is not read again in the cells shown here; it is
# kept so the frame's columns stay the same.
capital_stock["ag_asset_value_USD_prev_year"] = (
    capital_stock
    .groupby("Region")["ag_asset_value_USD"]
    .shift(1)
)

# split into years using growth rates and years not:
# from FIRST_EXTRAPOLATED_YEAR onwards each year's value is the previous year's
# value grown by that year's machinery-power growth rate; earlier years keep
# the values already computed.
FIRST_EXTRAPOLATED_YEAR = 2013

for region, region_rows in capital_stock.groupby("Region"):
    region_rows = region_rows.sort_values("Year")

    years = region_rows["Year"].to_numpy()
    growth_rates = region_rows["annual_growth_rate_ag_machinery_power"].to_numpy()
    values = region_rows["ag_asset_value_USD"].to_numpy(dtype="float64", copy=True)

    # The recurrence reads the value written on the previous iteration, so it
    # has to run in year order; the first row is never overwritten.
    for pos in range(1, len(values)):
        if years[pos] >= FIRST_EXTRAPOLATED_YEAR:
            values[pos] = values[pos - 1] * (1 + growth_rates[pos])

    # Write back using the group's own index labels so rows line up
    capital_stock.loc[region_rows.index, "ag_asset_value_USD"] = values

In [19]:
#### Clean

# Wide layout (one row per Region, one column per Year), then attach the
# admin codes by matching Region against STAT_name.
# NOTE(review): the outer join keeps regions without a matching code and codes
# without data; Region is dropped below, so the former end up without an
# identifier in the output - confirm this is intended.
capital_stock_wide = (
    capital_stock
    .pivot(index='Region', columns='Year', values='ag_asset_value_USD')
    .reset_index()
    .merge(CHN_codes, left_on='Region', right_on='STAT_name', how='outer')
)

# add units
capital_stock_wide['Units'] = 'Ag capital stock - USD (nominal)'

# keep the code, the units label and the year columns 2005..2024, in that order
columns_to_keep = ['ADM_CODE', 'Units'] + list(range(2005, 2025))
capital_stock_wide = capital_stock_wide[columns_to_keep]

In [20]:
##### Save cleaned data
# Create the output folder first: to_csv does not create missing directories
# and would otherwise fail when the folder is absent.
output_dir = os.path.dirname(save_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# index=False: the row index carries no information, so leave it out of the file
capital_stock_wide.to_csv(save_path, index=False)

In [None]:
# Display the final wide table (the same frame passed to to_csv in the save step)
capital_stock_wide