In [1]:
##### Cleans Canadian capital stock data
# removes unnecessary variables and geographies and reformats data

import os
import pandas as pd

In [2]:
##### Load data

# Base directory: the PARENT of the current working directory
# (os.getcwd() is where the notebook runs; dirname() steps one level up)
cd = os.path.dirname(os.getcwd())

# Input locations, built relative to the base directory
raw_capital_csv = f"{cd}/Data/Raw/Sub_National/CAD_ag_census/farm_capital_assets_01152026.csv"
ccs_lookup_csv = f"{cd}/Data/Correspondence_tables/CAD_CCS.csv"

# Read the raw farm capital table and the CCS correspondence table
capital_stock = pd.read_csv(raw_capital_csv)
CAD_CCS = pd.read_csv(ccs_lookup_csv)

# Destination of the cleaned output
save_path = f"{cd}/Data/Clean/Capital_stock/CAD_census_capital_stock.csv"

In [3]:
##### Clean capital stock

# drop unnecessary columns
columns_to_keep = ['GEO', 'DGUID', 'Farm capital', 'VALUE']

# keep only total farm capital assets.
# .copy() yields an independent frame so the column assignments below do not
# operate on a view of the raw table (avoids SettingWithCopyWarning).
capital_stock = capital_stock.loc[
    capital_stock['Farm capital'] == 'Total farm capital', columns_to_keep
].copy()

# split GEO (formatted as "<name> [<code>]") to get names and codes.
# expand=False returns a Series for the single capture group rather than a
# one-column DataFrame.
capital_stock['Geo_name'] = capital_stock['GEO'].str.extract(r"^(.*?)(?:\s*\[)", expand=False)
capital_stock['Geo_code'] = capital_stock['GEO'].str.extract(r"\[(.*?)\]", expand=False)

# keep only data on census subdivisions (Geo_code prefixed with "CCS").
# na=False treats rows whose GEO has no bracketed code as non-matching;
# without it the mask contains NaN and boolean indexing raises.
is_ccs = capital_stock['Geo_code'].str.startswith("CCS", na=False)
capital_stock = capital_stock[is_ccs].copy()

# extract CCSUID: last 7 characters of DGUID, cast to int64 for use as merge key
capital_stock['CCSUID'] = capital_stock['DGUID'].str[-7:].astype('int64')

# merge with full CCS; how='right' keeps every row of CAD_CCS, including
# CCSUIDs that have no capital stock record (their VALUE is NaN after the merge)
capital_stock = capital_stock.merge(CAD_CCS, on='CCSUID', how='right')

# fill missing with 0's
capital_stock['VALUE'] = capital_stock['VALUE'].fillna(0)

# convert to USD using Bank of Canada average annual exchange rate for 2021 (https://www.bankofcanada.ca/rates/exchange/annual-average-exchange-rates/)
# The rate is quoted as Canadian dollars per one US dollar, so CAD values are
# DIVIDED by it to obtain USD.
CAD_per_USD_2021 = 1.2535
capital_stock['2021'] = capital_stock['VALUE'] / CAD_per_USD_2021

# add units
capital_stock['Units'] = 'Ag capital stock - USD (nominal)'

# re-order columns
columns_to_keep = ['CCSUID', 'Units', '2021']
capital_stock = capital_stock[columns_to_keep]


In [4]:
# Save cleaned data
# Create the destination folder first: to_csv does not create missing
# directories and would otherwise fail when the folder is absent.
output_dir = os.path.dirname(save_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
capital_stock.to_csv(save_path, index=False)