In [28]:
##### Cleans Russia labor data
# Aggregates to yearly means, cleans, and reformats to wide (one column per year)

import os
import pandas as pd

In [29]:
##### Load data

# Base directory for every path below: the PARENT of the current working
# directory (os.getcwd() is the working directory; dirname() steps up one level).
cd = os.path.dirname(os.getcwd())

# Input files
labor_csv = f"{cd}/Data/Raw/Sub_National/Russia/Russia_employment.csv"
codes_csv = f"{cd}/Data/Correspondence_tables/RUS_regions.csv"

# Set save path
save_path = f"{cd}/Data/Clean/Labor/RUS_labor.csv"

# Import data: employment records and the region correspondence table
labor = pd.read_csv(labor_csv)
RUS_codes = pd.read_csv(codes_csv)

In [30]:
##### Clean

# Value of `id_okved` that marks the agriculture rows
AG_SECTOR = 'AGRICULTURE, FORESTRY, HUNTING, FISHING AND FISH FARMING'

# Filter to just ag, drop incomplete rows, and derive the calendar year.
# The derived columns are added with .assign(), which returns a new frame;
# assigning into a filtered slice instead triggers SettingWithCopyWarning.
labor = (
    labor[labor['id_okved'] == AG_SECTOR]
    # drop non-region geographies
    # NOTE(review): dropna() removes rows with a missing value in ANY column;
    # presumably only non-region rows contain NaNs -- confirm against the raw file.
    .dropna()
    # 'period' is parsed with an explicit format, so unexpected strings raise
    .assign(period=lambda df: pd.to_datetime(df['period'], format='%m/%d/%y %H:%M'))
    .assign(year=lambda df: df['period'].dt.year)
)

# Average of 'value' over all records within each region-year
labor_mean = labor.groupby(['region', 'year'])['value'].mean().reset_index()

# Reformat to wide (one row per region, one column per year), add the units
# label, and attach the region codes.
labor_wide = (
    labor_mean
    .pivot(index='region', columns='year', values='value')
    .reset_index()
    .assign(Units='Ag labor - jobs')
    # inner join: regions whose name has no match in Census_Name are dropped
    .merge(RUS_codes, left_on='region', right_on='Census_Name', how='inner')
    # NOTE(review): this runs before the column selection below, so a NaN in
    # any year or any correspondence-table column removes the whole region --
    # confirm that is intended.
    .dropna()
)

# Keep only needed columns. Year labels are integers (from .dt.year); selecting
# a year that is absent from the data raises KeyError.
col_to_keep = ['HASC_1', 'Units', 2019, 2020, 2021, 2022, 2023, 2024, 2025]

labor_wide = labor_wide[col_to_keep]

In [31]:
# Save
# Create the output folder first: to_csv raises OSError when the target
# directory does not exist (e.g. on a fresh checkout).
out_dir = os.path.dirname(save_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

# index=False omits the DataFrame row index from the file
labor_wide.to_csv(save_path, index=False)