## Extract HH variables for equity stratifiers
### **Ethnicity of HoH**

In [1]:
# %load std_imports.py
from pathlib import Path

import numpy as np
import pandas as pd

import json
import pyreadstat

from std_utils import (
    read_file,
    run_quality_assurance,
    generate_HHID,
    merge_hh_hl_data,
    subset_df,
    standardize_col_names,
    standardize_col_values,
    save_merge,
)

# Notebook display limits: show up to 1500 rows and do not cap the column count.
pd.options.display.max_rows = 1500
pd.options.display.max_columns = None


In [2]:
# Run parameters consumed by the processing cell below.
country = "VNM"
recode = "household"

# Variables handed to standardize_col_names.
var_rename = [
    "residence",
    "region",
    "wealth",
    "eth_hoh",
]

# Variables handed to standardize_col_values.
var_replace = [
    "residence",
    "region",
    "eth_hoh",
]

# -------------------------------------------------------------------
# Survey year to process.
year = "2011"
# -------------------------------------------------------------------


In [3]:
# Pipeline for one country/year/recode: read -> add HHID -> standardize column
# names -> standardize column values -> save. Each step after the read works on
# `df`, so the statements must run in this order.

# Read file
# Loads the data selected by (country, year, recode) into `df`.
df = read_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# NOTE(review): the return value is discarded here, unlike the standardize_*
# calls below which reassign `df`. Presumably generate_HHID adds the HHID column
# to `df` in place -- confirm against std_utils.generate_HHID.
generate_HHID(df, country, year, recode)

# Standardize column names
# `var_rename` lists the variables whose names are standardized.
df = standardize_col_names(df, country, year, recode, var_rename)

# Standardize column values
# `var_replace` lists the variables whose values are standardized
# (it omits 'wealth', which appears only in `var_rename`).
df = standardize_col_values(df, country, year, recode, var_replace)

# Save to merge folder
# Writes the processed `df`; the destination is decided inside save_merge.
save_merge(df, country, year, recode)

The file -- hh_2011.sav -- has the following shape: Rows: 11874; Columns: 127
HHID is unique
var_replace_nested_dict is: 
 {'residence': {'URBAN': 'Urban', 'RURAL': 'Rural'}, 'region': {'South East': 'Southeast', 'Red River Delta': 'Red River Delta', 'Northen Midlands and Mountain area': 'Northern Midlands and Mountain', 'Mekong River Delta': 'Mekong River Delta', 'North Central and Central Coastal area': 'North Central and Central Coastal', 'Central Highlands': 'Central Highlands'}, 'eth_hoh': {'Kinh': 'Kinh and Hoa', 'Non-Kinh': 'Tay, Thai, Muong, Nung', 'Missing/DK': 'Other/Missing'}}
