# Alive & Thrive
## Viet Nam Data: Women Files - Preprocessing
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 25 May 2022

In [1]:
# %load std_imports
from pathlib import Path

import numpy as np
import pandas as pd

import json
import pyreadstat

from std_utils import (
    read_file,
    run_quality_assurance,
    generate_HHID,
    merge_hh_hl_data,
    subset_df,
    standardize_col_names,
    standardize_col_values,
    create_elderly_hoh,
    save_merge,
    export_analyzed_data
)

# Show up to 1500 rows and never truncate columns when displaying frames.
pd.set_option(
    "display.max_rows", 1500,
    "display.max_columns", None,
)


In [2]:
# %load women_imports.py
from women_analysis import (
    generate_str_replace_dict,
    create_anc_4_visits,
    create_anc_3_components,
    create_inst_delivery,
    create_caesarean_del,
    create_pnc_mother,
    create_low_bw,
    create_early_bf,
    create_mother_edu,
    subset_women_file
)

from aw_analytics import mean_wt, output_mean_table


### --- 2021 ---

In [None]:
# Parameters read by the cells of this section: country code and recode type
country, recode = 'VNM', 'women'

# -------------------------------------------------------------------
# Survey year processed by this section
year = '2021'
# -------------------------------------------------------------------

In [None]:
# Load the women recode for the selected country/year
df = read_file(country, year, recode)

# Build the HHID key used to merge HH and HL data.
# Called for its effect on df (the return value is not used) --
# presumably adds the column in place; confirm in std_utils.
generate_HHID(df, country, year, recode)

# Quality-assurance checks on the loaded frame
run_quality_assurance(df)

# Merge in HH and HL data, then subset the women file
df = (
    df
    .pipe(merge_hh_hl_data, country, year)
    .pipe(subset_women_file, country, year)
)

#### Create Indicators

In [None]:
# --- Mother's education [mother_edu] --- #
df = df.pipe(create_mother_edu, country, year)

In [None]:
# Indicator builders, applied in the order listed. Each one is called as
# builder(df, country, year) and its return value replaces df.
indicator_builders = (
    create_anc_4_visits,      # ANC 4+ visits [anc_4_visits]
    create_anc_3_components,  # ANC components [anc_3_components]
    create_inst_delivery,     # Institutional delivery [inst_delivery]
    create_caesarean_del,     # Caesarean delivery [caesarean_del]
    create_pnc_mother,        # Post-natal health check (mother) [pnc_mother]
    # create_low_bw,          # Low birthweight [low_bw] -- currently disabled
    create_early_bf,          # Early initiation of breastfeeding [early_bf]
)

for build_indicator in indicator_builders:
    df = build_indicator(df, country, year)

#### Export working variables

In [None]:
# Hand the processed frame to the project export helper for this
# country/year/recode (destination and format are defined in std_utils).
export_analyzed_data(df, country, year, recode)

### --- 2014 ---

In [None]:
# Parameters read by the cells of this section: country code and recode type
country, recode = 'VNM', 'women'

# -------------------------------------------------------------------
# Survey year processed by this section
year = '2014'
# -------------------------------------------------------------------

In [None]:
# Load the women recode for the selected country/year
df = read_file(country, year, recode)

# Build the HHID key used to merge HH and HL data.
# Called for its effect on df (the return value is not used) --
# presumably adds the column in place; confirm in std_utils.
generate_HHID(df, country, year, recode)

# Quality-assurance checks on the loaded frame
run_quality_assurance(df)

# Merge in HH and HL data, then subset the women file
df = (
    df
    .pipe(merge_hh_hl_data, country, year)
    .pipe(subset_women_file, country, year)
)

In [None]:
# --- Mother's education [mother_edu] --- #
df = df.pipe(create_mother_edu, country, year)

In [None]:
# Indicator builders, applied in the order listed. Each one is called as
# builder(df, country, year) and its return value replaces df.
indicator_builders = (
    create_anc_4_visits,      # ANC 4+ visits [anc_4_visits]
    create_anc_3_components,  # ANC components [anc_3_components]
    create_inst_delivery,     # Institutional delivery [inst_delivery]
    create_caesarean_del,     # Caesarean delivery [caesarean_del]
    create_pnc_mother,        # Post-natal health check (mother) [pnc_mother]
    # create_low_bw,          # Low birthweight [low_bw] -- currently disabled
    create_early_bf,          # Early initiation of breastfeeding [early_bf]
)

for build_indicator in indicator_builders:
    df = build_indicator(df, country, year)

#### Export working variables

In [None]:
# Hand the processed frame to the project export helper for this
# country/year/recode (destination and format are defined in std_utils).
export_analyzed_data(df, country, year, recode)

### --- 2011 ---

In [3]:
# Parameters read by the cells of this section: country code and recode type
country, recode = 'VNM', 'women'

# -------------------------------------------------------------------
# Survey year processed by this section
year = '2011'
# -------------------------------------------------------------------

In [4]:
# Read the women recode for the selected country/year
df = read_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data.
# The return value is not used, so the helper is relied on for its effect
# on df -- presumably adds the column in place; confirm in std_utils.
generate_HHID(df, country, year, recode)

# Run quality assurance on the loaded frame
run_quality_assurance(df)

# Merge in HH and HL data
df = merge_hh_hl_data(df, country, year)

# NOTE(review): the 2021 and 2014 sections call subset_women_file at this
# point; here the call is disabled, so df keeps every merged column.
# Confirm whether this is intentional before building 2011 indicators.
# # Subset women file
# df = subset_women_file(df, country, year)

The file -- wm_2011.sav -- has the following shape: Rows: 12115; Columns: 235
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 12115; Columns: 238
Checking if any rows are duplicates...
The are no duplicate rows


In [5]:
# Preview the first two rows of the merged 2011 frame; all columns are shown
# because display.max_columns is set to None in the setup cell.
df.head(2)

Unnamed: 0,HH1,HH2,LN,WMA,WMB,WMC,WM1,WM2,WM4,WM5,WM6D,WM6M,WM6Y,WM7,WM8,WM9,WM10H,WM10M,WM11H,WM11M,WB1M,WB1Y,WB2,WB3,WB4,WB5,WB7,CM1,CM2D,CM2M,CM2Y,CM3,CM4,CM5A,CM5B,CM6,CM7A,CM7B,CM8,CM9A,CM9B,CM10,CM12D,CM12M,CM12Y,CM13,DB1,DB2,DB3U,DB3N,MN1,MN2A,MN2B,MN2C,MN2F,MN2G,MN2X,MN3,MN4A,MN4B,MN4C,MN5,MN6,MN7,MN9,MN10,MN11,MN17A,MN17B,MN17C,MN17F,MN17G,MN17H,MN17X,MN17Y,MN18,MN19,MN20,MN21,MN22A,MN22,MN23,MN24,MN25U,MN25N,MN26,MN27A,MN27B,MN27C,MN27D,MN27E,MN27F,MN27G,MN27H,MN27I,MN27J,MN27X,IS2A,IS2B,IS2C,IS2D,IS2E,IS2F,IS2G,IS2H,IS2I,IS2X,IS2Y,IS2Z,CP1,CP2,CP3A,CP3B,CP3C,CP3D,CP3E,CP3F,CP3G,CP3H,CP3I,CP3J,CP3K,CP3L,CP3M,CP3X,UN2,UN3,UN4,UN6,UN7U,UN7N,UN10,UN11A,UN11B,UN11C,UN11D,UN11E,UN11F,UN11G,UN11H,UN11I,UN11X,UN11Z,UN13U,UN13N,DV1A,DV1B,DV1C,DV1D,DV1E,MA1,MA2,MA3,MA4,MA5,MA6,MA7,MA8M,MA8Y,MA9,SB1,SB2,SB3U,SB3N,SB4,SB5,SB7,SB8,SB9,SB10,SB12,SB13,SB14,SB15,HA1,HA2,HA3,HA4,HA5,HA6,HA7,HA8A,HA8B,HA8C,HA9,HA10,HA11,HA12,HA15A,HA15B,HA15C,HA15D,HA16,HA17,HA18,HA20,HA21,HA22,HA23,HA24,HA25,HA26,HA27,TNLN,TN4,TN5,TN6,TN8,TN9,TN10,TN11,TN12_1,TN12_2,TN12_3,TN12_4,HH6,HH7,WDOI,WDOB,WAGE,WDOM,WAGEM,WDOBFC,WDOBLC,MSTATUS,CEB,CSURV,CDEAD,PSU,stratum,wscore,windex5,welevel,ethnicity,wmweight,HHID,Total,Year,residence,region,wealth_q,eth_hoh,elderly_hoh,sex_hoh
0,1.0,1.0,3.0,,1.0,10.0,1.0,1.0,3.0,235.0,29.0,11.0,2010.0,Completed,232.0,6.0,10.0,9.0,10.0,40.0,12.0,1984.0,25.0,Yes,Upper Secondary,12.0,,Yes,28.0,6.0,2003.0,,Yes,0.0,2.0,No,,,No,,,2.0,18.0,6.0,2009.0,Yes,Yes,,,,Yes,Doctor,,,,,,15.0,Yes,Yes,Yes,Yes (card not seen),Yes,1.0,Yes,1.0,2.0,Doctor,,,,,,,,Government hospital,No,Average,Yes,Card,2.95,Yes,Yes,Hours,0.0,Yes,,,,,,,Infant formula,Tea / Infusions,,,,,,Child develops a fever,Child has fast breathing,,,,Child is vomiting,,,,,No,Yes,,,,,,,Male condom,,,,,Periodic abstinence / Rhythm,,,,,,Have (a/another) child,Years,2.0,,,,,,,,,,,,,Weeks ago,4.0,No,No,No,No,No,"Yes, currently married",50.0,No,,,,Only once,7.0,2002.0,,18.0,No,Days ago,10.0,No,Husband,,No,,,,,,1.0,Yes,Yes,No,Yes,No,No,Yes,Yes,Yes,Yes,Yes,Yes,No,Yes,No,Yes,No,No,No,,,No,,,,No,,,Yes,2.0,Observed,Other net,More than 36 months ago,No,No,,Yes,2.0,3.0,5.0,No one / No one else (Khong co ai nua),Urban,Red River Delta,1331.0,1020.0,25-29,1231.0,17.0,1242.0,1314.0,Currently married/in union,2.0,2.0,0.0,1.0,1.0,1.468632,Richest,Upper Secondary,Kinh,0.567959,1001,Total,2011,Urban,Red River Delta,Richest,Kinh and Hoa,Elderly HoH: YES,Female
1,1.0,2.0,2.0,,1.0,10.0,1.0,2.0,2.0,234.0,30.0,11.0,2010.0,Completed,232.0,6.0,17.0,59.0,18.0,32.0,3.0,1961.0,49.0,Yes,College/University and above,,,Yes,21.0,2.0,1987.0,,Yes,1.0,0.0,No,,,No,,,1.0,21.0,2.0,1987.0,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,No,No,,,,,,,,,,,,,,,,,,No more / None,,,No,,Menopausal,,,,,,,,,,Years ago,4.0,No,No,No,No,No,"Yes, currently married",70.0,No,,,,Only once,3.0,1986.0,,25.0,No,Weeks ago,2.0,No,Husband,,No,,,,,,1.0,Yes,Yes,No,Yes,No,No,Yes,Yes,Yes,Yes,No,No,No,Yes,,,,,,,,,,,,Yes,12-23 months ago,Yes,,2.0,Not observed,Other net,6.0,No,No,,Yes,2.0,No one / No one else (Khong co ai nua),,,Urban,Red River Delta,1331.0,735.0,45-49,1035.0,25.0,1046.0,,Currently married/in union,1.0,1.0,0.0,1.0,1.0,1.723942,Richest,Tertiary,Kinh,0.567959,1002,Total,2011,Urban,Red River Delta,Richest,Kinh and Hoa,Elderly HoH: YES,Male
