# Alive & Thrive
## Viet Nam Data: Child Files - Preprocessing
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 28 May 2022

In [1]:
# %load std_imports.py
from pathlib import Path

import numpy as np
import pandas as pd

import json
import pyreadstat

from std_utils import (
    read_spss_file,
    run_quality_assurance,
    generate_HHID,
    generate_MEMID,
    subset_edu_save,
    add_total_year,
    merge_hh_hl_data,
    subset_hl_df,
    standardize_col_names,
    standardize_col_values,
    create_elderly_hoh,
    save_merge,
    export_analyzed_data
)

# Notebook display settings: render up to 1500 rows and do not cap the
# number of columns when a DataFrame is shown.
pd.options.display.max_rows = 1500
pd.options.display.max_columns = None


In [2]:
# %load children_imports.py
from children_analysis import (
    subset_children_file,
    generate_MOMID,
    merge_mother_edu,
    create_sex_ch,
    create_ch_age_cat,
    create_excl_bf,
    create_cont_1223_bf,
    create_mdd_ch,
    create_mmf_ch,
    create_mad_ch
)

from aw_analytics import mean_wt, output_mean_table

from women_analysis import create_mother_edu

### --- 2021 ---

In [3]:
# Run parameters for this section.
# `year` selects the survey round; it is the only value that changes
# between the per-year sections of this notebook.
# -------------------------------------------------------------------
year = '2021'
# -------------------------------------------------------------------
country, recode = 'VNM', 'children'

In [4]:
# Load the children's recode for this country/year
df = read_spss_file(country, year, recode)

# The next helpers are called without using a return value (presumably they
# modify `df` in place -- confirm in std_utils / children_analysis).
generate_HHID(df, country, year, recode)  # HHID: key for merging HH and HL data
generate_MOMID(df, country, year)  # MOMID: key for merging mother data into the U5 file
add_total_year(df, year)  # Total, Year
run_quality_assurance(df)  # quality assurance

# Remaining steps each take the frame and return the one to carry forward:
# merge HH and HL data, merge mother education data, then subset the
# children's file (completed).
df = (
    df.pipe(merge_hh_hl_data, country, year)
    .pipe(merge_mother_edu, country, year)
    .pipe(subset_children_file, country, year)
)

The file -- ch_2021.sav -- has the following shape: Rows: 4404; Columns: 537
HHID is NOT unique
MOMID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 4404; Columns: 537
Checking if any rows are duplicates...
The are no duplicate rows
The number of children with a completed survey is: 4329


#### Create indicators

In [5]:
# Indicators built in one chain; each helper receives the frame returned
# by the previous one.
df = (
    df.pipe(create_sex_ch, country, year)  # Child Sex [sex_ch]
    .pipe(create_mother_edu, country, year, recode='children')  # Mother Education [mother_edu]
    .pipe(create_ch_age_cat, country, year)  # Age categories [ch_age_cat_X]
)

In [6]:
# Feeding indicators, applied in the same order as before. The minimum
# acceptable diet step stays last, after MDD and MMF (presumably it builds
# on the columns they create -- confirm in children_analysis).
df = (
    df.pipe(create_excl_bf, country, year)  # Exclusive BF [excl_bf]
    .pipe(create_cont_1223_bf)  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
    .pipe(create_mdd_ch, country, year)  # Minimum Dietary Diversity [mdd_ch]
    .pipe(create_mmf_ch, country, year)  # Minimum Meal Frequency [mmf_ch]
    .pipe(create_mad_ch)  # Minimum Acceptable Diet [mad_ch]
)

#### Subset and export working dataset

In [7]:
# Pass the prepared frame to the shared export helper for this
# country / year / recode.
export_analyzed_data(df, country, year, recode)

### --- 2014 ---

In [8]:
# Run parameters for this section.
# `year` selects the survey round; it is the only value that changes
# between the per-year sections of this notebook.
# -------------------------------------------------------------------
year = '2014'
# -------------------------------------------------------------------
country, recode = 'VNM', 'children'

In [9]:
# Load the children's recode for this country/year
df = read_spss_file(country, year, recode)

# The next helpers are called without using a return value (presumably they
# modify `df` in place -- confirm in std_utils / children_analysis).
generate_HHID(df, country, year, recode)  # HHID: key for merging HH and HL data
generate_MOMID(df, country, year)  # MOMID: key for merging mother data into the U5 file
add_total_year(df, year)  # Total, Year
run_quality_assurance(df)  # quality assurance

# Remaining steps each take the frame and return the one to carry forward:
# merge HH and HL data, merge mother education data, then subset the
# children's file (completed).
df = (
    df.pipe(merge_hh_hl_data, country, year)
    .pipe(merge_mother_edu, country, year)
    .pipe(subset_children_file, country, year)
)

The file -- ch_2014.sav -- has the following shape: Rows: 3346; Columns: 344
HHID is NOT unique
MOMID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 3346; Columns: 348
Checking if any rows are duplicates...
The are no duplicate rows
The number of children with a completed survey is: 3316


#### Create indicators

In [10]:
# Indicators built in one chain; each helper receives the frame returned
# by the previous one.
df = (
    df.pipe(create_sex_ch, country, year)  # Child Sex [sex_ch]
    .pipe(create_mother_edu, country, year, recode='children')  # Mother Education [mother_edu]
    .pipe(create_ch_age_cat, country, year)  # Age categories [ch_age_cat_X]
)

In [11]:
# Feeding indicators, applied in the same order as before. The minimum
# acceptable diet step stays last, after MDD and MMF (presumably it builds
# on the columns they create -- confirm in children_analysis).
df = (
    df.pipe(create_excl_bf, country, year)  # Exclusive BF [excl_bf]
    .pipe(create_cont_1223_bf)  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
    .pipe(create_mdd_ch, country, year)  # Minimum Dietary Diversity [mdd_ch]
    .pipe(create_mmf_ch, country, year)  # Minimum Meal Frequency [mmf_ch]
    .pipe(create_mad_ch)  # Minimum Acceptable Diet [mad_ch]
)

#### Subset and export working dataset

In [12]:
# Pass the prepared frame to the shared export helper for this
# country / year / recode.
export_analyzed_data(df, country, year, recode)

### --- 2011 ---

In [13]:
# Run parameters for this section.
# `year` selects the survey round; it is the only value that changes
# between the per-year sections of this notebook.
# -------------------------------------------------------------------
year = '2011'
# -------------------------------------------------------------------
country, recode = 'VNM', 'children'

In [14]:
# Load the children's recode for this country/year
df = read_spss_file(country, year, recode)

# The next helpers are called without using a return value (presumably they
# modify `df` in place -- confirm in std_utils / children_analysis).
generate_HHID(df, country, year, recode)  # HHID: key for merging HH and HL data
generate_MOMID(df, country, year)  # MOMID: key for merging mother data into the U5 file
add_total_year(df, year)  # Total, Year
run_quality_assurance(df)  # quality assurance

# Remaining steps each take the frame and return the one to carry forward:
# merge HH and HL data, merge mother education data, then subset the
# children's file (completed).
df = (
    df.pipe(merge_hh_hl_data, country, year)
    .pipe(merge_mother_edu, country, year)
    .pipe(subset_children_file, country, year)
)

The file -- ch_2011.sav -- has the following shape: Rows: 3729; Columns: 300
HHID is NOT unique
MOMID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 3729; Columns: 304
Checking if any rows are duplicates...
The are no duplicate rows
The number of children with a completed survey is: 3678


#### Create indicators

In [15]:
# Indicators built in one chain; each helper receives the frame returned
# by the previous one.
df = (
    df.pipe(create_sex_ch, country, year)  # Child Sex [sex_ch]
    .pipe(create_mother_edu, country, year, recode='children')  # Mother Education [mother_edu]
    .pipe(create_ch_age_cat, country, year)  # Age categories [ch_age_cat_X]
)

In [16]:
# Feeding indicators computed for this round, in the original order.
df = (
    df.pipe(create_excl_bf, country, year)  # Exclusive BF [excl_bf]
    .pipe(create_cont_1223_bf)  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
    .pipe(create_mmf_ch, country, year)  # Minimum Meal Frequency [mmf_ch]
)

# Not run for this round (these calls were already disabled):
#   create_mdd_ch(df, country, year)   # Minimum Dietary Diversity [mdd_ch]
#   create_mad_ch(df)                  # Minimum Acceptable Diet [mad_ch]

#### Subset and export working dataset

In [17]:
# Pass the prepared frame to the shared export helper for this
# country / year / recode.
export_analyzed_data(df, country, year, recode)

### --- 2006 ---

In [18]:
# Run parameters for this section.
# `year` selects the survey round; it is the only value that changes
# between the per-year sections of this notebook.
# -------------------------------------------------------------------
year = '2006'
# -------------------------------------------------------------------
country, recode = 'VNM', 'children'

In [19]:
# Load the children's recode for this country/year
df = read_spss_file(country, year, recode)

# The next helpers are called without using a return value (presumably they
# modify `df` in place -- confirm in std_utils / children_analysis).
generate_HHID(df, country, year, recode)  # HHID: key for merging HH and HL data
generate_MOMID(df, country, year)  # MOMID: key for merging mother data into the U5 file
add_total_year(df, year)  # Total, Year
run_quality_assurance(df)  # quality assurance

# Remaining steps each take the frame and return the one to carry forward:
# merge HH and HL data, merge mother education data, then subset the
# children's file (completed).
df = (
    df.pipe(merge_hh_hl_data, country, year)
    .pipe(merge_mother_edu, country, year)
    .pipe(subset_children_file, country, year)
)

The file -- ch_2006.sav -- has the following shape: Rows: 2680; Columns: 297
HHID is NOT unique
MOMID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 2680; Columns: 300
Checking if any rows are duplicates...
The are no duplicate rows
The number of children with a completed survey is: 2680


#### Create indicators

In [20]:
# Indicators built in one chain; each helper receives the frame returned
# by the previous one.
df = (
    df.pipe(create_sex_ch, country, year)  # Child Sex [sex_ch]
    .pipe(create_mother_edu, country, year, recode='children')  # Mother Education [mother_edu]
    .pipe(create_ch_age_cat, country, year)  # Age categories [ch_age_cat_X]
)

In [21]:
# Feeding indicators computed for this round, in the original order.
df = (
    df.pipe(create_excl_bf, country, year)  # Exclusive BF [excl_bf]
    .pipe(create_cont_1223_bf)  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
)

# Not run for this round (these calls were already disabled):
#   create_mdd_ch(df, country, year)   # Minimum Dietary Diversity [mdd_ch]
#   create_mmf_ch(df, country, year)   # Minimum Meal Frequency [mmf_ch]
#   create_mad_ch(df)                  # Minimum Acceptable Diet [mad_ch]

#### Subset and export working dataset

In [22]:
# Pass the prepared frame to the shared export helper for this
# country / year / recode.
export_analyzed_data(df, country, year, recode)

### --- 2000 ---

In [23]:
# Run parameters for this section.
# `year` selects the survey round; it is the only value that changes
# between the per-year sections of this notebook.
# -------------------------------------------------------------------
year = '2000'
# -------------------------------------------------------------------
country, recode = 'VNM', 'children'

In [24]:
# Load the children's recode for this country/year
df = read_spss_file(country, year, recode)

# The next helpers are called without using a return value (presumably they
# modify `df` in place -- confirm in std_utils / children_analysis).
generate_HHID(df, country, year, recode)  # HHID: key for merging HH and HL data
generate_MOMID(df, country, year)  # MOMID: key for merging mother data into the U5 file
add_total_year(df, year)  # Total, Year
run_quality_assurance(df)  # quality assurance

# Remaining steps each take the frame and return the one to carry forward:
# merge HH and HL data, merge mother education data, then subset the
# children's file (completed).
df = (
    df.pipe(merge_hh_hl_data, country, year)
    .pipe(merge_mother_edu, country, year)
    .pipe(subset_children_file, country, year)
)

The file -- ch_2000.sav -- has the following shape: Rows: 3105; Columns: 251
HHID is NOT unique
MOMID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 3105; Columns: 254
Checking if any rows are duplicates...
The are no duplicate rows
The number of children with a completed survey is: 3105


#### Create indicators

In [25]:
# Indicators built in one chain; each helper receives the frame returned
# by the previous one.
df = (
    df.pipe(create_sex_ch, country, year)  # Child Sex [sex_ch]
    .pipe(create_mother_edu, country, year, recode='children')  # Mother Education [mother_edu]
    .pipe(create_ch_age_cat, country, year)  # Age categories [ch_age_cat_X]
)

In [26]:
# Feeding indicators computed for this round, in the original order.
df = (
    df.pipe(create_excl_bf, country, year)  # Exclusive BF [excl_bf]
    .pipe(create_cont_1223_bf)  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
)

# Not run for this round (these calls were already disabled):
#   create_mdd_ch(df, country, year)   # Minimum Dietary Diversity [mdd_ch]
#   create_mmf_ch(df, country, year)   # Minimum Meal Frequency [mmf_ch]
#   create_mad_ch(df)                  # Minimum Acceptable Diet [mad_ch]

#### Subset and export working dataset

In [27]:
# Pass the prepared frame to the shared export helper for this
# country / year / recode.
export_analyzed_data(df, country, year, recode)

In [None]:
# Ad-hoc summary cell: passes the `excl_bf` column, the `sex_ch` grouping
# variable and the CHWEIGHT weight column to output_mean_table (presumably a
# weighted-mean table -- confirm in aw_analytics).
# It uses whichever `df` is currently in memory; when the notebook is run top
# to bottom that is the 2000 round prepared in the preceding section.
var = 'excl_bf'
ind_vars = ['sex_ch']
# NOTE(review): weight is passed as a one-element list rather than a plain
# column name -- confirm this is what output_mean_table expects.
weight = ['CHWEIGHT']

output_mean_table(df, var, ind_vars, weight)