# Alive & Thrive
## Laos Data: Women Files - Preprocessing
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 16 August 2022

In [18]:
from std_utils import (
    read_spss_file,
    generate_HHID,
    add_total_year,
    run_quality_assurance,
    merge_hh_hl_data,
    export_analyzed_data
)

from women_analysis import (
    subset_women_file,
    create_mother_edu,
    create_anc_4_visits,
    create_anc_3_components,
    create_inst_delivery,
    create_caesarean_del,
    create_pnc_mother,
    create_low_bw,
    create_early_bf,
    create_iron_supp,
    update_no_response
)

from aw_analytics import output_mean_table

import numpy as np
import pandas as pd

### --- 2017 ---

In [None]:
# Survey identifiers reused by every cell in this section
country, recode = 'LAO', 'women'

# Survey round processed by this section
year = '2017'
# -------------------------------------------------------------------

In [None]:
# Load the women file for the current country/year and prepare it for the
# indicator cells. The steps run in a fixed order: each one works on the
# `df` left behind by the step before it.

# Read file
df = read_spss_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# (return value is not captured -- presumably df is modified in place; confirm in std_utils)
generate_HHID(df, country, year, recode)

# Add Total, Year
# (return value is not captured -- presumably df is modified in place; confirm in std_utils)
add_total_year(df, year)

# Run quality assurance
# (called only with df; its result, if any, is discarded)
run_quality_assurance(df)

# Merge in HH and HL data
# (df is rebound to the returned frame)
df = merge_hh_hl_data(df, country, year)

# Subset women file
# (df is rebound to the returned frame)
df = subset_women_file(df, country, year)

In [None]:
# --- Clean out NO RESPONSE instances --- #
# df is rebound to the frame returned by update_no_response.
# NOTE(review): this step is active only for 2017; the 2012 section has the
# same call commented out and the 2006 section has no such cell.
df = update_no_response(df, country, year)

#### Create Indicators

In [None]:
# --- Create Mother edu [mother_edu] --- #
# Unlike the other create_* calls in this notebook, this one also receives
# `recode`; df is rebound to the returned frame.
df = create_mother_edu(df, country, year, recode)

In [None]:
# Build every indicator for this survey round. Each builder is called as
# builder(df, country, year) and df is rebound to what it returns, so the
# tuple order below is the order in which the indicators are created.
indicator_builders = (
    create_anc_4_visits,       # ANC 4+ visits [anc_4_visits]
    create_anc_3_components,   # ANC components [anc_3_components]
    create_inst_delivery,      # Institutional [inst_delivery]
    create_caesarean_del,      # Caesarean Delivery [caesarean_del]
    create_pnc_mother,         # Post-natal Health Check (mother) [pnc_mother]
    create_low_bw,             # Low birthweight [low_bw]
    create_early_bf,           # Early Initiation BF [early_bf]
    create_iron_supp,          # Iron Supplementation [iron_supp]
)

for build_indicator in indicator_builders:
    df = build_indicator(df, country, year)

### Export working variables

In [None]:
# Export the working variables held in df for this country, year and recode.
export_analyzed_data(df, country, year, recode)

### --- 2012 ---

In [None]:
# Survey identifiers reused by every cell in this section
country, recode = 'LAO', 'women'

# Survey round processed by this section
year = '2012'
# -------------------------------------------------------------------

In [None]:
# Load the women file for the current country/year and prepare it for the
# indicator cells. The steps run in a fixed order: each one works on the
# `df` left behind by the step before it.

# Read file
df = read_spss_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# (return value is not captured -- presumably df is modified in place; confirm in std_utils)
generate_HHID(df, country, year, recode)

# Add Total, Year
# (return value is not captured -- presumably df is modified in place; confirm in std_utils)
add_total_year(df, year)

# Run quality assurance
# (called only with df; its result, if any, is discarded)
run_quality_assurance(df)

# Merge in HH and HL data
# (df is rebound to the returned frame)
df = merge_hh_hl_data(df, country, year)

# Subset women file
# (df is rebound to the returned frame)
df = subset_women_file(df, country, year)

In [None]:
# # --- Clean out NO RESPONSE instances --- #
# df = update_no_response(df, country, year)

#### Create Indicators

In [None]:
# --- Create Mother edu [mother_edu] --- #
# Unlike the other create_* calls in this notebook, this one also receives
# `recode`; df is rebound to the returned frame.
df = create_mother_edu(df, country, year, recode)

In [None]:
# Build every indicator for this survey round. Each builder is called as
# builder(df, country, year) and df is rebound to what it returns, so the
# tuple order below is the order in which the indicators are created.
indicator_builders = (
    create_anc_4_visits,       # ANC 4+ visits [anc_4_visits]
    create_anc_3_components,   # ANC components [anc_3_components]
    create_inst_delivery,      # Institutional [inst_delivery]
    create_caesarean_del,      # Caesarean Delivery [caesarean_del]
    create_pnc_mother,         # Post-natal Health Check (mother) [pnc_mother]
    create_low_bw,             # Low birthweight [low_bw]
    create_early_bf,           # Early Initiation BF [early_bf]
    create_iron_supp,          # Iron Supplementation [iron_supp]
)

for build_indicator in indicator_builders:
    df = build_indicator(df, country, year)

### Export working variables

In [None]:
# Export the working variables held in df for this country, year and recode.
export_analyzed_data(df, country, year, recode)

### --- 2006 ---

In [19]:
# Survey identifiers reused by every cell in this section
country, recode = 'LAO', 'women'

# Survey round processed by this section
year = '2006'
# -------------------------------------------------------------------

In [20]:
# Load the women file for the current country/year and prepare it for the
# indicator cells. The steps run in a fixed order: each one works on the
# `df` left behind by the step before it.

# Read file
df = read_spss_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# (return value is not captured -- presumably df is modified in place; confirm in std_utils)
generate_HHID(df, country, year, recode)

# Add Total, Year
# (return value is not captured -- presumably df is modified in place; confirm in std_utils)
add_total_year(df, year)

# Run quality assurance
# (called only with df; its result, if any, is discarded)
run_quality_assurance(df)

# Merge in HH and HL data
# (df is rebound to the returned frame)
df = merge_hh_hl_data(df, country, year)

# Subset women file
# (df is rebound to the returned frame)
df = subset_women_file(df, country, year)

The file -- wm_2006.sav -- has the following shape: Rows: 7703; Columns: 182
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 7703; Columns: 184
Checking if any rows are duplicates...
The are no duplicate rows
The number of mothers with a birth in the past two years is: 1622


#### Create Indicators

In [21]:
# --- Create Mother edu [mother_edu] --- #
# Unlike the other create_* calls in this notebook, this one also receives
# `recode`; df is rebound to the returned frame.
df = create_mother_edu(df, country, year, recode)

In [22]:
# Build the indicators that are enabled for this survey round. Each builder
# is called as builder(df, country, year) and df is rebound to what it
# returns, so the tuple order below is the order of creation. Entries that
# are commented out are not run for this round.
indicator_builders = (
    # create_anc_4_visits,     # ANC 4+ visits [anc_4_visits]
    create_anc_3_components,   # ANC components [anc_3_components]
    create_inst_delivery,      # Institutional [inst_delivery]
    # create_caesarean_del,    # Caesarean Delivery [caesarean_del]
    # create_pnc_mother,       # Post-natal Health Check (mother) [pnc_mother]
    create_low_bw,             # Low birthweight [low_bw]
    create_early_bf,           # Early Initiation BF [early_bf]
    # create_iron_supp,        # Iron Supplementation [iron_supp]
)

for build_indicator in indicator_builders:
    df = build_indicator(df, country, year)

agg_value_prop_dict is: 
 {'Very small': 0.5, 'Average': 0.06794871794871794, 'Smaller than average': 0.53125, 'Larger than average': 0.020833333333333332, 'Very large': 0.0, 'DK': nan, 'Missing': 0.25}


  agg_value_prop_dict[agg_value] = numerator / denominator


### Export working variables

In [23]:
# Export the working variables held in df for this country, year and recode.
export_analyzed_data(df, country, year, recode)

### --- 2000 ---

In [25]:
# Survey identifiers reused by every cell in this section
country, recode = 'LAO', 'women'

# Survey round processed by this section
year = '2000'
# -------------------------------------------------------------------

In [26]:
# Only the file read is active for this round: every preparation step below
# is commented out, so after this cell df is the frame exactly as returned
# by read_spss_file (no HHID, no Total/Year, no HH/HL merge, no subsetting).

# Read file
df = read_spss_file(country, year, recode)

# # Create HHID to facilitate merge of HH and HL data
# generate_HHID(df, country, year, recode)

# # Add Total, Year
# add_total_year(df, year)

# # Run quality assurance
# run_quality_assurance(df)

# # Merge in HH and HL data
# df = merge_hh_hl_data(df, country, year)

# # Subset women file
# df = subset_women_file(df, country, year)

The file -- wm_2000.sav -- has the following shape: Rows: 3633; Columns: 273


In [24]:
# Tabulate early_bf by eth_hoh, passing wmweight as the weight column.
# NOTE(review): this cell's execution count (In [24]) is lower than that of
# the 2000 cells above it (In [25], In [26]), and early_bf is never created
# in the 2000 section -- the saved table presumably reflects the 2006 df.
# Confirm before re-running the notebook top to bottom.
var, ind_vars, wt = 'early_bf', ['eth_hoh'], 'wmweight'

output_mean_table(df, var, ind_vars, wt)

Unnamed: 0_level_0,early_bf,Weighted_Count
eth_hoh,Unnamed: 1_level_1,Unnamed: 2_level_1
Hmong,31.1,238.8
Khmou,31.2,220.0
Lao,34.5,734.1
Missing,0.0,0.9
Other Language,17.8,338.5
