# Alive & Thrive
## Cambodia (KHM) Data: Child Files - Preprocessing
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 30 September 2022

In [1]:
from std_utils import (
    read_spss_file,
    generate_HHID,
    add_total_year,
    run_quality_assurance,
    merge_hh_hl_data,
    export_analyzed_data
)

from children_analysis import (
    subset_children_file,
    create_sex_ch,
    create_ch_age_cat,
    create_excl_bf,
    create_cont_1223_bf,
    create_mdd_ch,
    create_mmf_ch,
    create_mad_ch
)

from women_analysis import create_mother_edu, divide_weight_million
from aw_analytics import output_mean_table

import numpy as np
import pandas as pd

# Notebook-wide pandas display settings: show up to 1500 rows and never
# truncate the column listing when a frame is rendered.
for option_name, option_value in (
    ("display.max_rows", 1500),
    ("display.max_columns", None),
):
    pd.set_option(option_name, option_value)


### --- 2014 ---

In [None]:
# Survey selection shared by every cell in this section:
# country code and the recode (file type) being processed.
country, recode = 'KHM', 'children'

# -------------------------------------------------------------------
# Survey round handled by this section
year = '2014'
# -------------------------------------------------------------------

In [None]:
# Read the SPSS file selected by country / year / recode
df = read_spss_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# (the return value is not captured, so this presumably adds the key to df in place -- TODO confirm)
generate_HHID(df, country, year, recode)

# Add Total, Year
# (also called without capturing a result; presumably mutates df in place -- TODO confirm)
add_total_year(df, year)

# Run quality assurance (any return value is discarded here)
run_quality_assurance(df)

# Merge in HH and HL data; df is rebound to whatever the helper returns
df = merge_hh_hl_data(df, country, year)

# Subset children's file (completed); df is rebound to the subset
df = subset_children_file(df, country, year)

#### Create indicators

In [None]:
# --- Child Sex [sex_ch] --- #
df = create_sex_ch(df, country, year)

# --- Mother Education [mother_edu] --- #
# Pass the notebook-level `recode` instead of repeating the 'children' literal,
# so this call stays in sync with the parameters cell and the weight update below.
df = create_mother_edu(df, country, year, recode=recode)

# --- Age categories [ch_age_cat_X] --- #
df = create_ch_age_cat(df, country, year)

# Update weight
# (helper name suggests the weight is divided by 1,000,000 -- verify in women_analysis)
df = divide_weight_million(df, country, year, recode)

In [None]:
# Feeding indicators that share the (df, country, year) call signature,
# applied in the listed order; each step rebinds df to the helper's result.
for build_indicator in (
    create_excl_bf,       # Exclusive BF [excl_bf]
    create_cont_1223_bf,  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
    create_mdd_ch,        # Minimum Dietary Diversity [mdd_ch]
    create_mmf_ch,        # Minimum Meal Frequency [mmf_ch]
):
    df = build_indicator(df, country, year)

# Minimum Acceptable Diet [mad_ch] -- takes only the frame and runs last
# (presumably combines columns created above; TODO confirm in children_analysis)
df = create_mad_ch(df)

#### Subset and export working dataset

In [None]:
# Hand the processed frame to the project export helper for this country / year / recode
# (output location and format are decided inside std_utils, not visible in this notebook)
export_analyzed_data(df, country, year, recode)

### --- 2010 ---

In [None]:
# Survey selection shared by every cell in this section:
# country code and the recode (file type) being processed.
country, recode = 'KHM', 'children'

# -------------------------------------------------------------------
# Survey round handled by this section
year = '2010'
# -------------------------------------------------------------------

In [None]:
# Read the SPSS file selected by country / year / recode
df = read_spss_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# (the return value is not captured, so this presumably adds the key to df in place -- TODO confirm)
generate_HHID(df, country, year, recode)

# Add Total, Year
# (also called without capturing a result; presumably mutates df in place -- TODO confirm)
add_total_year(df, year)

# Run quality assurance (any return value is discarded here)
run_quality_assurance(df)

# Merge in HH and HL data; df is rebound to whatever the helper returns
df = merge_hh_hl_data(df, country, year)

# Subset children's file (completed); df is rebound to the subset
df = subset_children_file(df, country, year)

#### Create indicators

In [None]:
# --- Child Sex [sex_ch] --- #
df = create_sex_ch(df, country, year)

# --- Mother Education [mother_edu] --- #
# Pass the notebook-level `recode` instead of repeating the 'children' literal,
# so this call stays in sync with the parameters cell and the weight update below.
df = create_mother_edu(df, country, year, recode=recode)

# --- Age categories [ch_age_cat_X] --- #
df = create_ch_age_cat(df, country, year)

# Update weight
# (helper name suggests the weight is divided by 1,000,000 -- verify in women_analysis)
df = divide_weight_million(df, country, year, recode)

In [None]:
# Feeding indicators that share the (df, country, year) call signature,
# applied in the listed order; each step rebinds df to the helper's result.
for build_indicator in (
    create_excl_bf,       # Exclusive BF [excl_bf]
    create_cont_1223_bf,  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
    create_mdd_ch,        # Minimum Dietary Diversity [mdd_ch]
    create_mmf_ch,        # Minimum Meal Frequency [mmf_ch]
):
    df = build_indicator(df, country, year)

# Minimum Acceptable Diet [mad_ch] -- takes only the frame and runs last
# (presumably combines columns created above; TODO confirm in children_analysis)
df = create_mad_ch(df)

#### Subset and export working dataset

In [None]:
# Hand the processed frame to the project export helper for this country / year / recode
# (output location and format are decided inside std_utils, not visible in this notebook)
export_analyzed_data(df, country, year, recode)

### --- 2005 ---

In [None]:
# Survey selection shared by every cell in this section:
# country code and the recode (file type) being processed.
country, recode = 'KHM', 'children'

# -------------------------------------------------------------------
# Survey round handled by this section
year = '2005'
# -------------------------------------------------------------------

In [None]:
# Read the SPSS file selected by country / year / recode
df = read_spss_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# (the return value is not captured, so this presumably adds the key to df in place -- TODO confirm)
generate_HHID(df, country, year, recode)

# Add Total, Year
# (also called without capturing a result; presumably mutates df in place -- TODO confirm)
add_total_year(df, year)

# Run quality assurance (any return value is discarded here)
run_quality_assurance(df)

# Merge in HH and HL data; df is rebound to whatever the helper returns
df = merge_hh_hl_data(df, country, year)

# Subset children's file (completed); df is rebound to the subset
df = subset_children_file(df, country, year)

#### Create indicators

In [None]:
# --- Child Sex [sex_ch] --- #
df = create_sex_ch(df, country, year)

# --- Mother Education [mother_edu] --- #
# Pass the notebook-level `recode` instead of repeating the 'children' literal,
# so this call stays in sync with the parameters cell and the weight update below.
df = create_mother_edu(df, country, year, recode=recode)

# --- Age categories [ch_age_cat_X] --- #
df = create_ch_age_cat(df, country, year)

# Update weight
# (helper name suggests the weight is divided by 1,000,000 -- verify in women_analysis)
df = divide_weight_million(df, country, year, recode)

In [None]:
# Feeding indicators computed for this round, applied in the listed order;
# each step rebinds df to the helper's result.
# NOTE(review): MMF and MAD are disabled for 2005 -- presumably the inputs they
# need are not available in this round; TODO confirm and record the reason.
for build_indicator in (
    create_excl_bf,       # Exclusive BF [excl_bf]
    create_cont_1223_bf,  # Continued Breastfeeding 12-23 mos [cont_1223_bf]
    create_mdd_ch,        # Minimum Dietary Diversity [mdd_ch]
    # create_mmf_ch,      # Minimum Meal Frequency [mmf_ch] -- disabled
):
    df = build_indicator(df, country, year)

# # --- Minimum Acceptable Diet [mad_ch] --- #
# df = create_mad_ch(df)

#### Subset and export working dataset

In [None]:
# Hand the processed frame to the project export helper for this country / year / recode
# (output location and format are decided inside std_utils, not visible in this notebook)
export_analyzed_data(df, country, year, recode)

### --- 2000 ---

In [2]:
# Survey selection shared by every cell in this section:
# country code and the recode (file type) being processed.
country, recode = 'KHM', 'children'

# -------------------------------------------------------------------
# Survey round handled by this section
year = '2000'
# -------------------------------------------------------------------

In [3]:
# Read the SPSS file selected by country / year / recode
df = read_spss_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data
# (the return value is not captured, so this presumably adds the key to df in place -- TODO confirm)
generate_HHID(df, country, year, recode)

# Add Total, Year
# (also called without capturing a result; presumably mutates df in place -- TODO confirm)
add_total_year(df, year)

# Run quality assurance (any return value is discarded here)
# NOTE(review): the recorded output for this round reports "HHID is NOT unique";
# confirm that the merge below behaves as intended with repeated keys.
run_quality_assurance(df)

# Merge in HH and HL data; df is rebound to whatever the helper returns
df = merge_hh_hl_data(df, country, year)

# Subset children's file (completed); df is rebound to the subset
df = subset_children_file(df, country, year)

The file -- ch_2000.sav -- has the following shape: Rows: 8834; Columns: 862
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 8834; Columns: 685
Checking if any rows are duplicates...
The are no duplicate rows
The number of children with a completed survey is: 8834


#### Create indicators

In [4]:
# NOTE(review): the saved output of this cell shows pandas SettingWithCopyWarning
# raised from the ch_age_cat_* column assignments, i.e. df reaches that helper
# flagged as a slice of another frame. Which upstream helper produces the slice
# is not visible from this notebook -- consider returning an explicit .copy() there.

# --- Child Sex [sex_ch] --- #
df = create_sex_ch(df, country, year)

# --- Mother Education [mother_edu] --- #
# Pass the notebook-level `recode` instead of repeating the 'children' literal,
# so this call stays in sync with the parameters cell and the weight update below.
df = create_mother_edu(df, country, year, recode=recode)

# --- Age categories [ch_age_cat_X] --- #
df = create_ch_age_cat(df, country, year)

# Update weight
# (helper name suggests the weight is divided by 1,000,000 -- verify in women_analysis)
df = divide_weight_million(df, country, year, recode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ch_age_cat_0_5"] = np.where(df["age_in_months"] <= 5, 100, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ch_age_cat_6_8"] = np.where((df["age_in_months"] >= 6) & (df["age_in_months"] <= 8), 100, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ch_age_cat_9_23"] = np.where((df["age_

In [5]:
# NOTE(review): only continued breastfeeding is computed for 2000; the other
# feeding indicators are left commented out -- presumably the inputs they need
# are not available in this round (TODO confirm and record the reason).

# # --- Exclusive BF [excl_bf] --- #
# df = create_excl_bf(df, country, year)

# --- Continued Breastfeeding 12-23 mos [cont_1223_bf] --- #
# NOTE(review): the saved output shows SettingWithCopyWarning from this helper's
# 'breastmilk' / 'cont_1223_bf' assignments; df appears to be flagged as a slice
# of another frame when it arrives here.
df = create_cont_1223_bf(df, country, year)

# # --- Minimum Dietary Diversity [mdd_ch] --- #
# df = create_mdd_ch(df, country, year)

# # --- Minimum Meal Frequency [mmf_ch] --- #
# df = create_mmf_ch(df, country, year)

# # --- Minimum Acceptable Diet [mad_ch] --- #
# df = create_mad_ch(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['breastmilk'] = np.where(df[var_breastmilk] == breastmilk_yes_values, 100, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['cont_1223_bf'] = np.where(df['breastmilk'] == 100, 100, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['cont_1223_bf'] = np.where(df['ch_age_cat_12_23'] == 0, np

#### Subset and export working dataset

In [6]:
# Hand the processed frame to the project export helper for this country / year / recode
# (output location and format are decided inside std_utils, not visible in this notebook)
export_analyzed_data(df, country, year, recode)

In [None]:
# Spot-check of the continued-breastfeeding indicator on the frame currently
# held in df: outcome column, grouping column(s) and weight column(s) are
# passed to the project's mean-table helper (presumably a weighted mean -- TODO confirm).
var, ind_vars, weight = 'cont_1223_bf', ['Total'], ['chweight']

output_mean_table(df, var, ind_vars, weight)