# Alive & Thrive
## Cambodia Data: Women Files - Preprocessing
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 16 August 2022

In [43]:
from std_utils import (
    read_csv_file,
    generate_HHID,
    add_total_year,
    run_quality_assurance,
    merge_hh_hl_data,
    export_analyzed_data
)

from women_analysis import (
    subset_women_file,
    create_mother_edu,
    create_anc_4_visits,
    create_anc_3_components,
    create_inst_delivery,
    create_caesarean_del,
    create_pnc_mother,
    create_low_bw,
    create_early_bf,
    create_iron_supp,
    divide_weight_million
)

from aw_analytics import output_mean_table

### --- 2014 ---

In [44]:
# Survey selection for this section; every helper call below is keyed on
# these three values.
country, recode = 'KHM', 'women'

# >>> survey round processed in this section <<<
year = '2014'

In [45]:
# Load the raw women recode for this country / year
df = read_csv_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data.
# NOTE(review): the return value is not captured here or in the next two
# calls -- presumably these helpers modify `df` in place; confirm in std_utils.
generate_HHID(df, country, year, recode)

# Add Total, Year
add_total_year(df, year)

# Run quality assurance
run_quality_assurance(df)

# Merge in HH and HL data
df = merge_hh_hl_data(df, country, year)

# Subset women file.
# Take an explicit copy of the result: the later indicator steps assign new
# columns onto this frame, and pandas raises SettingWithCopyWarning when the
# frame it is given is still flagged as a slice of another DataFrame.
df = subset_women_file(df, country, year).copy()

The file -- wm_2014.csv -- has the following shape: Rows: 17578; Columns: 22
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 17578; Columns: 25
Checking if any rows are duplicates...
The are no duplicate rows
The number of mothers with a birth in the past two years is: 2899


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['two_years'] = np.where(df[var_doi] - df[var_dob] <= 23, 100, 0)


#### Create Indicators

In [46]:
# Two preparatory steps sharing one signature, applied in order:
#   1. create_mother_edu      -> mother's education category [mother_edu]
#   2. divide_weight_million  -> update the sample weight
for _prep_step in (create_mother_edu, divide_weight_million):
    df = _prep_step(df, country, year, recode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["mother_edu"] = np.where((df[var_mother_edu].isnull()) | (df[var_mother_edu].isin(mother_edu_ece_values)), "Mother Edu: None/ECE",
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["wmweight"] = df[var_weight] / 1000000


In [47]:
# Indicator builders, applied in order; each takes (df, country, year) and
# returns the updated frame.
indicator_steps = (
    create_anc_4_visits,      # ANC 4+ visits              [anc_4_visits]
    create_anc_3_components,  # ANC components             [anc_3_components]
    create_inst_delivery,     # Institutional delivery     [inst_delivery]
    create_caesarean_del,     # Caesarean delivery         [caesarean_del]
    create_pnc_mother,        # Post-natal check (mother)  [pnc_mother]
    create_low_bw,            # Low birthweight            [low_bw]
    create_early_bf,          # Early initiation of BF     [early_bf]
    create_iron_supp,         # Iron supplementation       [iron_supp]
)

for _build_indicator in indicator_steps:
    df = _build_indicator(df, country, year)

agg_value_prop_dict is: 
 {'Larger than average': 0.006701631701631702, 'Smaller than average': 0.42297979797979796, 'Average': 0.06353021978021978, 'Very large': 0.0, 'Very small': 0.8026315789473685, "Don't know": 0.0, 'nan': nan}


  agg_value_prop_dict[agg_value] = numerator / denominator


### Export working variables

In [48]:
# Hand the processed frame for this country / year / recode to the export helper.
# NOTE(review): destination and file format are decided inside
# std_utils.export_analyzed_data -- not visible from this notebook.
export_analyzed_data(df, country, year, recode)

### --- 2010 ---

In [49]:
# Survey selection for this section; every helper call below is keyed on
# these three values.
country, recode = 'KHM', 'women'

# >>> survey round processed in this section <<<
year = '2010'

In [50]:
# Load the raw women recode for this country / year
df = read_csv_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data.
# NOTE(review): the return value is not captured here or in the next two
# calls -- presumably these helpers modify `df` in place; confirm in std_utils.
generate_HHID(df, country, year, recode)

# Add Total, Year
add_total_year(df, year)

# Run quality assurance
run_quality_assurance(df)

# Merge in HH and HL data
df = merge_hh_hl_data(df, country, year)

# Subset women file.
# Take an explicit copy of the result: the later indicator steps assign new
# columns onto this frame, and pandas raises SettingWithCopyWarning when the
# frame it is given is still flagged as a slice of another DataFrame.
df = subset_women_file(df, country, year).copy()

The file -- wm_2010.csv -- has the following shape: Rows: 18754; Columns: 25
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 18754; Columns: 28
Checking if any rows are duplicates...
The are no duplicate rows
The number of mothers with a birth in the past two years is: 3215


#### Create Indicators

In [51]:
# Two preparatory steps sharing one signature, applied in order:
#   1. create_mother_edu      -> mother's education category [mother_edu]
#   2. divide_weight_million  -> update the sample weight
for _prep_step in (create_mother_edu, divide_weight_million):
    df = _prep_step(df, country, year, recode)

In [52]:
# Indicator builders, applied in order; each takes (df, country, year) and
# returns the updated frame.
indicator_steps = (
    create_anc_4_visits,      # ANC 4+ visits              [anc_4_visits]
    create_anc_3_components,  # ANC components             [anc_3_components]
    create_inst_delivery,     # Institutional delivery     [inst_delivery]
    create_caesarean_del,     # Caesarean delivery         [caesarean_del]
    create_pnc_mother,        # Post-natal check (mother)  [pnc_mother]
    create_low_bw,            # Low birthweight            [low_bw]
    create_early_bf,          # Early initiation of BF     [early_bf]
    create_iron_supp,         # Iron supplementation       [iron_supp]
)

for _build_indicator in indicator_steps:
    df = _build_indicator(df, country, year)

agg_value_prop_dict is: 
 {'Very small': 0.8579545454545454, 'Smaller than average': 0.3225388601036269, 'Larger than average': 0.008637709772951628, 'Average': 0.08280685920577617, 'Very large': 0.0016891891891891893, 'DK': 0.25, 'nan': nan}


  agg_value_prop_dict[agg_value] = numerator / denominator


### Export working variables

In [53]:
# Hand the processed frame for this country / year / recode to the export helper.
# NOTE(review): destination and file format are decided inside
# std_utils.export_analyzed_data -- not visible from this notebook.
export_analyzed_data(df, country, year, recode)

### --- 2005 ---

In [54]:
# Survey selection for this section; every helper call below is keyed on
# these three values.
country, recode = 'KHM', 'women'

# >>> survey round processed in this section <<<
year = '2005'

In [55]:
# Load the raw women recode for this country / year
df = read_csv_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data.
# NOTE(review): the return value is not captured here or in the next two
# calls -- presumably these helpers modify `df` in place; confirm in std_utils.
generate_HHID(df, country, year, recode)

# Add Total, Year
add_total_year(df, year)

# Run quality assurance
run_quality_assurance(df)

# Merge in HH and HL data
df = merge_hh_hl_data(df, country, year)

# Subset women file.
# Take an explicit copy of the result: the later indicator steps assign new
# columns onto this frame, and pandas raises SettingWithCopyWarning when the
# frame it is given is still flagged as a slice of another DataFrame.
df = subset_women_file(df, country, year).copy()

The file -- wm_2005.csv -- has the following shape: Rows: 16823; Columns: 25
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 16823; Columns: 28
Checking if any rows are duplicates...
The are no duplicate rows
The number of mothers with a birth in the past two years is: 3268


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['two_years'] = np.where(df[var_doi] - df[var_dob] <= 23, 100, 0)


#### Create Indicators

In [56]:
# Two preparatory steps sharing one signature, applied in order:
#   1. create_mother_edu      -> mother's education category [mother_edu]
#   2. divide_weight_million  -> update the sample weight
for _prep_step in (create_mother_edu, divide_weight_million):
    df = _prep_step(df, country, year, recode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["mother_edu"] = np.where((df[var_mother_edu].isnull()) | (df[var_mother_edu].isin(mother_edu_ece_values)), "Mother Edu: None/ECE",
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["wmweight"] = df[var_weight] / 1000000


In [57]:
# Indicator builders, applied in order; each takes (df, country, year) and
# returns the updated frame.
indicator_steps = (
    create_anc_4_visits,      # ANC 4+ visits              [anc_4_visits]
    create_anc_3_components,  # ANC components             [anc_3_components]
    create_inst_delivery,     # Institutional delivery     [inst_delivery]
    create_caesarean_del,     # Caesarean delivery         [caesarean_del]
    create_pnc_mother,        # Post-natal check (mother)  [pnc_mother]
    create_low_bw,            # Low birthweight            [low_bw]
    create_early_bf,          # Early initiation of BF     [early_bf]
    create_iron_supp,         # Iron supplementation       [iron_supp]
)

for _build_indicator in indicator_steps:
    df = _build_indicator(df, country, year)

agg_value_prop_dict is: 
 {'Average': 0.10193726937269372, 'Larger than average': 0.011180679785330949, 'Very small': 0.8095238095238095, 'Smaller than average': 0.4425, 'Very large': 0.0, "Don't know": nan}


  agg_value_prop_dict[agg_value] = numerator / denominator


### Export working variables

In [58]:
# Hand the processed frame for this country / year / recode to the export helper.
# NOTE(review): destination and file format are decided inside
# std_utils.export_analyzed_data -- not visible from this notebook.
export_analyzed_data(df, country, year, recode)

### --- 2000 ---

In [59]:
# Survey selection for this section; every helper call below is keyed on
# these three values.
country, recode = 'KHM', 'women'

# >>> survey round processed in this section <<<
year = '2000'

In [60]:
# Load the raw women recode for this country / year
df = read_csv_file(country, year, recode)

# Create HHID to facilitate merge of HH and HL data.
# NOTE(review): the return value is not captured here or in the next two
# calls -- presumably these helpers modify `df` in place; confirm in std_utils.
generate_HHID(df, country, year, recode)

# Add Total, Year
add_total_year(df, year)

# Run quality assurance
run_quality_assurance(df)

# Merge in HH and HL data
df = merge_hh_hl_data(df, country, year)

# Subset women file.
# Take an explicit copy of the result: the later indicator steps assign new
# columns onto this frame, and pandas raises SettingWithCopyWarning when the
# frame it is given is still flagged as a slice of another DataFrame.
df = subset_women_file(df, country, year).copy()

The file -- wm_2000.csv -- has the following shape: Rows: 15351; Columns: 19
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 15351; Columns: 23
Checking if any rows are duplicates...
The are no duplicate rows
The number of mothers with a birth in the past two years is: 3210


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['two_years'] = np.where(df[var_doi] - df[var_dob] <= 23, 100, 0)


#### Create Indicators

In [61]:
# Two preparatory steps sharing one signature, applied in order:
#   1. create_mother_edu      -> mother's education category [mother_edu]
#   2. divide_weight_million  -> update the sample weight
for _prep_step in (create_mother_edu, divide_weight_million):
    df = _prep_step(df, country, year, recode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["mother_edu"] = np.where((df[var_mother_edu].isnull()) | (df[var_mother_edu].isin(mother_edu_ece_values)), "Mother Edu: None/ECE",
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["wmweight"] = df[var_weight] / 1000000


In [62]:
# Indicator builders, applied in order; each takes (df, country, year) and
# returns the updated frame.
#
# The post-natal health check for the mother [pnc_mother] (create_pnc_mother)
# is intentionally NOT built for this round.
# NOTE(review): presumably the 2000 file lacks the source variable -- confirm.
indicator_steps = (
    create_anc_4_visits,      # ANC 4+ visits           [anc_4_visits]
    create_anc_3_components,  # ANC components          [anc_3_components]
    create_inst_delivery,     # Institutional delivery  [inst_delivery]
    create_caesarean_del,     # Caesarean delivery      [caesarean_del]
    create_low_bw,            # Low birthweight         [low_bw]
    create_early_bf,          # Early initiation of BF  [early_bf]
    create_iron_supp,         # Iron supplementation    [iron_supp]
)

for _build_indicator in indicator_steps:
    df = _build_indicator(df, country, year)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[var_anc_4] = df[var_anc_4].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[var_anc_4] = pd.to_numeric(df[var_anc_4], errors="coerce")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["anc_4_visits"] = np.where((df[var_anc_4] >= 4) & (df[var_anc_4] < 99), 100, 0)
A value is trying 

agg_value_prop_dict is: 
 {'Average': 0.05829015544041451, 'Very large': 0.0, 'Larger than average': 0.0012953367875647669, 'Smaller than average': 0.47619047619047616, 'DK': nan, 'Very small': 0.8, 'nan': nan}


  agg_value_prop_dict[agg_value] = numerator / denominator
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['low_bw'] = [agg_value_prop_dict[x] * 100 for x in df[var_birth_size]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["early_bf"] = np.where((df[var_time_cat].isin(time_cat_values)), 100, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

### Export working variables

In [63]:
# Hand the processed frame for this country / year / recode to the export helper.
# NOTE(review): destination and file format are decided inside
# std_utils.export_analyzed_data -- not visible from this notebook.
export_analyzed_data(df, country, year, recode)

In [None]:
# Ad-hoc check: pass one indicator, its breakdown variable(s) and the weight
# column to aw_analytics.output_mean_table.
# NOTE(review): `df` is whichever survey round was processed last -- when the
# notebook is run top to bottom that is the 2000 frame.

# Indicator column to summarise
var = 'low_bw'
# Breakdown variable(s)
ind_vars = ['residence']
# Weight column -- presumably the one produced by divide_weight_million; confirm
wt = 'wmweight'

output_mean_table(df, var, ind_vars, wt)