### Cambodia Anthro analysis

In [1]:
import numpy as np
import pandas as pd

# Notebook display limits: show up to 1500 rows and never truncate columns.
pd.options.display.max_rows = 1500
pd.options.display.max_columns = None

from std_utils import (
    read_spss_file,
    generate_HHID,
    add_total_year,
    run_quality_assurance,
    merge_hh_hl_data,
    export_analyzed_data
)

from children_analysis import (
    create_sex_ch,
    create_stunting_ch,
    create_wasting_ch,
    create_overweight_ch
)
from women_analysis import create_mother_edu, divide_weight_million


from aw_analytics import output_mean_table

In [17]:
# Analysis parameters shared by every cell below: country code and main recode.
country, recode = 'KHM', 'anthro'

# Second recode, read and merged into the main file for some survey years.
recode1 = 'measurements'

# -------------------------------------------------------------------
# Survey year (kept as a string; it is compared against string literals).
year = '2000'
# -------------------------------------------------------------------

In [18]:
# Load the main recode file for the configured country and year.
df = read_spss_file(country, year, recode)

# The 2000 and 2005 rounds need the separate measurements file joined on.
# Left join: every row of the main file is kept.
if year in ('2005', '2000'):
    measurements = read_spss_file(country, year, recode1)
    df = df.merge(
        measurements,
        how='left',
        left_on=['HHID', 'HVIDX'],
        right_on=['HWHHID', 'HWLINE'],
    )

# Create HHID to facilitate the merge of HH and HL data.
# NOTE(review): the return value is discarded, so this presumably updates
# `df` in place -- confirm in std_utils.
generate_HHID(df, country, year, recode)

# Add Total and Year (return value likewise unused).
add_total_year(df, year)

# Quality-assurance pass over the frame.
run_quality_assurance(df)

# Bring in the HH and HL data; this helper returns the merged frame.
df = merge_hh_hl_data(df, country, year)

The file -- anthro_2000.sav -- has the following shape: Rows: 66285; Columns: 327
The file -- measurements_2000.sav -- has the following shape: Rows: 4031; Columns: 7
HHID is NOT unique
Drop columns if all values are NaN...
Updated -- Rows: 66285; Columns: 322
Checking if any rows are duplicates...
The are no duplicate rows


In [19]:
# Each helper below takes the frame and returns the updated frame.
# The recode is passed from the `recode` parameter set in the parameters cell
# rather than repeating the 'anthro' literal, so this cell cannot drift out of
# sync with the file that was actually read and exported.

# --- Child Sex [sex_ch] --- #
df = create_sex_ch(df, country, year, recode=recode)

# --- Mother Education [mother_edu] --- #
df = create_mother_edu(df, country, year, recode=recode)

# Update weight
df = divide_weight_million(df, country, year, recode=recode)

In [20]:
# Derive the child anthropometric indicators, in this order; each helper takes
# the frame and returns the updated frame:
#   --- Child Stunting (< -2SD) [stunting_ch] ---
#   --- Child Wasting (< -2SD) [wasting_ch] ---
#   --- Child Overweight (> 2SD) [overweight_ch] ---
# NOTE(review): the recorded output of this cell shows pandas
# "set on a copy of a slice" warnings raised from assignments inside the
# stunting helper -- presumably it operates on a filtered slice; confirm in
# children_analysis.
for create_indicator in (create_stunting_ch, create_wasting_ch, create_overweight_ch):
    df = create_indicator(df, country, year)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[var_stunting_z] = pd.to_numeric(df[var_stunting_z].astype(str), errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['stunting_ch'] = np.where(df[var_stunting_z]/100 < -2, 100, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['stunting_ch'] = np.where((df[var_stunting_z].isnull

In [21]:
# Hand the analyzed frame to the export helper for this country / year / recode.
# NOTE(review): destination and file format are defined in std_utils, not shown here.
export_analyzed_data(df, country, year, recode)

In [None]:
# Tabulate `stunting_ch` broken down by `sex_ch` via output_mean_table,
# passing ['chweight'] as the weight argument.
var, ind_vars, weight = 'stunting_ch', ['sex_ch'], ['chweight']

output_mean_table(df, var, ind_vars, weight)