# Alive & Thrive
## Cambodia Data: Women Files - Tabulation
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 12 November 2022

In [8]:
from pathlib import Path

import numpy as np
import pandas as pd

# Notebook display limits: show up to 1500 rows and never truncate columns
pd.options.display.max_rows = 1500
pd.options.display.max_columns = None

import statsmodels.api as sm

In [9]:
from tabulate_vars import (
    read_csv_file,
    concatenate_dfs,
    save_combined,
    create_bivariate_var_dep,
    extract_regression_params
)

from aw_analytics import mean_wt, output_mean_table

### Create and save combined women's file

In [10]:
# Identifiers reused by the cells that follow
country = 'KHM'
recode = 'women'

# Survey rounds to load; kept as strings and passed straight to read_csv_file
year_list = ['2000', '2005', '2010', '2014']

# Load the working file for each round, in year_list order
list_of_dfs = [
    read_csv_file(country, recode, yr, file_type='working')
    for yr in year_list
]

# Combine the per-round frames into a single frame
df = concatenate_dfs(list_of_dfs)

# Save and export the combined frame
save_combined(df, country, recode)

The file -- KHM_women_2000_working.csv -- has the following shape: Rows: 3210; Columns: 20
The file -- KHM_women_2005_working.csv -- has the following shape: Rows: 3268; Columns: 27
The file -- KHM_women_2010_working.csv -- has the following shape: Rows: 3215; Columns: 27
The file -- KHM_women_2014_working.csv -- has the following shape: Rows: 2899; Columns: 24


In [11]:
# Count region values within each Year (missing values kept as their own
# category), order the result by its index, and write it to region_by_year.csv
(
    df.groupby('Year')['region']
    .value_counts(dropna=False)
    .sort_index()
    .to_csv('region_by_year.csv')
)

### Run descriptive statistics

In [12]:
## TOTAL DATASET

# Set parameters

# Dependent variables; each one gets its own sheet in the per-year workbook.
# NOTE(review): `vars` shadows the Python builtin of the same name. The name is
# kept here so that anything else relying on it keeps working.
vars = ['anc_4_visits', 'anc_3_components', 'inst_delivery', 'caesarean_del', 'pnc_mother', 'low_bw', 'early_bf', 'iron_supp']

# Breakdown variables handed to output_mean_table
ind_vars = ['Total', 'residence', 'region', 'mother_edu', 'wealth_q', 'elderly_hoh', 'sex_hoh']

# Weight variable name handed to output_mean_table
wt = 'wmweight'

# Write one workbook per survey year: output/frequencies/<recode>/<country>_<recode>_<year>.xlsx
for year in year_list:

    df = read_csv_file(country, recode, year, file_type='working')

    out_fn = country + "_" + recode + "_" + year + ".xlsx"
    path = Path.cwd() / 'output' / 'frequencies' / recode / out_fn

    # Make sure the target folder exists before the writer opens the file
    path.parent.mkdir(parents=True, exist_ok=True)

    # The context manager saves and closes the workbook even if an
    # unexpected error interrupts the loop below.
    with pd.ExcelWriter(path) as xlwriter:

        for var in vars:

            # Best effort per variable: a failure for one variable is reported
            # and the remaining variables are still processed. Presumably this
            # covers rounds that lack a given variable (the working files have
            # different column counts) -- TODO confirm.
            try:
                # Rows with a missing value for this variable are excluded
                output = output_mean_table(df.dropna(subset=[var]), var, ind_vars, wt)
                sheet_name = str(var) + '_weighted'
                output.to_excel(xlwriter, sheet_name=sheet_name)

            # `Exception` rather than a bare `except:` so Ctrl-C / kernel
            # interrupts are not swallowed; the reason is included in the warning.
            except Exception as exc:
                print(f"Warning: Unable to create sheet for var {var} ({type(exc).__name__}: {exc})")

The file -- KHM_women_2000_working.csv -- has the following shape: Rows: 3210; Columns: 20
The file -- KHM_women_2005_working.csv -- has the following shape: Rows: 3268; Columns: 27
The file -- KHM_women_2010_working.csv -- has the following shape: Rows: 3215; Columns: 27
The file -- KHM_women_2014_working.csv -- has the following shape: Rows: 2899; Columns: 24


### Run bivariate statistics

In [13]:
# Load the combined file and pass it through create_bivariate_var_dep, which
# creates the updated bivariate variables (mother_edu_biv, eth_hoh_biv)
df = create_bivariate_var_dep(
    read_csv_file(country, recode, file_type='combined'),
    country,
)

The file -- KHM_women_combined.csv -- has the following shape: Rows: 12592; Columns: 24


In [14]:
# Analysis inputs
recode = 'women'

# Dependent variables (one output table each)
var_dep_list = [
    'anc_4_visits',
    'anc_3_components',
    'inst_delivery',
    'caesarean_del',
    'pnc_mother',
    'low_bw',
    'early_bf',
    'iron_supp',
]

# Independent variables (one column per variable in each output table)
ind_var_list = [
    'residence',
    'region',
    'mother_edu_biv',
    'wealth_q',
    'elderly_hoh',
    'sex_hoh',
]

# Run bivariate (extract WLS regression params): `output` maps each dependent
# variable to a DataFrame whose columns are the independent variables and whose
# values come from extract_regression_params
output = {
    dep: pd.DataFrame(
        {
            ind: extract_regression_params(df, dep, ind, recode)
            for ind in ind_var_list
        }
    )
    for dep in var_dep_list
}

var_dep and ind_var are: 
 ['anc_4_visits'], ['residence']
var_dep and ind_var are: 
 ['anc_4_visits'], ['region']
var_dep and ind_var are: 
 ['anc_4_visits'], ['mother_edu_biv']
var_dep and ind_var are: 
 ['anc_4_visits'], ['wealth_q']
var_dep and ind_var are: 
 ['anc_4_visits'], ['elderly_hoh']
var_dep and ind_var are: 
 ['anc_4_visits'], ['sex_hoh']
var_dep and ind_var are: 
 ['anc_3_components'], ['residence']
var_dep and ind_var are: 
 ['anc_3_components'], ['region']
var_dep and ind_var are: 
 ['anc_3_components'], ['mother_edu_biv']
var_dep and ind_var are: 
 ['anc_3_components'], ['wealth_q']
var_dep and ind_var are: 
 ['anc_3_components'], ['elderly_hoh']
var_dep and ind_var are: 
 ['anc_3_components'], ['sex_hoh']
var_dep and ind_var are: 
 ['inst_delivery'], ['residence']
var_dep and ind_var are: 
 ['inst_delivery'], ['region']
var_dep and ind_var are: 
 ['inst_delivery'], ['mother_edu_biv']
var_dep and ind_var are: 
 ['inst_delivery'], ['wealth_q']
var_dep and ind_var are: 

In [15]:
# Write one CSV per dependent variable, transposed so that each independent
# variable becomes a row
for var in var_dep_list:
    fn = f"{var}.csv"
    output[var].T.to_csv(f"./output/bivariate/{recode}/{fn}")

#### Get bivariate differences

In [16]:
# Reload the combined file and rebuild the bivariate variables so this cell
# does not depend on the state `df` was left in by earlier cells
df = read_csv_file(country, recode, file_type='combined')

df = create_bivariate_var_dep(df, country)

var_dep_list = ['anc_4_visits', 'anc_3_components', 'inst_delivery', 'caesarean_del', 'pnc_mother', 'low_bw', 'early_bf', 'iron_supp']

ind_var_list = ['residence', 'region', 'mother_edu_biv', 'wealth_q', 'elderly_hoh', 'sex_hoh']


# One workbook per dependent variable, one sheet per independent variable.
# `wt` is the weight variable name set in the descriptive-stats cell.
for var in var_dep_list:

    out_fn = var + "_bivariate_diff" + ".xlsx"
    path = Path.cwd() / 'output' / 'bivariate' / recode / out_fn

    # Make sure the target folder exists before the writer opens the file
    path.parent.mkdir(parents=True, exist_ok=True)

    # The context manager saves and closes the workbook even if a groupby or
    # to_excel call raises, so the file handle is never left open.
    with pd.ExcelWriter(path) as xlwriter:

        for ind_var in ind_var_list:

            # Apply mean_wt to each (Year, ind_var) group, then pivot so that
            # ind_var categories are rows and years are columns (this assumes
            # mean_wt returns one value per group -- TODO confirm).
            # Note: this rebinds the notebook-level name `output`.
            output = df.groupby(['Year', ind_var]).apply(mean_wt, var, wt=wt).unstack().transpose()

            sheet_name = str(ind_var) + '_weighted'
            output.to_excel(xlwriter, sheet_name=sheet_name)


The file -- KHM_women_combined.csv -- has the following shape: Rows: 12592; Columns: 24
