# Alive & Thrive
## Viet Nam Data: Women Files - Tabulation
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 11 July 2022

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

import statsmodels.api as sm

In [None]:
# %load tabulate_imports.py
from tabulate import (
    read_csv_file,
    concatenate_dfs,
    save_combined,
    create_bivariate_var_dep,
    extract_regression_params
)

from aw_analytics import mean_wt, output_mean_table

### Create and save combined women's file

In [None]:
# Survey identifiers; the later cells in this notebook reuse both names
country = 'VNM'
recode = 'women'

# -------------------------------------------------------------------
# Survey years to process (strings, passed to read_csv_file unchanged)
year_list = ['2000', '2006', '2011', '2014', '2021']
# -------------------------------------------------------------------


# Load the 'working' file of every survey year, in year_list order
list_of_dfs = [
    read_csv_file(country, recode, survey_year, file_type='working')
    for survey_year in year_list
]

# Merge the per-year dfs into one df via concatenate_dfs
df = concatenate_dfs(list_of_dfs)

# Hand the combined df to save_combined for export
save_combined(df, country, recode)

### Run Descriptive stats

In [None]:
## TOTAL DATASET

# Set parameters
# NOTE(review): `vars` shadows the Python builtin of the same name. The name
# is kept so that any other cell referring to it keeps working.
vars = ['anc_4_visits', 'anc_3_components', 'inst_delivery', 'caesarean_del', 'pnc_mother', 'low_bw', 'early_bf']

ind_vars = ['Total', 'residence', 'region', 'mother_edu', 'wealth_q', 'eth_hoh', 'elderly_hoh', 'sex_hoh']

wt = 'wmweight'

# For every survey year: read that year's working file and write one Excel
# workbook holding one sheet per variable in `vars`. Each sheet contains the
# table returned by output_mean_table(df, var, ind_vars, wt).

for year in year_list:

    df = read_csv_file(country, recode, year, file_type='working')

    out_fn = f"{country}_{recode}_{year}.xlsx"
    path = Path.cwd() / 'output' / 'frequencies' / out_fn

    # Use the writer as a context manager so the workbook is saved and the
    # file handle released even if something unexpected propagates mid-year.
    with pd.ExcelWriter(path) as xlwriter:

        for var in vars:

            try:
                output = output_mean_table(df, var, ind_vars, wt)
                sheet_name = str(var) + '_weighted'
                output.to_excel(xlwriter, sheet_name=sheet_name)

            except Exception as err:
                # Best effort: skip this variable and carry on with the rest.
                # Catching Exception (not a bare except) lets Ctrl-C and
                # SystemExit through; the cause is reported so a skipped
                # sheet can be diagnosed.
                print(f"Warning: Unable to create sheet for var {var} ({type(err).__name__}: {err})")

### Run Bivariate stats

In [None]:
# Load the combined file and pass it straight through
# create_bivariate_var_dep, which creates the updated bivariate
# variables (mother_edu_biv, eth_hoh_biv)
df = create_bivariate_var_dep(
    read_csv_file(country, recode, file_type='combined')
)

In [None]:
# Dependent variables: one output table is built per entry
var_dep_list = [
    'anc_4_visits',
    'anc_3_components',
    'inst_delivery',
    'caesarean_del',
    'pnc_mother',
    'low_bw',
    'early_bf',
]

# Independent variables: one column per entry in each output table
ind_var_list = [
    'residence',
    'region',
    'mother_edu_biv',
    'wealth_q',
    'eth_hoh_biv',
    'elderly_hoh',
    'sex_hoh',
]

# Run bivariate (extract WLS regression params).
# Result: {dependent variable -> DataFrame whose columns are the independent
# variables, each filled with extract_regression_params(df, dep, ind)}
output = {
    dep: pd.DataFrame(
        {ind: extract_regression_params(df, dep, ind) for ind in ind_var_list}
    )
    for dep in var_dep_list
}

In [None]:
# Write one csv per dependent variable under ./output/bivariate/.
# Each table is transposed first, so the independent variables
# (the DataFrame columns) become the rows of the csv.

for var in var_dep_list:
    fn = f"{var}.csv"
    transposed = output[var].T
    transposed.to_csv(f"./output/bivariate/{fn}")