# Alive & Thrive
## Laos Data: Women Files - Tabulation
## Prepared by Aaron Wise; aaron@a3di.dev
### Version: 30 September 2022

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Widen pandas' display limits for this notebook: show up to 1500 rows and
# do not cap the number of columns when a DataFrame is rendered.
pd.set_option("display.max_rows", 1500)
pd.set_option("display.max_columns", None)

import statsmodels.api as sm

In [2]:
# %load tabulate_imports.py
from tabulate_vars import (
    read_csv_file,
    concatenate_dfs,
    save_combined,
    create_bivariate_var_dep,
    extract_regression_params
)

from aw_analytics import mean_wt, output_mean_table

### Create and save combined women's file

In [3]:
# Survey identifiers used throughout the notebook
country, recode = 'LAO', 'women'

# -------------------------------------------------------------------
# Survey rounds to load (kept as strings; they are passed to read_csv_file)
year_list = ['2000', '2006', '2012', '2017']
# -------------------------------------------------------------------


# Load the per-year working file for every survey round
list_of_dfs = [
    read_csv_file(country, recode, survey_year, file_type='working')
    for survey_year in year_list
]

# Stack the yearly frames into a single frame
df = concatenate_dfs(list_of_dfs)

# Persist the combined frame via the project helper
save_combined(df, country, recode)

The file -- LAO_women_2000_working.csv -- has the following shape: Rows: 1190; Columns: 16
The file -- LAO_women_2006_working.csv -- has the following shape: Rows: 1622; Columns: 17
The file -- LAO_women_2012_working.csv -- has the following shape: Rows: 4444; Columns: 24
The file -- LAO_women_2017_working.csv -- has the following shape: Rows: 4460; Columns: 24


### Run Descriptive stats

In [4]:
## TOTAL DATASET

# Set parameters

# Outcome indicators to tabulate; each one becomes a sheet in the yearly
# workbook. Named `dep_vars` (not `vars`) so the builtin `vars()` stays usable.
dep_vars = ['anc_4_visits', 'anc_3_components', 'inst_delivery', 'caesarean_del', 'pnc_mother', 'low_bw', 'early_bf', 'iron_supp']

# Breakdown variables handed to output_mean_table for every indicator.
ind_vars = ['Total', 'residence', 'region', 'mother_edu', 'wealth_q', 'eth_hoh', 'elderly_hoh', 'sex_hoh']

# Name of the weight column; later cells in this notebook read `wt` as well.
wt = 'wmweight'

# Produce one Excel workbook per survey year.
for year in year_list:

    df = read_csv_file(country, recode, year, file_type='working')

    out_fn = f"{country}_{recode}_{year}.xlsx"
    path = Path.cwd() / 'output' / 'frequencies' / recode / out_fn

    # The context manager closes (and saves) the workbook even if something
    # unexpected raises part-way through the year.
    with pd.ExcelWriter(path) as xlwriter:

        for var in dep_vars:

            # Best-effort per indicator: the yearly files do not all have the
            # same columns (see the shapes printed on load), so an indicator
            # that cannot be tabulated is reported and skipped.
            try:
                mean_table = output_mean_table(df.dropna(subset=[var]), var, ind_vars, wt)
                mean_table.to_excel(xlwriter, sheet_name=f"{var}_weighted")

            # `Exception` rather than a bare `except:` so Ctrl-C / SystemExit
            # still propagate; the cause is printed instead of being hidden.
            except Exception as exc:
                print(f"Warning: Unable to create sheet for var {var} ({type(exc).__name__}: {exc})")

The file -- LAO_women_2000_working.csv -- has the following shape: Rows: 1190; Columns: 16
The file -- LAO_women_2006_working.csv -- has the following shape: Rows: 1622; Columns: 17
The file -- LAO_women_2012_working.csv -- has the following shape: Rows: 4444; Columns: 24
The file -- LAO_women_2017_working.csv -- has the following shape: Rows: 4460; Columns: 24


### Run Bivariate stats

In [5]:
# Load the combined file and, in the same step, add the updated bivariate
# variables (mother_edu_biv, eth_hoh_biv)
df = create_bivariate_var_dep(
    read_csv_file(country, recode, file_type='combined'),
    country,
)

The file -- LAO_women_combined.csv -- has the following shape: Rows: 11716; Columns: 24


In [6]:
# Set parameters
recode = 'women'

# Dependent (outcome) variables: one result table each
var_dep_list = ['anc_4_visits', 'anc_3_components', 'inst_delivery', 'caesarean_del', 'pnc_mother', 'low_bw', 'early_bf', 'iron_supp']

# Independent variables: one column per variable in each result table
ind_var_list = ['residence', 'region', 'mother_edu_biv', 'wealth_q', 'eth_hoh_biv', 'elderly_hoh', 'sex_hoh']


def _bivariate_table(var_dep):
    """Return a DataFrame with one column of extracted regression params per independent variable."""
    params_by_ind_var = {}
    for ind_var in ind_var_list:
        params_by_ind_var[ind_var] = extract_regression_params(df, var_dep, ind_var, recode)
    return pd.DataFrame(params_by_ind_var)


# Run bivariate (extract WLS regression params), keyed by dependent variable
output = {var_dep: _bivariate_table(var_dep) for var_dep in var_dep_list}

                            WLS Regression Results                            
Dep. Variable:           anc_4_visits   R-squared:                       0.157
Model:                            WLS   Adj. R-squared:                  0.157
Method:                 Least Squares   F-statistic:                     552.4
Date:                Wed, 05 Oct 2022   Prob (F-statistic):               0.00
Time:                        15:58:28   Log-Likelihood:                -47359.
No. Observations:                8904   AIC:                         9.473e+04
Df Residuals:                    8900   BIC:                         9.475e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
intercept          27.2213      0.777     

In [7]:
# Write each dependent variable's result table (transposed) to its own csv
for dep_var in var_dep_list:
    transposed = output[dep_var].T
    csv_name = dep_var + '.csv'
    transposed.to_csv(f"./output/bivariate/{recode}/{csv_name}")

#### Get bivariate differences

In [8]:
# Reload the combined file and re-derive the bivariate variables so this cell
# does not depend on whatever `df` was left holding by earlier cells.
df = read_csv_file(country, recode, file_type='combined')

df = create_bivariate_var_dep(df, country)

# NOTE(review): 'iron_supp' is in the regression cell's var_dep_list but not
# in this one; confirm the omission is intentional.
var_dep_list = ['anc_4_visits', 'anc_3_components', 'inst_delivery', 'caesarean_del', 'pnc_mother', 'low_bw', 'early_bf']

ind_var_list = ['residence', 'region', 'mother_edu_biv', 'wealth_q', 'eth_hoh_biv', 'elderly_hoh', 'sex_hoh']


# One workbook per dependent variable, one sheet per independent variable.
for var in var_dep_list:

    out_fn = f"{var}_bivariate_diff.xlsx"
    path = Path.cwd() / 'output' / 'bivariate' / recode / out_fn

    # The context manager closes (and saves) the workbook even if a groupby
    # or write fails, instead of leaking the open file handle.
    with pd.ExcelWriter(path) as xlwriter:

        for ind_var in ind_var_list:

            # Apply mean_wt to every (Year, ind_var) group; `wt` is the weight
            # column name set in the descriptive-stats cell.
            # Stored under its own name so the `output` dict of regression
            # tables is not overwritten and the csv export cell can be re-run.
            group_stats = df.groupby(['Year', ind_var]).apply(mean_wt, var, wt=wt)

            group_stats.to_excel(xlwriter, sheet_name=f"{ind_var}_weighted")


The file -- LAO_women_combined.csv -- has the following shape: Rows: 11716; Columns: 24
