## Additional Analysis of NHIES Data
### Maize, millet, sorghum and wheat data by region
### 07 October 2020

## HH File data

In [1]:
import os

import numpy as np
import pandas as pd
pd.set_option("display.max_rows", 220)
pd.set_option("display.max_columns", None)

import matplotlib.pyplot as plt

import seaborn as sns
sns.set_style("whitegrid")

from PIL import Image

In [2]:
# Load ./data/crop.csv; the start_date column is parsed to datetimes on read
df = pd.read_csv(
    './data/crop.csv',
    parse_dates=['start_date'],
)

In [3]:
# Remove data before April 2015
# ISO literal: same instant as the former '04/01/2015', but it cannot be
# misread as 4 January
ts = pd.Timestamp('2015-04-01')
df = df[df['start_date'] >= ts]

In [4]:
# Move start_date into the index; the monthly resample further down works on the index
df = df.set_index('start_date')

In [5]:
# 0/1 flags on the Grains source column (q02_60_02):
# one for 'own production', one for 'bought using cash'
grain_source = df['q02_60_02']
df['Grains_own_production'] = grain_source.isin(['own_production_garden__livestock']).astype(int)
df['Grains_cash'] = grain_source.isin(['bought_using_cash']).astype(int)

In [6]:
# Monthly mean of both grain flags, multiplied by 100 to read as percentages
grain_flag_aggs = {'Grains_own_production': 'mean', 'Grains_cash': 'mean'}

# Grouped by region
means = df.groupby('region').resample('1M').agg(grain_flag_aggs) * 100

# Grouped by the 'Total' column
means_tot = df.groupby('Total').resample('1M').agg(grain_flag_aggs) * 100

### Charting data

In [7]:
# Create function to create HH charts
def generate_HH_othcereal_cons_chart(df, col_name, idx_name):
    """Plot the series ``df[col_name][idx_name]`` and save it as an SVG chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame from which ``df[col_name][idx_name]`` is selected and plotted.
    col_name : str
        Column to plot.
    idx_name : str
        Index label passed to ``df[col_name][idx_name]``; also used as the
        file-name prefix.

    Returns
    -------
    None
        The chart is written to
        ``<cwd>/output/02_region/charts/HH/<idx_name>_othercereal_own_production.svg``.
        The file-name suffix is fixed and does not depend on ``col_name``.
    """
    # Font parameters
    tw = {'fontname': 'Tw Cen MT'}  # only needed by the optional title below
    cl = {'fontname': 'Calibri Light'}

    # Generate plot on its own figure so repeated calls never share axes state
    fig, ax = plt.subplots()
    df[col_name][idx_name].plot(ax=ax)
    # plt.title(f'{idx_name}: HHs consuming "other cereals" from Own Production (%)', loc='left', fontsize=12, **tw)
    ax.set_xlabel("", fontsize=8, **cl)
    ax.set_ylabel("Percent (%)", fontsize=8, **cl)
    # pyplot helpers act on the current axes, which is the one just created
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=9)
    ax.legend(fontsize=7, loc='upper left', frameon=False)
    ax.set_ylim([0, 60])  # fixed 0-60 % scale for every chart

    # Build the output path from components so it is valid on every OS
    # (the former 'output\\02_region\\...' string only worked on Windows)
    chart_dir = os.path.join(os.getcwd(), 'output', '02_region', 'charts', 'HH')
    os.makedirs(chart_dir, exist_ok=True)  # savefig does not create missing folders
    chart_fn = idx_name + '_' + 'othercereal_own_production' + '.svg'
    chart_path = os.path.join(chart_dir, chart_fn)

    # Save file, then release the figure so loops over many regions do not pile up figures
    fig.savefig(chart_path)
    plt.close(fig)

In [None]:
# Chart for the 'Total' group of means_tot
generate_HH_othcereal_cons_chart(
    df=means_tot,
    col_name='Grains_own_production',
    idx_name='Total',
)

In [None]:
# Loop through all regions, generate chart, and save
# Region labels are the first index level of the regional means
region_list = means.index.get_level_values(0).unique().tolist()

# Plain loop: the chart function returns None, so a list comprehension would
# only build (and display) a list of None values
for region in region_list:
    generate_HH_othcereal_cons_chart(means, 'Grains_own_production', region)

## DRB file

In [8]:
# Load the diary file from its Stata export
drb = pd.read_stata(
    './data/Diary_coicopV12.dta',
)

In [9]:
# Shape before any filtering
print(drb.shape)
# Drop purchases -- currently disabled, so the next print repeats the same shape
# drb = drb[drb['purch_cons'].isin(['consumption'])]
print(drb.shape)

# Keep grain items  per Karen list
wheat_list = [
    'Bread (white, brown, whole wheat, rye, maize, etc)',
    'Macaroni, spaghetti, noodles',
    'Traditional bread, ash bread, oshikwiila, oshima, omungome',
    'Vetkoek',
    'Bread/ cake flour (all types)',
    'Cakes (all types)',
    'Brotchen',
    'Pies & pizzas',
]
mahangu_list = [
    'Mageu/Oshikundu',
    'Mahangu meal/ grain/ samp (pearl millet, raw)',
]
oil_list = [
    'Cooking oil, sunflower',
    'Olive oil',
]
maize_list = ['Maize meal/ grain/ samp']
sorghum_list = ['Sorghum meal/ grain (whole grain, raw)']

# Every item label to keep, in one list
vehicles_list = wheat_list + maize_list + mahangu_list + sorghum_list + oil_list

# Keep only diary rows whose label is in the combined list
is_vehicle = drb['label'].isin(vehicles_list)
drb = drb.loc[is_vehicle]
print(drb.shape)

# Order rows by household id
drb = drb.sort_values('hhid')


(538493, 24)
(538493, 24)
(154560, 24)


In [10]:
# Create date dataframe for merging into DRB
# Columns are picked by name instead of by position (formerly iloc[:, 0:2]),
# so the result no longer depends on the column order of the HH file, and
# assign() avoids writing into a slice of another frame.
date_df = (
    df.reset_index()
    .sort_values('hhid')[['start_date', 'hhid']]
    .assign(hhid=lambda frame: frame['hhid'].str.strip())  # strip surrounding whitespace from the merge key
)

In [11]:
# Bring in start_date from HH file, then drop diary rows left without a start_date
# NOTE(review): a left merge repeats diary rows when date_df holds the same hhid
# more than once -- confirm hhid is unique in date_df
drb = pd.merge(
    drb,
    date_df,
    how='left',
    on='hhid',
    suffixes=("", "_right"),
).dropna(subset=['start_date'])

In [12]:
# Index the diary rows by start_date (the monthly resample below works on the index)
drb = drb.set_index('start_date')

In [13]:
# Export food items by source frequencies
source_by_label = drb.groupby('label')['source']

# Share of each source within a food item
source_by_label.value_counts(normalize=True).to_csv('./output/02_region/vehicle_sources_freq.csv')
# Raw counts of each source within a food item
source_by_label.value_counts().to_csv('./output/02_region/vehicle_sources.csv')

In [None]:
# Single-item label lists used to build the per-item flags below

# Food items with high % (>45%) consumed from own production
mageu = ['Mageu/Oshikundu']
mahangu = ['Mahangu meal/ grain/ samp (pearl millet, raw)']
sorghum = ['Sorghum meal/ grain (whole grain, raw)']
bread_trad = ['Traditional bread, ash bread, oshikwiila, oshima, omungome']

# Items looked at through purchases instead
maize = ['Maize meal/ grain/ samp']
bread = ['Bread (white, brown, whole wheat, rye, maize, etc)']
oil = ['Cooking oil, sunflower']

In [None]:
# 0/1 flag per food item: 1 when the row is that item AND came from the given source(s)

# Create values if food item consumed from own production
from_own_produce = drb['source'].isin(['Own produce'])
own_production_flags = [
    ('Mageu_own_production', mageu),
    ('Mahangu_own_production', mahangu),
    ('Sorghum_own_production', sorghum),
    ('Bread_trad_own_production', bread_trad),
]
for flag_col, item_labels in own_production_flags:
    drb[flag_col] = (drb['label'].isin(item_labels) & from_own_produce).astype(int)

# Purchase
purch_list = ['Supermarket', 'Shebeen', 'Open market/ Vendor']
from_purchase = drb['source'].isin(purch_list)
purchase_flags = [
    ('Maize_purchase', maize),
    ('Bread_purchase', bread),
    ('Oil_purchase', oil),
]
for flag_col, item_labels in purchase_flags:
    drb[flag_col] = (drb['label'].isin(item_labels) & from_purchase).astype(int)

In [None]:
# Constant 'total' column so the whole sample can be grouped the same way as a region.
# Guarded so the cell can be re-run: DataFrame.insert raises ValueError if the
# column already exists.
if 'total' not in drb.columns:
    drb.insert(2, 'total', 'total')
drb.head()

In [None]:
# Create separate mean dataframes
def _monthly_pct(frame, group_col, flag_col):
    """Monthly mean of `flag_col` within each `group_col` group, multiplied by 100."""
    return frame.groupby(group_col).resample('1M').agg({flag_col: 'mean'}) * 100


# By region
mageu_mean = _monthly_pct(drb, 'region', 'Mageu_own_production')
mahangu_mean = _monthly_pct(drb, 'region', 'Mahangu_own_production')
sorghum_mean = _monthly_pct(drb, 'region', 'Sorghum_own_production')
bread_trad_mean = _monthly_pct(drb, 'region', 'Bread_trad_own_production')
maize_mean = _monthly_pct(drb, 'region', 'Maize_purchase')
bread_mean = _monthly_pct(drb, 'region', 'Bread_purchase')
oil_mean = _monthly_pct(drb, 'region', 'Oil_purchase')

# Grouped by the constant 'total' column
mahangu_mean_tot = _monthly_pct(drb, 'total', 'Mahangu_own_production')
maize_mean_tot = _monthly_pct(drb, 'total', 'Maize_purchase')
bread_mean_tot = _monthly_pct(drb, 'total', 'Bread_purchase')

### Charting data

In [None]:
# Column name -> frame holding that column; insertion order is the plotting order
# Regional frames
df_col_dict = {
    'Mahangu_own_production': mahangu_mean,
    'Maize_purchase': maize_mean,
    'Bread_purchase': bread_mean,
}
# Frames grouped by the 'total' column
df_tot_dict = {
    'Mahangu_own_production': mahangu_mean_tot,
    'Maize_purchase': maize_mean_tot,
    'Bread_purchase': bread_mean_tot,
}

In [None]:
# Display the 'total'-grouped maize purchase frame as a quick visual check
df_tot_dict['Maize_purchase']

In [None]:
# Create function to create DRB charts
def generate_DRB_cons_chart(df_col_dict, idx_name):
    """Overlay one line per entry of `df_col_dict` and save the chart as SVG.

    Parameters
    ----------
    df_col_dict : dict
        Maps a column name to the frame that holds it. For every pair the
        series ``frame[column][idx_name]`` is drawn on the same axes, in the
        dict's insertion order. Any number of entries is accepted.
    idx_name : str
        Index label passed to ``frame[column][idx_name]``; also used as the
        file-name prefix.

    Returns
    -------
    None
        The chart is written to
        ``<cwd>/output/02_region/charts/DRB/<idx_name>_DRB.svg``.
    """
    # Font parameters
    tw = {'fontname': 'Tw Cen MT'}  # only needed by the optional title below
    cl = {'fontname': 'Calibri Light'}

    # Generate plot on its own figure so repeated calls never share axes state
    fig, ax = plt.subplots()
    # One line per (column, frame) pair instead of three hard-coded positions
    for col_name, frame in df_col_dict.items():
        pd.DataFrame(frame[col_name][idx_name]).plot(ax=ax)

    # plt.title(f'{idx_name}: Source of Mahangu, Maize, Bread consumed daily', loc='left', fontsize=12, **tw)
    ax.set_xlabel("", fontsize=8, **cl)
    ax.set_ylabel("Percent (%)", fontsize=8, **cl)
    # pyplot helpers act on the current axes, which is the one just created
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=9)
    ax.legend(fontsize=7, loc='upper left', frameon=False)
    ax.set_ylim([0, 60])  # fixed 0-60 % scale for every chart

    # Build the output path from components so it is valid on every OS
    # (the former 'output\\02_region\\...' string only worked on Windows)
    chart_dir = os.path.join(os.getcwd(), 'output', '02_region', 'charts', 'DRB')
    os.makedirs(chart_dir, exist_ok=True)  # savefig does not create missing folders
    chart_fn = idx_name + '_' + 'DRB' + '.svg'
    chart_path = os.path.join(chart_dir, chart_fn)

    # Save file, then release the figure so loops over many regions do not pile up figures
    fig.savefig(chart_path)
    plt.close(fig)

In [None]:
# One DRB chart per region. region_list is the list built from the HH regional means.
# Plain loop: the chart function returns None, so a list comprehension would
# only build (and display) a list of None values
for region in region_list:
    generate_DRB_cons_chart(df_col_dict, region)

In [None]:
# DRB chart for the 'total' group
generate_DRB_cons_chart(
    df_col_dict=df_tot_dict,
    idx_name='total',
)