In [424]:
import os
import sys

import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from IPython.display import display


# Notebook-wide display options: never truncate columns, show up to 1000 rows,
# and use seaborn's white-grid style for plots.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)
sns.set(style='whitegrid')

In [425]:
# Load the survey extract; the path is resolved relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="NHIES_2015_16_household_level_labelled_TRUNC_2.csv")

Data Cleaning

In [426]:
# Clean/update labels
#
# Every mapping is applied to the target columns only (df[cols].replace),
# rather than to the whole frame with the result then narrowed to one column.

# q02_58: Consumed wheat flour / bread (01), other cereals (02), rice (04),
# oils/fats (13), spices (15) -> yes = 1, no = 0
cols = ['q02_58_01', 'q02_58_02', 'q02_58_04', 'q02_58_13', 'q02_58_15']
df[cols] = df[cols].replace({'yes': 1, 'no': 0})

# Clean q07_02_1-5 - replace NaN with 0; checked with 1; unchecked with 0
cols = ['q07_02_1', 'q07_02_2', 'q07_02_3', 'q07_02_4', 'q07_02_5']
df[cols] = df[cols].replace({'checked': 1, 'unchecked': 0, np.nan: 0})

# Clean q07_08_3 - checked with 1; unchecked with 0
cols = ['q07_08_3']
df[cols] = df[cols].replace({'checked': 1, 'unchecked': 0})

# Clean q07_02_6 - 'yes' with 1; 0.0 and '0' with 0 (NaN is left untouched here)
cols = ['q07_02_6']
df[cols] = df[cols].replace({'yes': 1, 0.0: 0, '0': 0})

In [427]:
# Binarize categorical variables


def _clean_labels(labels):
    """Normalise category labels into identifier-friendly tokens.

    Strips surrounding whitespace, lower-cases, turns ',', ' ' and '/' into
    '_' and removes '(' and ')'.
    """
    cleaned = labels.str.strip().str.lower()
    # regex=False: every pattern is a literal character, so '(' and ')' are
    # never interpreted as regular-expression syntax.
    for old, new in ((',', '_'), (' ', '_'), ('/', '_'), ('(', ''), (')', '')):
        cleaned = cleaned.str.replace(old, new, regex=False)
    return cleaned


def _add_source_dummies(frame, source_col, consumed_col, prefix):
    """Append one indicator column per category of ``source_col``.

    ``frame[source_col]`` is cleaned in place, one-hot encoded with
    ``prefix``, and the indicators are joined onto the frame. Rows where
    ``frame[consumed_col] == 0`` get NaN in every indicator of this group.

    Returns the extended frame and the (unmasked) indicator frame.
    """
    frame[source_col] = _clean_labels(frame[source_col])
    # float dtype so the indicators can hold NaN without a dtype change
    dummies = pd.get_dummies(frame[source_col], prefix=prefix, dtype=float)
    dummy_cols = list(dummies.columns)
    # Drop indicators left over from a previous run so the cell can be re-run
    frame = frame.drop(columns=dummy_cols, errors='ignore').join(dummies)
    # Mask exactly this group's indicator columns, taken from get_dummies
    # itself rather than assuming a fixed block of 11 columns per group.
    frame.loc[frame[consumed_col] == 0, dummy_cols] = np.nan
    return frame, dummies


# q02_60_01 -> wheat flour / bread
df, main_source_wheat_bin = _add_source_dummies(df, 'q02_60_01', 'q02_58_01', 'main_source_wheat')

# q02_60_02 -> other cereals
df, main_source_other_bin = _add_source_dummies(df, 'q02_60_02', 'q02_58_02', 'main_source_other')

# q02_60_04 -> rice
df, main_source_rice_bin = _add_source_dummies(df, 'q02_60_04', 'q02_58_04', 'main_source_rice')

# q02_60_13 -> oil/fats
df, main_source_oil_bin = _add_source_dummies(df, 'q02_60_13', 'q02_58_13', 'main_source_oil')

# q02_60_15 -> spices
df, main_source_spices_bin = _add_source_dummies(df, 'q02_60_15', 'q02_58_15', 'main_source_spices')


In [428]:
# Create new variables

# Age of Household Head -> Categories
# Left-closed bins: age < 18, 18 <= age < 60, age >= 60.
# Labels are kept as they were (note that '0-18' means "under 18").
# A missing age stays missing instead of falling through to a default of 0,
# so groupby() leaves those households out of the age breakdown.
choices = ['0-18', '18-59', '60+']
df['age_of_head_cat'] = pd.cut(
    df['age_of_head'],
    bins=[-np.inf, 18, 60, np.inf],
    right=False,
    labels=choices,
).astype(object)  # plain labels, not a Categorical, as before

Analysis

In [446]:
# HH Consumed food group - Q02_58_01

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
cols = ['q02_58_01', 'q02_58_02', 'q02_58_04', 'q02_58_13', 'q02_58_15']

# Grouping variables and household weight first, consumption indicators after.
ind_df = df[ind_var]
cons_yn_df = pd.concat([ind_df, df[cols]], axis=1)


def group_wa10(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``cons_yn_df['wgt_hh']`` looked up by index label. Missing
    values are dropped first so their weights do not enter the denominator.
    Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    valid = series.dropna()
    weights = cons_yn_df.loc[valid.index, 'wgt_hh']
    try:
        return np.average(valid, weights=weights)
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): the first column also reports its count.
agg_func11 = {cols[0]: [group_wa10, 'count']}
agg_func11.update({col: group_wa10 for col in cols[1:]})

# One table per grouping variable (every entry of ind_var except the trailing weight).
obj1, obj2, obj3, obj4, obj5, obj6, obj7 = (
    cons_yn_df.groupby(key).agg(agg_func11) for key in ind_var[:-1]
)

# Stack the tables on top of each other and write them to CSV
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)
vertical_stack.reset_index().to_csv('q02_58_wt.csv', index=False)

In [435]:
# Same aggregation without any grouping, i.e. over all households at once.
cons_yn_df.aggregate(agg_func11)

q02_58_01    0.698104
q02_58_02    0.819617
q02_58_04    0.518302
q02_58_13    0.825199
q02_58_15    0.529356
dtype: float64

In [448]:
# Days Consumed food group - Q02_59_01

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
cols = ['q02_59_01', 'q02_59_02', 'q02_59_04', 'q02_59_13', 'q02_59_15']

# Grouping variables and household weight first, the q02_59 columns after.
ind_df = df[ind_var]
cons_days_df = pd.concat([ind_df, df[cols]], axis=1)


def group_wa11(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``cons_days_df['wgt_hh']`` looked up by index label. Missing
    values are dropped first so their weights do not enter the denominator.
    Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    valid = series.dropna()
    weights = cons_days_df.loc[valid.index, 'wgt_hh']
    try:
        return np.average(valid, weights=weights)
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average plus count for every column.
agg_func12 = {col: [group_wa11, 'count'] for col in cols}

# One table per grouping variable (every entry of ind_var except the trailing weight).
obj1, obj2, obj3, obj4, obj5, obj6, obj7 = (
    cons_days_df.groupby(key).agg(agg_func12) for key in ind_var[:-1]
)

# Stack the tables on top of each other and write them to CSV
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)
vertical_stack.reset_index().to_csv('q02_59_wt.csv', index=False)

In [437]:
### --------  Q02_60

# ------------  Create reduced dataframe -----------------

# Pick the main-source indicator columns by their prefixes, in df's column
# order. This does not depend on how many categories each group has or on
# where the indicators sit in df (previously a fixed 54-column positional slice).
source_prefixes = (
    'main_source_wheat_',
    'main_source_other_',
    'main_source_rice_',
    'main_source_oil_',
    'main_source_spices_',
)
cols = [col for col in df.columns if col.startswith(source_prefixes)]

staple_source_df = df[cols]

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
ind_df = df[ind_var]

staple_source_df = pd.concat([ind_df, staple_source_df], axis=1)


def group_wa(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``staple_source_df['wgt_hh']`` looked up by index label.
    Missing values are dropped first so their weights do not enter the
    denominator. Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    dropped = series.dropna()
    try:
        return np.average(dropped, weights=staple_source_df.loc[dropped.index, 'wgt_hh'])
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average for every indicator column.
agg_func1 = {col: group_wa for col in cols}

obj1 = staple_source_df.groupby('urbrur').agg(agg_func1)
obj2 = staple_source_df.groupby('region').agg(agg_func1)
obj3 = staple_source_df.groupby('sex_of_head').agg(agg_func1)
obj4 = staple_source_df.groupby('age_of_head_cat').agg(agg_func1)
obj5 = staple_source_df.groupby('attain').agg(agg_func1)
obj6 = staple_source_df.groupby('apci_dec').agg(agg_func1)
obj7 = staple_source_df.groupby('main_language').agg(agg_func1)

# Stack the DataFrames on top of each other
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)

# Write DataFrame to CSV
vertical_stack.reset_index().to_csv('q02_60_wt.csv', index=False)

In [449]:
# Grow crop - q07_02_1 ... q07_02_6

cols = ['q07_02_1', 'q07_02_2', 'q07_02_3', 'q07_02_4', 'q07_02_6']

grow_yn_df = df[cols]

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
ind_df = df[ind_var]

grow_yn_df = pd.concat([ind_df, grow_yn_df], axis=1)


def group_wa7(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``grow_yn_df['wgt_hh']`` looked up by index label. Missing
    values are dropped first so their weights do not enter the denominator.
    Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    dropped = series.dropna()
    try:
        return np.average(dropped, weights=grow_yn_df.loc[dropped.index, 'wgt_hh'])
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average plus count for every column.
# Uses this cell's own group_wa7. group_wa6 belongs to the q07_08_3 cell
# further down and is not defined yet when the notebook runs top to bottom.
# NOTE: the function name is used as the aggregate's column label, so the
# CSV header reads 'group_wa7'.
agg_func8 = {col: [group_wa7, 'count'] for col in cols}

obj1 = grow_yn_df.groupby('urbrur').agg(agg_func8)
obj2 = grow_yn_df.groupby('region').agg(agg_func8)
obj3 = grow_yn_df.groupby('sex_of_head').agg(agg_func8)
obj4 = grow_yn_df.groupby('age_of_head_cat').agg(agg_func8)
obj5 = grow_yn_df.groupby('attain').agg(agg_func8)
obj6 = grow_yn_df.groupby('apci_dec').agg(agg_func8)
obj7 = grow_yn_df.groupby('main_language').agg(agg_func8)

# Stack the DataFrames on top of each other
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)

# Write DataFrame to CSV
vertical_stack.reset_index().to_csv('q07_02_1-6_wt.csv', index=False)

In [439]:
# Amount of crop grown - 'crops_possessed*'

# ------------  Create reduced dataframe -----------------

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
cols = ['crops_possessed_1', 'crops_possessed_2', 'crops_possessed_3', 'crops_possessed_4', 'crops_possessed_6']

# Grouping variables and household weight first, amounts (cast to float64) after.
ind_df = df[ind_var]
amount_df = pd.concat([ind_df, df[cols].astype('float64')], axis=1)


def group_wa1(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``amount_df['wgt_hh']`` looked up by index label. Missing
    values are dropped first so their weights do not enter the denominator.
    Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    valid = series.dropna()
    weights = amount_df.loc[valid.index, 'wgt_hh']
    try:
        return np.average(valid, weights=weights)
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average for every amount column.
agg_func2 = {col: group_wa1 for col in cols}

# One table per grouping variable (every entry of ind_var except the trailing weight).
obj1, obj2, obj3, obj4, obj5, obj6, obj7 = (
    amount_df.groupby(key).agg(agg_func2) for key in ind_var[:-1]
)

# Stack the tables on top of each other and write them to CSV
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)
vertical_stack.reset_index().to_csv('q07_03_wt.csv', index=False)

In [450]:
# Amount of crop consumed - q07_04_1

# ------------  Create reduced dataframe -----------------

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
cols = ['q07_04_1', 'q07_04_2', 'q07_04_3', 'q07_04_4', 'q07_04_6']

# Grouping variables and household weight first, the q07_04 columns after.
ind_df = df[ind_var]
amount_cons_df = pd.concat([ind_df, df[cols]], axis=1)


def group_wa2(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``amount_cons_df['wgt_hh']`` looked up by index label.
    Missing values are dropped first so their weights do not enter the
    denominator. Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    valid = series.dropna()
    weights = amount_cons_df.loc[valid.index, 'wgt_hh']
    try:
        return np.average(valid, weights=weights)
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average plus count for every column.
agg_func3 = {col: [group_wa2, 'count'] for col in cols}

# One table per grouping variable (every entry of ind_var except the trailing weight).
obj1, obj2, obj3, obj4, obj5, obj6, obj7 = (
    amount_cons_df.groupby(key).agg(agg_func3) for key in ind_var[:-1]
)

# Stack the tables on top of each other and write them to CSV
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)
vertical_stack.reset_index().to_csv('q07_04_wt.csv', index=False)

In [441]:
# How much of crop grown was given away
ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
cols = ['crops_given_away_1', 'crops_given_away_2', 'crops_given_away_3', 'crops_given_away_4', 'crops_given_away_6']

# Grouping variables and household weight first, the given-away columns after.
ind_df = df[ind_var]
amount_give_df = pd.concat([ind_df, df[cols]], axis=1)


def group_wa3(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``amount_give_df['wgt_hh']`` looked up by index label.
    Missing values are dropped first so their weights do not enter the
    denominator. Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    valid = series.dropna()
    weights = amount_give_df.loc[valid.index, 'wgt_hh']
    try:
        return np.average(valid, weights=weights)
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average for every column.
agg_func4 = {col: group_wa3 for col in cols}

# One table per grouping variable (every entry of ind_var except the trailing weight).
obj1, obj2, obj3, obj4, obj5, obj6, obj7 = (
    amount_give_df.groupby(key).agg(agg_func4) for key in ind_var[:-1]
)

# Stack the tables on top of each other and write them to CSV
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)
vertical_stack.reset_index().to_csv('q07_05_wt.csv', index=False)

In [442]:
# How much of crop grown was sold
ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
cols = ['crops_sold_1', 'crops_sold_2', 'crops_sold_3', 'crops_sold_4', 'crops_sold_6']

# Grouping variables and household weight first, the sold columns after.
ind_df = df[ind_var]
amount_sold_df = pd.concat([ind_df, df[cols]], axis=1)


def group_wa4(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``amount_sold_df['wgt_hh']`` looked up by index label.
    Missing values are dropped first so their weights do not enter the
    denominator. Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    valid = series.dropna()
    weights = amount_sold_df.loc[valid.index, 'wgt_hh']
    try:
        return np.average(valid, weights=weights)
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average for every column.
agg_func5 = {col: group_wa4 for col in cols}

# One table per grouping variable (every entry of ind_var except the trailing weight).
obj1, obj2, obj3, obj4, obj5, obj6, obj7 = (
    amount_sold_df.groupby(key).agg(agg_func5) for key in ind_var[:-1]
)

# Stack the tables on top of each other and write them to CSV
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)
vertical_stack.reset_index().to_csv('q07_06_wt.csv', index=False)

In [443]:
# Of crop grown that was sold, avg price/kg (mean, $N/kg)

cols = ['crops_sales_value_1', 'crops_sales_value_2']

# Create the $N/kg variable: sales value divided by the amount sold.
# The ratios are built in a separate frame (under the same column names, so
# the output header is unchanged) instead of overwriting df's sales-value
# columns; re-running this cell therefore never divides twice.
# A zero amount sold produces +/-inf, which dropna() would keep and which
# would make the weighted average infinite, so those are treated as missing.
price_per_kg = pd.DataFrame({
    'crops_sales_value_1': df['crops_sales_value_1'] / df['crops_sold_1'],
    'crops_sales_value_2': df['crops_sales_value_2'] / df['crops_sold_2'],
}).replace([np.inf, -np.inf], np.nan)

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
ind_df = df[ind_var]

value_sold_df = pd.concat([ind_df, price_per_kg], axis=1)


def group_wa5(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``value_sold_df['wgt_hh']`` looked up by index label.
    Missing values are dropped first so their weights do not enter the
    denominator. Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    dropped = series.dropna()
    try:
        return np.average(dropped, weights=value_sold_df.loc[dropped.index, 'wgt_hh'])
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average for both price columns.
agg_func6 = {col: group_wa5 for col in cols}

obj1 = value_sold_df.groupby('urbrur').agg(agg_func6)
obj2 = value_sold_df.groupby('region').agg(agg_func6)
obj3 = value_sold_df.groupby('sex_of_head').agg(agg_func6)
obj4 = value_sold_df.groupby('age_of_head_cat').agg(agg_func6)
obj5 = value_sold_df.groupby('attain').agg(agg_func6)
obj6 = value_sold_df.groupby('apci_dec').agg(agg_func6)
obj7 = value_sold_df.groupby('main_language').agg(agg_func6)

# Stack the DataFrames on top of each other
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)

# Write DataFrame to CSV
vertical_stack.reset_index().to_csv('q07_07_wt.csv', index=False)

In [444]:
# Produce butter/oils/fats - q07_08_3

ind_var = ['urbrur', 'region', 'sex_of_head', 'age_of_head_cat', 'attain', 'apci_dec', 'main_language', 'wgt_hh']
cols = ['q07_08_3']

# Grouping variables and household weight first, the q07_08_3 indicator after.
ind_df = df[ind_var]
butter_df = pd.concat([ind_df, df[cols]], axis=1)


def group_wa6(series):
    """Weighted average of the non-missing values of ``series``.

    Weights are ``butter_df['wgt_hh']`` looked up by index label. Missing
    values are dropped first so their weights do not enter the denominator.
    Returns 0 when ``np.average`` raises ZeroDivisionError.
    """
    valid = series.dropna()
    weights = butter_df.loc[valid.index, 'wgt_hh']
    try:
        return np.average(valid, weights=weights)
    except ZeroDivisionError:
        return 0


# Aggregation spec for agg(): weighted average of the single indicator.
agg_func7 = {col: group_wa6 for col in cols}

# One table per grouping variable (every entry of ind_var except the trailing weight).
obj1, obj2, obj3, obj4, obj5, obj6, obj7 = (
    butter_df.groupby(key).agg(agg_func7) for key in ind_var[:-1]
)

# Stack the tables on top of each other and write them to CSV
vertical_stack = pd.concat([obj1, obj2, obj3, obj4, obj5, obj6, obj7], axis=0)
vertical_stack.reset_index().to_csv('q07_08_3_wt.csv', index=False)


EXTRA TO DELETE

In [None]:
# USEFUL CODE
#output_names = []
#output_names.extend([v for k,v in locals().items() if k.startswith('o_')])
    
#frame = {cols[i]: output_names[i] for i in range(len(output_names))} 

In [10]:
# Grow crop
#grow_crop_urban = df.loc[:,'q07_02_1':'q07_02_6'].groupby(df['urban']).agg(['mean'])
#grow_crop_region = df.loc[:,'q07_02_1':'q07_02_6'].groupby(df['region']).agg(['mean'])
#grow_crop_sex = df.loc[:,'q07_02_1':'q07_02_6'].groupby(df['sex_of_head']).agg(['mean'])
#grow_crop_age = df.loc[:,'q07_02_1':'q07_02_6'].groupby(df['age_of_head_cat']).agg(['mean'])
#grow_crop_attain = df.loc[:,'q07_02_1':'q07_02_6'].groupby(df['attain']).agg(['mean'])
#grow_crop_income = df.loc[:,'q07_02_1':'q07_02_6'].groupby(df['apci_dec']).agg(['mean'])
#grow_crop_language = df.loc[:,'q07_02_1':'q07_02_6'].groupby(df['main_language']).agg(['mean'])

# Stack the DataFrames on top of each other
#vertical_stack = pd.concat([grow_crop_urban, grow_crop_region, grow_crop_sex, grow_crop_age, grow_crop_attain, grow_crop_income, grow_crop_language], axis=0)

# Write DataFrame to CSV
#vertical_stack.reset_index().to_csv('q07_02_1-6.csv', index=False)

In [11]:
# q_07_03 How much of crop grown (kgs)
#cols = ['crops_possessed_1', 'crops_possessed_2', 'crops_possessed_3', 'crops_possessed_4', 'crops_possessed_5', 'crops_possessed_6']

#grow_crop_amount_urban = df.loc[:,cols].groupby(df['urban']).agg(['mean', 'sum'])
#grow_crop_amount_region = df.loc[:,cols].groupby(df['region']).agg(['mean', 'sum'])
#grow_crop_amount_sex = df.loc[:,cols].groupby(df['sex_of_head']).agg(['mean', 'sum'])
#grow_crop_amount_age = df.loc[:,cols].groupby(df['age_of_head_cat']).agg(['mean', 'sum'])
#grow_crop_amount_attain = df.loc[:,cols].groupby(df['attain']).agg(['mean', 'sum'])
#grow_crop_amount_income = df.loc[:,cols].groupby(df['apci_dec']).agg(['mean', 'sum'])
#grow_crop_amount_language = df.loc[:,cols].groupby(df['main_language']).agg(['mean', 'sum'])

# Stack the DataFrames on top of each other
#vertical_stack = pd.concat([grow_crop_amount_urban, grow_crop_amount_region, grow_crop_amount_sex, grow_crop_amount_age, grow_crop_amount_attain, grow_crop_amount_income, grow_crop_amount_language], axis=0)

# Write DataFrame to CSV
#vertical_stack.reset_index().to_csv('q07_03.csv', index=False)

In [410]:
# Consumption on Food
#food_cons_urban = df.loc[:,'g01_food_capita'].groupby(df['urban']).agg(['mean', 'median', 'count'])
#food_cons_region = df.loc[:,'g01_food_capita'].groupby(df['region']).agg(['mean', 'median', 'count'])
#food_cons_sex = df.loc[:,'g01_food_capita'].groupby(df['sex_of_head']).agg(['mean', 'median', 'count'])
#food_cons_age = df.loc[:,'g01_food_capita'].groupby(df['age_of_head_cat']).agg(['mean', 'median', 'count'])
#food_cons_attain = df.loc[:,'g01_food_capita'].groupby(df['attain']).agg(['mean', 'median', 'count'])
#food_cons_income = df.loc[:,'g01_food_capita'].groupby(df['apci_dec']).agg(['mean', 'median', 'count'])
#food_cons_language = df.loc[:,'g01_food_capita'].groupby(df['main_language']).agg(['mean', 'median', 'count'])

# Stack the DataFrames on top of each other
#vertical_stack = pd.concat([food_cons_urban, food_cons_region, food_cons_sex, food_cons_age, food_cons_attain, food_cons_income, food_cons_language], axis=0)

# Write DataFrame to CSV
#vertical_stack.reset_index().to_csv('g01_food.csv', index=False)