# Descriptive Summary Statistics

In [97]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import statsmodels as sm
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_row', 500)

In [98]:
df = pd.read_csv("../processed_data/df_for_descriptive_stats.csv")

In [99]:
df.sample(5).reset_index()

Unnamed: 0,index,hh1,hh2,area,zone,hh_members_num,num_of_women_15_49,num_of_men_15_49,num_under_5_child,num_5_17_child,hhsex,hhage,helevel,hh_own_dwelling,hh_agricultural_land,hh_own_animal,hh_mn_attended_sch_num,hh_mn_mean_age_yrs,hh_mn_attended_atleast_sec_sch,hh_mn_mean_life_satisfaction,hh_wm_attended_sch_num,hh_wm_mean_age_yrs,hh_wm_attended_atleast_sec_sch,hh_wm_mean_life_satisfaction,water_source,wi_quintile_mics,urban_wi_quintile_mics,hh_ls_num,fies_score,hhweightmics,psu,stratum,prob_mod_sev,prob_sev,rs_fies_score,WORRIED,HEALTHY,FEWFOOD,SKIPPED,ATELESS,RANOUT,HUNGRY,WHLDAY,hh_ag_land_size_cat,hh_ls_num_clean_iqr,hh_ls_num_clean,hh_members_num_clean_iqr,hh_members_num_clean,num_adult,num_children,hh_siz_cat,hh_age_cat,hh_num_child_cat,hh_num_adult_cat,PFI_moderate_severe,PFI_severe,PFI_mid_moderate,FS,MSI,SFI,fies_0_3_7,FI_Food_Insecure
0,1450,381.0,8.0,URBAN,North East,6.0,1.0,0.0,1.0,3.0,Male,50.0,Senior secondary,OWN,YES,NO,0.0,0.0,0.0,0.0,1.0,36.0,1.0,2.0,PIPED WATER: PIPED INTO DWELLING,Richest,Fourth,0,0,0.385214,381.0,8.0,0.0,0.0,0,0,0,0,0,0,0,0,0,1-9 hectares,0.0,0.0,6.0,1.43299,1.0,4.0,>5,>45,>1,<2,0,0,1,1,0,0,0-3,0
1,771,198.0,10.0,URBAN,South East,2.0,1.0,0.0,0.0,0.0,Male,36.0,Higher/tertiary,OTHER,NO,NO,0.0,0.0,0.0,0.0,1.0,21.0,1.0,3.0,TUBE WELL / BOREHOLE,Richest,Richest,0,5,4.545452,198.0,4.0,0.875,0.002,5,1,1,1,1,1,0,0,0,0 hectare,0.0,0.0,2.0,2.0,1.0,0.0,<5,36-45,<2,<2,1,0,0,0,1,0,4-6,1
2,865,206.0,12.0,URBAN,North East,4.0,1.0,0.0,2.0,0.0,Male,34.0,Senior secondary,OWN,NO,NO,0.0,0.0,0.0,0.0,1.0,22.0,1.0,10.0,PACKAGED WATER: SACHET WATER,Richest,Richest,0,3,0.744123,206.0,5.0,0.284,0.0,3,0,1,1,0,1,0,0,0,0 hectare,0.0,0.0,4.0,1.43299,1.0,2.0,<5,26-35,>1,<2,0,0,1,0,0,0,0-3,1
3,1530,385.0,19.0,URBAN,North East,8.0,2.0,0.0,3.0,2.0,Male,38.0,Higher/tertiary,RENT,NO,NO,0.0,0.0,0.0,0.0,1.0,32.5,1.0,5.0,TUBE WELL / BOREHOLE,Richest,Fourth,0,4,0.231128,385.0,8.0,0.616,0.0,4,0,1,1,1,1,0,0,0,0 hectare,0.0,0.0,8.0,1.43299,2.0,5.0,>5,36-45,>1,>1,1,0,0,0,1,0,4-6,1
4,6019,1181.0,13.0,URBAN,South West,1.0,0.0,1.0,0.0,0.0,Male,36.0,Senior secondary,RENT,NO,NO,1.0,36.0,1.0,8.0,0.0,0.0,0.0,0.0,PACKAGED WATER: SACHET WATER,Richest,Fourth,0,8,4.971919,1181.0,24.0,1.0,0.821,8,1,1,1,1,1,1,1,1,0 hectare,0.0,0.0,1.0,1.0,1.0,0.0,<5,36-45,<2,<2,0,1,0,0,1,1,7-8,1


In [100]:
df.shape

(10680, 61)

### Table 1: Summary of Food Insecurity Measures and Indicators. (weighted using the household sampling weight).

In [101]:
fies_questions = list(df.loc[:, "WORRIED": "WHLDAY"].columns)
fies_indicators = list(df[["prob_mod_sev", "prob_sev", 
                             "fies_score","FS", "MSI", "SFI","FI_Food_Insecure",
                            "PFI_moderate_severe", "PFI_severe", "PFI_mid_moderate"]].columns)

fies_cols = fies_questions + fies_indicators + ["hhweightmics"]
fies_df = df[fies_cols] 

In [102]:
# comppute the number of observations
n_obs = fies_df.iloc[:, :-1].count()

# calculate unweighted mean
# Calculate the mean
mean = fies_df.iloc[:, :-1].mean()

# Calculate the standard deviation
std = fies_df.iloc[:, :-1].std()

# Calculate the standard error
standard_error = std / np.sqrt(n_obs)

# Calculate the weighted mean
weighted_mean = (fies_df.iloc[:, :-1].multiply(fies_df["hhweightmics"], axis=0)).sum() / fies_df["hhweightmics"].sum()

# Function to calculate the weighted standard deviation
def weighted_std(values, weights):
    average = np.average(values, weights=weights)
    variance = np.average((values - average) ** 2, weights=weights)
    return np.sqrt(variance)

# Calculate the weighted standard deviation
weighted_std_dev = fies_df.iloc[:, :-1].apply(lambda x: weighted_std(x, fies_df["hhweightmics"]))

# Calculate the weighted standard error
weighted_se = weighted_std_dev / np.sqrt(n_obs)

# Create a summary DataFrame
summary_df = pd.DataFrame({
    'Number of Observations': n_obs,
    'Mean': mean,
    'Standard Deviation': std,
    'Standard Error': standard_error,
    'Weighted Mean': weighted_mean,
    'Weighted Standard Deviation': weighted_std_dev,
    'Weighted Standard Error': weighted_se
})

# Summary of Food Insecurity Measures and Indicators. 
print("Table 1")
summary_df.round(3)

Table 1


Unnamed: 0,Number of Observations,Mean,Standard Deviation,Standard Error,Weighted Mean,Weighted Standard Deviation,Weighted Standard Error
WORRIED,10680,0.755,0.43,0.004,0.76,0.427,0.004
HEALTHY,10680,0.716,0.451,0.004,0.717,0.45,0.004
FEWFOOD,10680,0.722,0.448,0.004,0.717,0.45,0.004
SKIPPED,10680,0.639,0.48,0.005,0.648,0.478,0.005
ATELESS,10680,0.687,0.464,0.004,0.689,0.463,0.004
RANOUT,10680,0.571,0.495,0.005,0.571,0.495,0.005
HUNGRY,10680,0.497,0.5,0.005,0.485,0.5,0.005
WHLDAY,10680,0.287,0.453,0.004,0.264,0.441,0.004
prob_mod_sev,10680,0.681,0.419,0.004,0.682,0.419,0.004
prob_sev,10680,0.265,0.334,0.003,0.257,0.328,0.003


**Table 1 Presents the following:**

* the weighted pooled summary statistics and shows that 70.9 percent of urban households experienced moderate
or severe food insecurity. (MSI is measure based on `fies_score` >= 4)
* About 19 percent of households report not experiencing at least one of the eight dimensions of food insecurity that constitute the FIES
* About 80.7 percent of households report experiencing at least one of the eight dimensions of food insecurity that constitute the FIES


### 2a. Estimated total number of household (all and by zone) that are 
* (i) moderately or severely food insecure and `MSI`
* (ii) severely food insecure `SFI`

#### Unweighted Estimate

In [103]:
def estimate_food_insecurity(df, agg_func="sum", wt="hhweightmics"):

    columns = ["zone","MSI", "SFI", "FI_Food_Insecure"]

    if wt:
        columns.append(wt)

    df = df[columns]
    df_ = df.copy()
    
    if wt:
        df_.loc[:, 'weighted_MSI'] = df_['MSI'] * df_[wt]
        df_.loc[:, 'weighted_SFI'] = df_['SFI'] * df_[wt]
        # Calculate weighted MSI and SFI
        df_.loc[:,'weighted_FI_Food_Insecure'] = df_['FI_Food_Insecure'] * df_[wt]
    else:
        df_.loc[:, 'weighted_MSI'] = df_['MSI']
        df_.loc[:, 'weighted_SFI'] = df_['SFI']
        # Calculate weighted MSI and SFI
        df_.loc[:,'weighted_FI_Food_Insecure'] = df_['FI_Food_Insecure']
        
    # Compute the total number of household by zone
    total_by_zone = df_.groupby("zone", observed=True).agg(
        Total_MSI = ("weighted_MSI", agg_func),
        Total_SFI = ("weighted_SFI", agg_func),
        Total_Food_Insecure = ("weighted_FI_Food_Insecure", agg_func),
        n_obs = ("zone", "count")
        
    )
    
    # Calculate overall totals
    overall_totals = pd.DataFrame({
        'Total_MSI': [df_['weighted_MSI'].agg(agg_func)],
        'Total_SFI': [df_['weighted_SFI'].agg(agg_func)],
        "Total_Food_Insecure": [df_['weighted_FI_Food_Insecure'].agg(agg_func)],
        "n_obs" : [df_.shape[0]]
    })
    
    combined_FI = pd.concat([overall_totals.T, total_by_zone.T], axis=1)
    
    # Rename column
    return combined_FI.rename(columns={0:"Country"}).reset_index()

estimate_food_insecurity(df, wt=None)

Unnamed: 0,index,Country,North Central,North East,North West,South East,South South,South West
0,Total_MSI,7546,1543,1054,1018,627,890,2414
1,Total_SFI,4423,895,688,598,368,520,1354
2,Total_Food_Insecure,8646,1711,1252,1212,708,1014,2749
3,n_obs,10680,2086,1567,1469,792,1218,3548


#### Weighted

In [104]:
# Compute the weighted estimate
estimate_food_insecurity(df, wt="hhweightmics").round(2)

Unnamed: 0,index,Country,North Central,North East,North West,South East,South South,South West
0,Total_MSI,11021.78,1276.83,514.55,1364.33,2043.86,1486.25,4335.95
1,Total_SFI,6331.29,745.22,342.82,811.28,1219.3,756.04,2456.62
2,Total_Food_Insecure,12537.31,1405.31,614.32,1622.09,2280.28,1718.79,4896.51
3,n_obs,10680.0,2086.0,1567.0,1469.0,792.0,1218.0,3548.0


### 2b.Estimate average incidence of 
* (i) moderate or severe food insecurity `MSI`
* (ii) severe food insecurity among the urban household population `SFI`


In [105]:
estimate_food_insecurity(df, agg_func="mean", wt=None).round(3)

Unnamed: 0,index,Country,North Central,North East,North West,South East,South South,South West
0,Total_MSI,0.707,0.74,0.673,0.693,0.792,0.731,0.68
1,Total_SFI,0.414,0.429,0.439,0.407,0.465,0.427,0.382
2,Total_Food_Insecure,0.81,0.82,0.799,0.825,0.894,0.833,0.775
3,n_obs,10680.0,2086.0,1567.0,1469.0,792.0,1218.0,3548.0


In [106]:
# Weighted estimate of mean
# estimate_food_insecurity(df, agg_func="mean", wt="hhweightmics").round(3)
# Calculate weighted sums by zone
zone_totals = df.groupby('zone', observed=True).apply(lambda x: pd.Series({
    'Weighted_MSI': (x['MSI'] * x['hhweightmics']).sum(),
    'Weighted_SFI': (x['SFI'] * x['hhweightmics']).sum(),
    'Weighted_Food_Insecure': (x['FI_Food_Insecure'] * x['hhweightmics']).sum(),
    'Total_Weight': x['hhweightmics'].sum()
}), include_groups=False).reset_index()

# Calculate mean number of households by zone
zone_totals['Mean_MSI'] = zone_totals['Weighted_MSI'] / zone_totals['Total_Weight']
zone_totals['Mean_SFI'] = zone_totals['Weighted_SFI'] / zone_totals['Total_Weight']
zone_totals['Mean_Food_Insecure'] = zone_totals['Weighted_Food_Insecure'] / zone_totals['Total_Weight']


# Calculate overall totals
overall_totals = pd.Series({
    'zone': 'Overall',
    'Weighted_MSI': (df['MSI'] * df['hhweightmics']).sum(),
    'Weighted_SFI': (df['SFI'] * df['hhweightmics']).sum(),
    'Weighted_Food_Insecure': (df['FI_Food_Insecure'] * df['hhweightmics']).sum(),
    'Total_Weight': df['hhweightmics'].sum()
})

# Calculate overall mean number of households
overall_totals['Mean_MSI'] = overall_totals['Weighted_MSI'] / overall_totals['Total_Weight']
overall_totals['Mean_SFI'] = overall_totals['Weighted_SFI'] / overall_totals['Total_Weight']
overall_totals['Mean_Food_Insecure'] = overall_totals['Weighted_Food_Insecure'] / overall_totals['Total_Weight']


# Combine both results into one DataFrame
combined_df = pd.concat([pd.DataFrame([overall_totals]), zone_totals]).round(3)

# Display combined DataFrame
combined_df[['zone', 'Mean_MSI', 'Mean_SFI', 'Mean_Food_Insecure']].T

Unnamed: 0,0,0.1,1,2,3,4,5
zone,Overall,North Central,North East,North West,South East,South South,South West
Mean_MSI,0.709,0.774,0.67,0.705,0.782,0.712,0.669
Mean_SFI,0.408,0.452,0.446,0.419,0.466,0.362,0.379
Mean_Food_Insecure,0.807,0.852,0.8,0.838,0.872,0.823,0.756


**Unweighted Observation**


*  Severe food insecurity(`Mean_SFI`) alone affects an estimated 41% of the urban household population.
*  Severe food insecurity(`MEan_Food_insecure`) alone affects an estimated 8600, or 81% of the urban household population.

In [107]:
# # Standard Error
# # estimate_food_insecurity(df, agg_func="sem", wt="hhweightmics").round(3)

# zone_totals = df.groupby('zone', observed=True).apply(lambda x: pd.Series({
#     'Weighted_MSI': (x['MSI'] * x['hhweightmics']).sem(),
#     'Weighted_SFI': (x['SFI'] * x['hhweightmics']).sem(),
#     'Weighted_Food_Insecure': (x['FI_Food_Insecure'] * x['hhweightmics']).sem(),
#     'Total_Weight': x['hhweightmics'].sem()
# }), include_groups=False).reset_index()

# # Calculate mean number of households by zone
# zone_totals['Mean_MSI'] = zone_totals['Weighted_MSI'] / zone_totals['Total_Weight']
# zone_totals['Mean_SFI'] = zone_totals['Weighted_SFI'] / zone_totals['Total_Weight']
# zone_totals['Mean_Food_Insecure'] = zone_totals['Weighted_Food_Insecure'] / zone_totals['Total_Weight']


# # Calculate overall totals
# overall_totals = pd.Series({
#     'zone': 'Overall',
#     'Weighted_MSI': (df['MSI'] * df['hhweightmics']).sem(),
#     'Weighted_SFI': (df['SFI'] * df['hhweightmics']).sem(),
#     'Weighted_Food_Insecure': (df['FI_Food_Insecure'] * df['hhweightmics']).sem(),
#     'Total_Weight': df['hhweightmics'].sem()
# })

# # Calculate overall mean number of households
# overall_totals['Mean_MSI'] = overall_totals['Weighted_MSI'] / overall_totals['Total_Weight']
# overall_totals['Mean_SFI'] = overall_totals['Weighted_SFI'] / overall_totals['Total_Weight']
# overall_totals['Mean_Food_Insecure'] = overall_totals['Weighted_Food_Insecure'] / overall_totals['Total_Weight']


# # Combine both results into one DataFrame
# combined_df = pd.concat([pd.DataFrame([overall_totals]), zone_totals]).round(3)

# # Display combined DataFrame
# combined_df[['zone', 'Mean_MSI', 'Mean_SFI', 'Mean_Food_Insecure']].T

### Definition of Variables

we selected a certain number of socio-demographic and economic variables from the MICS 6 database, such as, geographical zone, age of household head, education level, socio-economic level expressed as a quintile of the wealth index, etc...

In [108]:
df = df.drop(columns=["hh_num_child_cat", "hh_num_adult_cat"], axis=1)

In [109]:
# Change object type to cateogory types

df[df.select_dtypes(['object']).columns] = df.select_dtypes(['object']).apply(lambda x: x.astype('category'))

In [110]:
# children under five years old (None, 1, 2+)
df["num_child_under5_cat"] = pd.cut(df["num_under_5_child"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 

# children between 5 to 17 (None, 1, 2+)
df["num_child_5_17_cat"] = pd.cut(df["num_5_17_child"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 

# Number of women 15 - 49 (None, 1, 2+)
df["num_of_women_15_49_cat"] = pd.cut(df["num_of_women_15_49"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 

# Number of men 15 - 49 (None, 1, 2+)
df["num_of_men_15_49_cat"] = pd.cut(df["num_of_men_15_49"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 

# Number of adults
df["num_of_adults_cat"] = pd.cut(df["num_adult"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 

# Number of children
df["num_of_children_cat"] = pd.cut(df["num_children"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 


# Number of men that attended school, (None, 1, 2+)
df["num_of_men_attended_sch_cat"] = pd.cut(df["hh_mn_attended_sch_num"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 

# Number of women that attended school, (None, 1, 2+)
df["num_of_women_attended_sch_cat"] = pd.cut(df["hh_wm_attended_sch_num"], bins=[-1, 0, 1, float("inf")], labels=['None', '1', '2+']) 



In [149]:
# Selec
df2 = df[["zone", "hhsex", "hh_age_cat", "hh_siz_cat", "helevel", "hh_own_dwelling",
          "urban_wi_quintile_mics", "hh_agricultural_land", "hh_own_animal","hh_ag_land_size_cat",
          "num_child_under5_cat", 'num_child_5_17_cat',
          "WORRIED", "HEALTHY", "FEWFOOD", "SKIPPED", "ATELESS", "RANOUT", "HUNGRY", "WHLDAY",
          "fies_score", "FI_Food_Insecure", "SFI", "MSI", "FS","fies_0_3_7",
          "prob_mod_sev", "prob_sev", 
          
          "num_of_women_15_49_cat",
          "num_of_men_15_49_cat", 
          "num_of_adults_cat",
          "num_of_children_cat",
          "num_of_men_attended_sch_cat", 
          "num_of_women_attended_sch_cat",
          "hhweightmics"
         ]]

df2.to_csv("../processed_data/df2.csv", index=False)


### Table 3: Socio-demographic and economic characteristics of the study sample of urban household, MICS6a

In [150]:
df.head(2)

Unnamed: 0,hh1,hh2,area,zone,hh_members_num,num_of_women_15_49,num_of_men_15_49,num_under_5_child,num_5_17_child,hhsex,hhage,helevel,hh_own_dwelling,hh_agricultural_land,hh_own_animal,hh_mn_attended_sch_num,hh_mn_mean_age_yrs,hh_mn_attended_atleast_sec_sch,hh_mn_mean_life_satisfaction,hh_wm_attended_sch_num,hh_wm_mean_age_yrs,hh_wm_attended_atleast_sec_sch,hh_wm_mean_life_satisfaction,water_source,wi_quintile_mics,urban_wi_quintile_mics,hh_ls_num,fies_score,hhweightmics,psu,stratum,prob_mod_sev,prob_sev,rs_fies_score,WORRIED,HEALTHY,FEWFOOD,SKIPPED,ATELESS,RANOUT,HUNGRY,WHLDAY,hh_ag_land_size_cat,hh_ls_num_clean_iqr,hh_ls_num_clean,hh_members_num_clean_iqr,hh_members_num_clean,num_adult,num_children,hh_siz_cat,hh_age_cat,PFI_moderate_severe,PFI_severe,PFI_mid_moderate,FS,MSI,SFI,fies_0_3_7,FI_Food_Insecure,num_child_under5_cat,num_child_5_17_cat,num_of_women_15_49_cat,num_of_men_15_49_cat,num_of_adults_cat,num_of_children_cat,num_of_men_attended_sch_cat,num_of_women_attended_sch_cat
0,1.0,1.0,URBAN,South East,4.0,3.0,0.0,0.0,1.0,Female,46.0,Primary,RENT,NO,NO,0.0,0.0,0.0,0.0,3.0,28.666667,2.0,3.333333,TUBE WELL / BOREHOLE,Fourth,Middle,0,7,0.61955,1.0,1.0,0.995,0.39,7,1,1,1,1,1,1,1,0,0 hectare,0.0,0.0,4.0,1.43299,3.0,1.0,<5,>45,1,0,0,0,1,1,7-8,1,,1,2+,,2+,1,,2+
1,1.0,2.0,URBAN,South East,2.0,2.0,0.0,0.0,1.0,Female,45.0,Senior secondary,RENT,NO,NO,0.0,0.0,0.0,0.0,2.0,30.5,2.0,5.5,TUBE WELL / BOREHOLE,Fourth,Second,0,6,0.61955,1.0,1.0,0.971,0.043,6,1,1,1,1,1,1,0,0,0 hectare,0.0,0.0,2.0,2.0,2.0,1.0,<5,36-45,1,0,0,0,1,0,4-6,1,,1,2+,,2+,1,,2+


In [151]:

def weighted_freq(df: pd.DataFrame, grp_by: str, wt: str="hhweightmics")->pd.DataFrame:
    """
    Calculate the frequency, weighted frequency, and weighted proportion for a specified categorical variable in a DataFrame.

    Parameters:
    ----------
    df : pd.DataFrame
        The input DataFrame containing the data.
    grp_by : str
        The name of the column to group by (i.e., the categorical variable).
    wt : str, optional
        The name of the column containing the weights for each observation. Default is 'hhweightmics'.

    Returns:
    -------
    pd.DataFrame
        A DataFrame containing the following columns:
        - 'Frequency': The unweighted count of each category.
        - 'Weighted Frequency': The sum of the weights for each category.
        - 'Proportion': The weighted proportion (as a percentage) of each category, rounded to two decimal places.
    
    Example:
    -------
    >>> df = pd.DataFrame({
            'category': ['A', 'A', 'B', 'B', 'C'],
            'hhweightmics': [1.2, 2.1, 1.5, 1.7, 2.3]
        })
    >>> result = weighted_freq(df, grp_by='category', wt='hhweightmics')
    >>> print(result)
      category  Frequency  Weighted Frequency  Proportion
    0        A          2                3.0       25.0
    1        B          2                3.0       25.0
    2        C          1                2.0       50.0
    """
    
    freq_count = df.groupby(grp_by, observed=True).size()
    weighted_freq = df.groupby(grp_by, observed=True).apply(lambda x: (x[wt].sum()), include_groups=False)
    weighted_proportion = weighted_freq * 100/ weighted_freq.sum()  # Convert to proportions
    # Combine into a DataFrame
    result_df = pd.DataFrame({
        'Frequency': freq_count,
        'Weighted Frequency': weighted_freq,
        'Proportion': weighted_proportion}).reset_index()
    return result_df.round({"Proportion": 2,"Weighted Frequency":0 })


In [152]:
# compute the descriptive statistics of each predictor variables
weighted_freq(df2, "urban_wi_quintile_mics")

Unnamed: 0,urban_wi_quintile_mics,Frequency,Weighted Frequency,Proportion
0,Fourth,1821,3319.0,21.36
1,Middle,2080,3263.0,21.0
2,Poorest,2678,2821.0,18.16
3,Richest,1724,3146.0,20.25
4,Second,2377,2987.0,19.23


* use the function to generate the descriptive statistics.

In [214]:

weighted_freq(df2, "FI_Food_Insecure")

Unnamed: 0,FI_Food_Insecure,Frequency,Weighted Frequency,Proportion
0,0,2034,2999.0,19.3
1,1,8646,12537.0,80.7


### Table 4: Characteristics of the urban household population by food security status, MICS6a

In [153]:
## Import the function for importing R packages in Python
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
import rpy2.robjects as ro

# Load the rpy2.ipython extension to use R magic commands in Jupyter notebooks
%load_ext rpy2.ipython
pandas2ri.activate()

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [185]:
%%R
library(tidyverse)
library(survey)
library(gtsummary)
library(dplyr)
if(!require('survey')) {
    install.packages('survey')
    install.packages("gtsummary")
    library('survey')
    library(gtsummary)
}

In [186]:
%%R

data <- read_csv("../processed_data/df2.csv")

Rows: 10680 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (19): zone, hhsex, hh_age_cat, hh_siz_cat, helevel, hh_own_dwelling, urb...
dbl (16): WORRIED, HEALTHY, FEWFOOD, SKIPPED, ATELESS, RANOUT, HUNGRY, WHLDA...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [208]:
%%R
survey::svydesign(~1, data = data, weights = ~hhweightmics) |>
    tbl_svysummary(by = fies_0_3_7, 
                   type = list(c(hh_agricultural_land, hh_own_animal) ~ "categorical"),
                   include = c(hhsex, zone, hh_age_cat, hh_siz_cat,
                               helevel, hh_own_dwelling,
                               urban_wi_quintile_mics,
                               hh_agricultural_land,
                               hh_ag_land_size_cat,
                               hh_own_animal,
                               num_child_5_17_cat,
                               num_child_under5_cat,
                               num_of_children_cat,
                               num_of_adults_cat
                              )) |>
add_p() |>
add_ci() |>
bold_labels()

`...` must be empty.
✖ Problematic argument:
• useS4 = FALSE


### Table 5: Status of FI Experience Scale (FIES) questions

In [236]:
%%R

fies<- data %>% select('WORRIED':'WHLDAY')

# Function to calculate proportion of 1s and 0s
calc_proportions <- function(x) {
  prop_1 <- mean(x == 1)*100
  prop_0 <- mean(x == 0)*100
  c(prop_1 = prop_1, prop_0 = prop_0)
}

# Function to calculate confidence interval for each variable
calc_ci <- function(x) {
  ci <- binom.test(sum(x == 1), length(x))$conf.int
  return(ci*100)
}

# Function to calculate Chi-Square statistic for each variable
calc_chisq <- function(x) {
  chisq.test(table(x))$p.value
}

# Apply functions to dataset
proportions <- t(sapply(fies, calc_proportions))
confidence_intervals <- t(sapply(fies, calc_ci))
chi_squares <- sapply(fies, calc_chisq)


results_df <- data.frame(
  Variable = colnames(fies),
  Proportion_1 = proportions[, "prop_1"],
  Proportion_0 = proportions[, "prop_0"],
  CI_Lower = confidence_intervals[, 1],
  CI_Upper = confidence_intervals[, 2],
  Chi_Square = chi_squares
)
results_df

        Variable Proportion_1 Proportion_0 CI_Lower CI_Upper    Chi_Square
WORRIED  WORRIED     75.53371     24.46629 74.70683 76.34664  0.000000e+00
HEALTHY  HEALTHY     71.57303     28.42697 70.70709 72.42721  0.000000e+00
FEWFOOD  FEWFOOD     72.19101     27.80899 71.33060 73.03931  0.000000e+00
SKIPPED  SKIPPED     63.91386     36.08614 62.99466 64.82546 7.132869e-182
ATELESS  ATELESS     68.67041     31.32959 67.78105 69.54959  0.000000e+00
RANOUT    RANOUT     57.12547     42.87453 56.18036 58.06669  4.292966e-49
HUNGRY    HUNGRY     49.70974     50.29026 48.75699 50.66264  5.485478e-01
WHLDAY    WHLDAY     28.73596     71.26404 27.87892 29.60460  0.000000e+00


In [238]:

from tabulate import tabulate

# Create the DataFrame
data = {
    'Variable': ['WORRIED', 'HEALTHY', 'FEWFOOD', 'SKIPPED', 'ATELESS', 'RANOUT', 'HUNGRY', 'WHLDAY'],
    'Proportion_1': [75.53371, 71.57303, 72.19101, 63.91386, 68.67041, 57.12547, 49.70974, 28.73596],
    'Proportion_0': [24.46629, 28.42697, 27.80899, 36.08614, 31.32959, 42.87453, 50.29026, 71.26404],
    'CI_Lower': [74.70683, 70.70709, 71.33060, 62.99466, 67.78105, 56.18036, 48.75699, 27.87892],
    'CI_Upper': [76.34664, 72.42721, 73.03931, 64.82546, 69.54959, 58.06669, 50.66264, 29.60460],
    'Chi_Square': [0.000000e+00, 0.000000e+00, 0.000000e+00, 7.132869e-182, 0.000000e+00, 4.292966e-49, 5.485478e-01, 0.000000e+00]
}

df = pd.DataFrame(data)

df



Unnamed: 0,Variable,Proportion_1,Proportion_0,CI_Lower,CI_Upper,Chi_Square
0,WORRIED,75.53371,24.46629,74.70683,76.34664,0.0
1,HEALTHY,71.57303,28.42697,70.70709,72.42721,0.0
2,FEWFOOD,72.19101,27.80899,71.3306,73.03931,0.0
3,SKIPPED,63.91386,36.08614,62.99466,64.82546,7.132869e-182
4,ATELESS,68.67041,31.32959,67.78105,69.54959,0.0
5,RANOUT,57.12547,42.87453,56.18036,58.06669,4.292966e-49
6,HUNGRY,49.70974,50.29026,48.75699,50.66264,0.5485478
7,WHLDAY,28.73596,71.26404,27.87892,29.6046,0.0


<hr>
<div>
    <a href="./5_hh_mics6_modelling.ipynb">
        <button style="float: right;">&#8594; Modelling Notebook </button>
    </a>
    <a href="./3_hh_mics6_eda.ipynb">
        <button>&#8592; EDA Notebook</button>
    </a>
</div>
<hr>