In [1]:
import numpy as np
import pandas as pd
import os
import glob
import warnings
warnings.filterwarnings('ignore')

In [2]:
### Define EMRO countries and helper functions
emro = ['AFG', 'ARE', 'BHR', 'DJI','EGY','IRN','IRQ','JOR',
        'KWT', 'LBN', 'LBR', 'MAR', 'OMN', 'PAK','PSE' , 'QAT','SAU',
        'SDN','SOM', 'SYR', 'YEM','TUN']
drop_list = ['superregion2','age','urban','edu', 'varnum',
             'upperci_95','lowerci_95','serving','s_lowerci_95','s_upperci_95',]

def select(col, val, df):
    df = df[df[col] == val]
    return df

def emro_select(df: pd.DataFrame):
    d1 = select('age', 999, df)
    d2 = select('edu', 999, d1)
    d3 = select('urban', 999, d2)
    d3 = d3.drop(drop_list, axis=1)
    d4 = d3[d3['iso3'].isin(emro)]
    
    males = select('female', 0, d4)
    females = select('female', 1, d4)
    
    return males, females

def get_pivots(df):
    cf = df.pivot_table(index='year', columns='iso3', values='score').transpose()
    return cf

In [3]:
# Update Dixon Index cutoffs to include separate values for males and females
dixon_cutoffs_grams_gender = {
    'fruit': {'male': 320, 'female': 320},
    'veg': {'male': 320, 'female': 320},
    'whole_grains': {'male': 141, 'female': 120},
    'dairy': {'male': 400, 'female': 400},
    'meats': {'male': 170, 'female': 170},
    'added_sugar': {'male': 3, 'female': 3},     ###% of daily calories
    'saturated_fats': {'male':5, 'female': 5},    ### % of daily calories
    'nuts_legumes': {'male':4, 'female': 3}       ###! serving size... i have specific function for these 
}

In [4]:
def dash_dixon_index_score_gender(df: pd.DataFrame, food_group: str, scoring_scheme: str, gender: str) -> pd.DataFrame:
    """
    Calculate the DASH Dixon Index score using gender-specific cutoffs in grams.
    """
    if food_group not in dixon_cutoffs_grams_gender:
        raise ValueError(f"No Dixon Index cutoff defined for food group '{food_group}'")
    if gender not in ['male', 'female']:
        raise ValueError(f"Gender must be 'male' or 'female', got '{gender}'")
    
    cutoff_val = dixon_cutoffs_grams_gender[food_group][gender]
    
    # Apply scoring based on the Dixon Index.
    if scoring_scheme == 'pos':
        df['score'] = df['median'].apply(lambda x: 1 if x >= cutoff_val else 0)
    elif scoring_scheme == 'neg':
        df['score'] = df['median'].apply(lambda x: 1 if x <= cutoff_val else 0)
    else:
        raise ValueError("scoring_scheme must be 'pos' or 'neg'")
    
    return df

In [5]:
def get_dash_dixon_scores_gender(name: str, save: bool, v0_codes: list, scoring_scheme: str):
    """
    Sums the CSV files for a given DASH food group (via provided file paths),
    computes the DASH Dixon scores with gender-specific cutoffs, and optionally saves the resulting DataFrames.
    """
    def sum_csv_files(file_paths, sum_columns):
        df_sum = None
        non_sum_columns = None
        for file in file_paths:
            df = pd.read_csv(file)
            df_sum_cols = df[sum_columns]
            df_non_sum_cols = df.drop(columns=sum_columns)
            if df_sum is None:
                df_sum = df_sum_cols
                non_sum_columns = df_non_sum_cols
            else:
                df_sum = df_sum.add(df_sum_cols, fill_value=0)
        final_df = pd.concat([non_sum_columns, df_sum], axis=1)
        return final_df
    
    total = sum_csv_files(v0_codes, sum_columns=['median'])
    males_df, females_df = emro_select(total)
    males_df = dash_dixon_index_score_gender(males_df, food_group=name, scoring_scheme=scoring_scheme, gender='male')
    females_df = dash_dixon_index_score_gender(females_df, food_group=name, scoring_scheme=scoring_scheme, gender='female')
    
    os.makedirs('scores/dash_dixon', exist_ok=True)
    if save:
        males_df.to_csv(f'scores/dash_dixon/{name}_males.csv', index=False)
        females_df.to_csv(f'scores/dash_dixon/{name}_females.csv', index=False)
    
    return males_df, females_df

In [6]:
# List of tuples with (file paths, food group name, scoring scheme).
dixon_config = [
    ([r'..\raw_data\Country-level estimates\v08_cnty.csv'], 'whole_grains', 'pos'),
    ([r'..\raw_data\Country-level estimates\v01_cnty.csv'], 'fruit', 'pos'),
    ([r'..\raw_data\Country-level estimates\v02_cnty.csv',
      r'..\raw_data\Country-level estimates\v03_cnty.csv',
      r'..\raw_data\Country-level estimates\v04_cnty.csv'], 'veg', 'pos'),
    ([r'..\raw_data\Country-level estimates\v09_cnty.csv',
      r'..\raw_data\Country-level estimates\v10_cnty.csv'], 'meats', 'neg'),
    ([r'..\raw_data\Country-level estimates\v57_cnty.csv',
      r'..\raw_data\Country-level estimates\v14_cnty.csv',
      r'..\raw_data\Country-level estimates\v13_cnty.csv'], 'dairy', 'pos'),
      ([r'..\raw_data\Country-level estimates\v35_cnty.csv'], 'added_sugar', 'neg'),
      ([r'..\raw_data\Country-level estimates\v27_cnty.csv'], 'saturated_fats', 'neg')
]

# Loop through each configuration and generate (and optionally save) the DASH Dixon scores.
for file_list, food_group, scheme in dixon_config:
    get_dash_dixon_scores_gender(name=food_group, save=True, v0_codes=file_list, scoring_scheme=scheme)

KeyError: "['varnum', 'serving', 's_lowerci_95', 's_upperci_95'] not found in axis"

In [None]:
def calculate_combined_score(file1: str, file2: str, food_group: str, scoring_scheme: str,
                              normalization_factor1: float, normalization_factor2:float, save: bool = True):
    """
    Generalized function to combine two food groups (e.g., nuts and legumes, meats), normalize their values,
    and calculate scores based on gender-specific cutoffs.
    """
    # Load the datasets
    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)
    
    # Normalize intakes based on provided normalization factors
    df1['median'] = df1['median'] / normalization_factor1
    df2['median'] = df2['median'] / normalization_factor2
    
    # Sum normalized values
    df_combined = df1.copy()
    df_combined['median'] = df1['median'] + df2['median']
    
    # Apply gender-specific scoring
    males_df, females_df = emro_select(df_combined)
    males_df = dash_dixon_index_score_gender(males_df, food_group=food_group, scoring_scheme=scoring_scheme, gender='male')
    females_df = dash_dixon_index_score_gender(females_df, food_group=food_group, scoring_scheme=scoring_scheme, gender='female')
    
    # Save to files if needed
    if save:
        os.makedirs('scores/dash_dixon', exist_ok=True)
        males_df.to_csv(f'scores/dash_dixon/{food_group}_combined_males.csv', index=False)
        females_df.to_csv(f'scores/dash_dixon/{food_group}_combined_females.csv', index=False)
    
    return males_df, females_df

_,_= calculate_combined_score(
    file1=r'..\raw_data\Country-level estimates\v06_cnty.csv', normalization_factor1=28,  # Nuts and seeds 
    file2=r'..\raw_data\Country-level estimates\v05_cnty.csv', normalization_factor2=100,  # legumes
    food_group='nuts_legumes', scoring_scheme='pos', save=True 
)