In [1]:
import numpy as np 
import pandas as pd 
import os 
import glob
import warnings
warnings.filterwarnings('ignore')    ## I don't like pandas setting with copy warnings 

In [2]:
### FYI: GDD has no data for Somalia. I have included it in my list tho

# ISO 3166-1 alpha-3 codes used to restrict results to the EMRO region.
# 'LBY' (Libya) replaces the former 'LBR', which is Liberia's code and is not
# an EMRO member; Lebanon is already covered by 'LBN'.
emro = ['AFG', 'ARE', 'BHR', 'DJI', 'EGY', 'IRN', 'IRQ', 'JOR',
        'KWT', 'LBN', 'LBY', 'MAR', 'OMN', 'PAK', 'PSE', 'QAT', 'SAU',
        'SDN', 'SOM', 'SYR', 'YEM', 'TUN']

# Columns removed by emro_select once the aggregate rows have been picked.
drop_list = ['superregion2', 'age', 'urban', 'edu']

def select(col, val, df):
    """Return the rows of ``df`` whose ``col`` entry equals ``val``."""
    matches = df[col] == val
    return df.loc[matches]

def emro_select(df: pd.DataFrame):
    """
    Keep the aggregate rows of ``df`` and split them by the ``female`` column.

    Rows are kept only where ``age``, ``edu`` and ``urban`` all equal 999
    (presumably the dataset's "all categories" code -- confirm against the
    data dictionary). The columns named in the module-level ``drop_list`` are
    then removed; ``drop_list`` is looked up at call time.

    Despite the name, no country filtering happens here; callers restrict to
    the EMRO list themselves.

    Returns:
        A 3-tuple ``(all, males, females)`` holding the rows where ``female``
        equals 999, 0 and 1 respectively.
    """
    aggregated = df
    for column in ('age', 'edu', 'urban'):
        aggregated = select(column, 999, aggregated)
    aggregated = aggregated.drop(drop_list, axis=1)

    both_sexes = select('female', 999, aggregated)
    males = select('female', 0, aggregated)
    females = select('female', 1, aggregated)

    return both_sexes, males, females


In [3]:
def get_pivots(df):
    """
    Reshape long-format scores into a country-by-year table.

    The frame is pivoted with ``year`` as index and ``iso3`` as columns and
    then transposed, so the returned table has one row per ISO3 code and one
    column per year, filled from the ``score`` column.
    """
    year_by_country = df.pivot_table(values='score', index='year', columns='iso3')
    return year_by_country.T


# Updated dash_score function that uses the new gram-based cutoffs.
def dash_score(df: pd.DataFrame, food_group: str, scoring_scheme: str) -> pd.DataFrame:
    """
    Score the DASH metric based on the gram-based cutoffs.

    Args:
        df: Frame with a numeric ``median`` column.
        food_group: Key into the cutoff table below.
        scoring_scheme: ``'pos'`` scores 1 when ``median >= cutoff``;
            ``'neg'`` scores 1 when ``median < cutoff``. Every other row,
            including rows with a missing median, scores 0.

    Returns:
        A new DataFrame equal to ``df`` plus an integer ``score`` column.
        The input frame is left untouched: callers pass filtered slices, and
        assigning a column onto such a slice in place is what triggers
        pandas' SettingWithCopyWarning.

    Raises:
        ValueError: If ``food_group`` has no cutoff or ``scoring_scheme`` is
            neither ``'pos'`` nor ``'neg'``.
    """
    dash_cutoffs = {
        'meats': 510,         # Meats/Poultry/Fish. For neg: lower intake is better.
        'dairy': 400,         # Dairy. For pos: higher intake is better.
        'fruit': 320,         # Fruits.
        'veg': 320,           # Vegetables.
        'whole_grain': 180,   # Whole Grains.
        'SSB': 178.6,         # Sugar-sweetened beverages.
        'sugar': 178.6,       # Sugar (assumed same as SSB).
        'sodium': 2300        # Sodium in mg/day.
    }

    if food_group not in dash_cutoffs:
        raise ValueError(f"No DASH cutoff defined for food group '{food_group}'")

    if scoring_scheme not in ('pos', 'neg'):
        raise ValueError("scoring_scheme must be 'pos' or 'neg'")

    cutoff_val = dash_cutoffs[food_group]
    medians = df['median']

    # Comparisons against NaN are False, so missing medians score 0 under
    # both schemes.
    if scoring_scheme == 'pos':
        meets_target = medians >= cutoff_val
    else:
        meets_target = medians < cutoff_val

    # assign() builds a new frame instead of writing into the caller's slice.
    return df.assign(score=meets_target.astype('int64'))

def aio_v3_dash(df: pd.DataFrame, food_group: str, scoring_scheme: str, pivot: bool=True):
    """
    Run the DASH scoring pipeline for one food group.

    For each of the three subsets returned by ``emro_select`` (all, males,
    females), in that order:
      1. add the 0/1 ``score`` column via ``dash_score``;
      2. keep only rows whose ``iso3`` is in the module-level ``emro`` list;
      3. if ``pivot`` is true, reshape with ``get_pivots`` (one row per ISO3
         code, one column per year).

    Returns:
        ``(all_df, males_df, females_df)``.
    """
    processed = []
    for subset in emro_select(df):
        scored = dash_score(subset, food_group, scoring_scheme)
        regional = scored[scored['iso3'].isin(emro)]
        processed.append(get_pivots(regional) if pivot else regional)

    all_df, males_df, females_df = processed
    return all_df, males_df, females_df

def get_dash_scores(name: str, save: bool, v0_codes: list, scoring_scheme: str):
    """
    Sum the ``median`` column across the given CSV files, score the result
    with ``aio_v3_dash`` (pivoted) and optionally write it to disk.

    Args:
        name: Food-group key understood by ``dash_score``; also the file-name
            prefix of the saved CSVs.
        save: When true, write ``{name}_all.csv``, ``{name}_males.csv`` and
            ``{name}_females.csv`` under ``scores/dash_cutoff``. Nothing is
            created on disk when false.
        v0_codes: Paths of the CSV files whose ``median`` columns are summed.
        scoring_scheme: ``'pos'`` or ``'neg'``, passed through to scoring.

    Returns:
        ``(all_df, males_df, females_df)`` as returned by ``aio_v3_dash``.

    Raises:
        ValueError: If ``v0_codes`` is empty.
    """
    def sum_csv_files(file_paths, sum_columns):
        # Fail early with a readable message instead of pd.concat's
        # "All objects passed were None".
        if not file_paths:
            raise ValueError(f"No CSV files were provided for food group '{name}'")

        df_sum = None
        non_sum_columns = None
        for file in file_paths:
            df = pd.read_csv(file)
            df_sum_cols = df[sum_columns]
            if df_sum is None:
                df_sum = df_sum_cols
                # Metadata columns are taken from the first file only.
                non_sum_columns = df.drop(columns=sum_columns)
            else:
                # Rows are matched by their default integer index, i.e. by
                # position. NOTE(review): this assumes every file lists its
                # rows in the same order -- confirm against the raw data.
                # fill_value=0 treats a value missing on one side as zero.
                df_sum = df_sum.add(df_sum_cols, fill_value=0)
        return pd.concat([non_sum_columns, df_sum], axis=1)

    # Sum the CSV files (here summing the 'median' column).
    total = sum_csv_files(v0_codes, sum_columns=['median'])

    # Process with DASH scoring; pivot the output.
    all_df, males_df, females_df = aio_v3_dash(
        total, food_group=name, scoring_scheme=scoring_scheme, pivot=True
    )

    if save:
        # Only touch the filesystem when asked to, and create the directory
        # that is actually written to.
        out_dir = 'scores/dash_cutoff'
        os.makedirs(out_dir, exist_ok=True)
        all_df.to_csv(f'{out_dir}/{name}_all.csv')
        males_df.to_csv(f'{out_dir}/{name}_males.csv')
        females_df.to_csv(f'{out_dir}/{name}_females.csv')

    return all_df, males_df, females_df

# ------------------------------------------------------------
# Configuration for DASH scoring with new cutoff values and file addresses.
# Paths are assembled with os.path.join so they resolve on any OS; the former
# raw strings with backslashes were only valid on Windows.
_COUNTRY_DIR = os.path.join('..', 'raw_data', 'Country-level estimates')


def _country_csv(code: str) -> str:
    """Return the path of the country-level CSV for a variable code such as 'v08'."""
    return os.path.join(_COUNTRY_DIR, f'{code}_cnty.csv')


# Each entry: (CSV files whose medians are summed, food group, scoring scheme).
luffy = [
    ([_country_csv('v08')], 'whole_grain', 'pos'),
    ([_country_csv('v01')], 'fruit', 'pos'),
    ([_country_csv('v02'),
      _country_csv('v04')], 'veg', 'pos'),  # Note: Includes starchy veggies.
    ([_country_csv('v15')], 'SSB', 'neg'),
    # v35 ('sugar', 'neg') is left out on purpose: sugar may be handled separately.
    ([_country_csv('v57'),
      _country_csv('v14'),
      _country_csv('v13')], 'dairy', 'pos'),
    ([_country_csv('v09'),
      _country_csv('v11'),  # Total seafoods added.
      _country_csv('v10')], 'meats', 'neg'),
    ([_country_csv('v37')], 'sodium', 'neg')
]

# Loop through each configuration and generate (and save) the DASH scores.
for files, food_group, scheme in luffy:
    get_dash_scores(name=food_group, save=True, v0_codes=files, scoring_scheme=scheme)


In [4]:
def get_dash_scores_nuts_legumes_combined(nuts_file: str, legumes_file: str, save: bool = True):
    """
    Score combined nut and legume intake for the EMRO countries.

    The ``median`` of the nuts file is divided by 28 and that of the legumes
    file by 100 (presumably grams per serving -- confirm), the two are added
    row by row, and a row scores 1 when the sum is at least 5/7, else 0.
    NOTE(review): the addition matches rows by their default index, so both
    files are assumed to list rows in the same order.

    The scored frame is split with ``emro_select``, limited to ``emro``
    countries, pivoted with ``get_pivots`` and, when ``save`` is true,
    written as ``nuts_legumes_{all,males,females}.csv`` under
    ``scores/dash_cutoff``.

    Returns:
        ``(all_df, males_df, females_df)`` as pivoted tables.
    """
    def meets_target(servings):
        return 1 if servings >= 5/7 else 0

    # Load both inputs; the nuts frame supplies the metadata columns.
    nuts = pd.read_csv(nuts_file)
    legumes = pd.read_csv(legumes_file)

    nut_servings = nuts['median'] / 28
    legume_servings = legumes['median'] / 100

    combined = nuts.copy()
    combined['median'] = nut_servings + legume_servings
    combined['score'] = combined['median'].apply(meets_target)

    # Scoring is already done, so only the split, EMRO filter and pivot remain.
    pivoted = []
    for subset in emro_select(combined):
        regional = subset[subset['iso3'].isin(emro)]
        pivoted.append(get_pivots(regional))
    all_df, males_df, females_df = pivoted

    if save:
        os.makedirs('scores/dash_cutoff/global', exist_ok=True)
        all_df.to_csv('scores/dash_cutoff/nuts_legumes_all.csv')
        males_df.to_csv('scores/dash_cutoff/nuts_legumes_males.csv')
        females_df.to_csv('scores/dash_cutoff/nuts_legumes_females.csv')

    return all_df, males_df, females_df

# Paths are built with os.path.join so the cell also runs outside Windows.
# The result is bound to throwaway names to keep the cell output empty.
_, _, _ = get_dash_scores_nuts_legumes_combined(
    nuts_file=os.path.join('..', 'raw_data', 'Country-level estimates', 'v06_cnty.csv'),
    legumes_file=os.path.join('..', 'raw_data', 'Country-level estimates', 'v05_cnty.csv'),
    save=True
)

In [5]:
## a function to sum the scores for countries 
def calc_total(path: str, suff: str):
    """
    Add up the per-food-group score tables found in ``path``.

    Every ``*_{suff}.csv`` file in ``path`` is read with its first column as
    the row key, and the tables are added cell by cell.

    Two safeguards:
      * Files named ``total_*`` are skipped. The totals are written into the
        same folder under that name, so on a re-run the glob would otherwise
        pick up the previous total and add it in again.
      * Rows are matched on the key column rather than on row position. A
        key or column missing from any file yields NaN in the total, so
        incomplete data stays visible.

    Args:
        path: Folder containing the score CSVs.
        suff: File-name suffix selecting the subgroup (e.g. ``'all'``).

    Returns:
        A DataFrame with the key column first followed by the summed columns,
        or ``None`` when no matching file exists.
    """
    score_files = sorted(
        csv_path
        for csv_path in glob.glob(f'{path}/*_{suff}.csv')
        if not os.path.basename(csv_path).startswith('total_')
    )

    cumulative_df = None
    for csv_path in score_files:
        scores = pd.read_csv(csv_path, index_col=0)
        if cumulative_df is None:
            cumulative_df = scores
        else:
            # Label-aligned addition on both the row key and the column names.
            cumulative_df = cumulative_df.add(scores)

    if cumulative_df is None:
        return None
    return cumulative_df.reset_index()

# Write one summed-score file per subgroup next to the per-food-group files.
folder = 'scores/dash_cutoff'
for subgroup in ('all', 'males', 'females'):
    calc_total(folder, subgroup).to_csv(f'{folder}/total_{subgroup}.csv', index=False)

So in **dash_emro** we have scores based on how countries scored in the EMRO region

and in **dash_global** we have their scores based on how they scored across the world

***GLOBAL Scores***

In [6]:
# Rebinds the module-level drop_list that emro_select reads at call time, so
# from this cell on emro_select no longer drops 'superregion2'.
drop_list = ['age', 'urban', 'edu']

# Columns retained in the global score outputs.
keep = ['year', 'median', 'score']
def aio_v3_dash_glo(df: pd.DataFrame, food_group: str, scoring_scheme: str, pivot: bool=True):
    """
    Run the DASH scoring pipeline on global estimates.

    For each subset returned by ``emro_select`` (all, males, females), in
    that order:
      1. add the 0/1 ``score`` column via ``dash_score``;
      2. keep only the columns listed in the module-level ``keep``;
      3. if ``pivot`` is true, reshape with ``get_pivots``.

    NOTE(review): ``keep`` does not contain ``'iso3'`` while ``get_pivots``
    pivots on that column, so ``pivot=True`` looks unusable here; the only
    call visible in this notebook passes ``pivot=False``.

    Returns:
        ``(all_df, males_df, females_df)``.
    """
    processed = []
    for subset in emro_select(df):
        scored = dash_score(subset, food_group, scoring_scheme)
        trimmed = scored[keep]
        processed.append(get_pivots(trimmed) if pivot else trimmed)

    all_df, males_df, females_df = processed
    return all_df, males_df, females_df

def get_dash_scores_glo(name: str, save: bool, v0_codes: list, scoring_scheme: str):
    """
    Sum the ``median`` column across the given global CSV files, score the
    result with ``aio_v3_dash_glo`` (not pivoted) and optionally save it.

    Args:
        name: Food-group key understood by ``dash_score``; also the file-name
            prefix of the saved CSVs.
        save: When true, write ``{name}_all.csv``, ``{name}_males.csv`` and
            ``{name}_females.csv`` under ``scores/dash_cutoff/global`` without
            the index column. Nothing is created on disk when false.
        v0_codes: Paths of the CSV files whose ``median`` columns are summed.
        scoring_scheme: ``'pos'`` or ``'neg'``, passed through to scoring.

    Returns:
        ``(all_df, males_df, females_df)`` as returned by ``aio_v3_dash_glo``.

    Raises:
        ValueError: If ``v0_codes`` is empty.
    """
    def sum_csv_files(file_paths, sum_columns):
        # Fail early with a readable message instead of pd.concat's
        # "All objects passed were None".
        if not file_paths:
            raise ValueError(f"No CSV files were provided for food group '{name}'")

        df_sum = None
        non_sum_columns = None
        for file in file_paths:
            df = pd.read_csv(file)
            df_sum_cols = df[sum_columns]
            if df_sum is None:
                df_sum = df_sum_cols
                # Metadata columns are taken from the first file only.
                non_sum_columns = df.drop(columns=sum_columns)
            else:
                # Rows are matched by their default integer index, i.e. by
                # position. NOTE(review): this assumes every file lists its
                # rows in the same order -- confirm against the raw data.
                # fill_value=0 treats a value missing on one side as zero.
                df_sum = df_sum.add(df_sum_cols, fill_value=0)
        return pd.concat([non_sum_columns, df_sum], axis=1)

    # Sum the CSV files (here summing the 'median' column).
    total = sum_csv_files(v0_codes, sum_columns=['median'])

    # Compute DASH scores without pivoting.
    all_df, males_df, females_df = aio_v3_dash_glo(
        total, food_group=name, scoring_scheme=scoring_scheme, pivot=False
    )

    if save:
        # Only touch the filesystem when asked to.
        out_dir = 'scores/dash_cutoff/global'
        os.makedirs(out_dir, exist_ok=True)
        all_df.to_csv(f'{out_dir}/{name}_all.csv', index=False)
        males_df.to_csv(f'{out_dir}/{name}_males.csv', index=False)
        females_df.to_csv(f'{out_dir}/{name}_females.csv', index=False)

    return all_df, males_df, females_df


# List of tuples with (file paths, food group name, scoring scheme).
# Paths are assembled with os.path.join so they resolve on any OS; the former
# raw strings with backslashes were only valid on Windows.
_GLOBAL_DIR = os.path.join('..', 'raw_data', 'Global estimates')


def _global_csv(code: str) -> str:
    """Return the path of the global-estimates CSV for a variable code such as 'v08'."""
    return os.path.join(_GLOBAL_DIR, f'{code}_global.csv')


# Each entry: (CSV files whose medians are summed, food group, scoring scheme).
luffy = [
    ([_global_csv('v08')], 'whole_grain', 'pos'),
    ([_global_csv('v01')], 'fruit', 'pos'),
    ([_global_csv('v02'),
      _global_csv('v04')], 'veg', 'pos'),       # Other starchy veggies are included as well.
    ([_global_csv('v15')], 'SSB', 'neg'),        # Open question: should sugar be scored too?
    # v35 ('sugar', 'neg') is currently left out.
    ([_global_csv('v57'),
      _global_csv('v14'),
      _global_csv('v13')], 'dairy', 'pos'),
    ([_global_csv('v09'),
      _global_csv('v11'),                        # Total seafoods added.
      _global_csv('v10')], 'meats', 'neg'),
    ([_global_csv('v37')], 'sodium', 'neg')
]

# Loop through each configuration and generate (and save) the scores.
for file_list, food_group, scheme in luffy:
    get_dash_scores_glo(name=food_group, save=True, v0_codes=file_list, scoring_scheme=scheme)


In [7]:
def get_dash_scores_nuts_legumes_combined_global(nuts_file: str, legumes_file: str, save: bool = True):
    """
    Score combined nut and legume intake on the global estimates.

    The ``median`` of the nuts file is divided by 28 and that of the legumes
    file by 100 (presumably grams per serving -- confirm), the two are added
    row by row, and a row scores 1 when the sum is at least 5/7, else 0.
    NOTE(review): the addition matches rows by their default index, so both
    files are assumed to list rows in the same order.

    The scored frame is split with ``emro_select`` and reduced to the columns
    in the module-level ``keep``; no pivoting is done.

    Args:
        nuts_file: Path of the nuts CSV.
        legumes_file: Path of the legumes CSV.
        save: When true, write ``nuts_legumes_{all,males,females}.csv`` under
            ``scores/dash_cutoff/global``.

    Returns:
        ``(all_df, males_df, females_df)``.
    """
    # Load the datasets
    df_nuts = pd.read_csv(nuts_file)
    df_legumes = pd.read_csv(legumes_file)

    # Normalize intakes
    df_nuts['median'] = df_nuts['median'] / 28
    df_legumes['median'] = df_legumes['median'] / 100

    # Sum normalized values; the nuts frame supplies the metadata columns.
    df_combined = df_nuts.copy()
    df_combined['median'] = df_nuts['median'] + df_legumes['median']

    # Apply DASH scoring: score = 1 if median >= 5/7, else 0
    df_combined['score'] = df_combined['median'].apply(lambda x: 1 if x >= 5/7 else 0)

    # Split by sex, then drop the columns that are not needed downstream.
    all_df, males_df, females_df = emro_select(df_combined)
    all_df = all_df[keep]
    males_df = males_df[keep]
    females_df = females_df[keep]

    # Save to files if needed
    if save:
        os.makedirs('scores/dash_cutoff/global', exist_ok=True)
        # index=False matches the other global score files. Writing the index
        # would add a leading unnamed column and shift the positional column
        # slices used when the global totals are summed.
        all_df.to_csv('scores/dash_cutoff/global/nuts_legumes_all.csv', index=False)
        males_df.to_csv('scores/dash_cutoff/global/nuts_legumes_males.csv', index=False)
        females_df.to_csv('scores/dash_cutoff/global/nuts_legumes_females.csv', index=False)

    return all_df, males_df, females_df

# Paths are built with os.path.join so the cell also runs outside Windows.
# The result is bound to throwaway names to keep the cell output empty.
_, _, _ = get_dash_scores_nuts_legumes_combined_global(
    nuts_file=os.path.join('..', 'raw_data', 'Global estimates', 'v06_global.csv'),
    legumes_file=os.path.join('..', 'raw_data', 'Global estimates', 'v05_global.csv'),
    save=True
)

In [8]:
## a function to sum the scores for countries 
def calc_total_glo(path: str, suff: str):
    """
    Add up the ``score`` column of the global score files found in ``path``.

    Every ``*_{suff}.csv`` file in ``path`` contributes its ``score`` column;
    the ``year`` column is taken from the first file. Rows are matched by
    position, as before.

    Two safeguards:
      * Files named ``total_*`` are skipped. The totals are written into the
        same folder under that name, so on a re-run the glob would otherwise
        pick up the previous total and add it in again.
      * Columns are selected by name instead of by position, so a file that
        carries an extra leading index column cannot shift the sum onto the
        wrong columns.

    Args:
        path: Folder containing the global score CSVs.
        suff: File-name suffix selecting the subgroup (e.g. ``'all'``).

    Returns:
        A DataFrame with the columns ``year`` and ``score``.

    Raises:
        FileNotFoundError: If no matching score file exists.
    """
    score_files = sorted(
        csv_path
        for csv_path in glob.glob(f'{path}/*_{suff}.csv')
        if not os.path.basename(csv_path).startswith('total_')
    )
    if not score_files:
        raise FileNotFoundError(f"No '*_{suff}.csv' score files found in '{path}'")

    cumulative_df = None
    for csv_path in score_files:
        scores = pd.read_csv(csv_path)[['year', 'score']]
        if cumulative_df is None:
            cumulative_df = scores.copy()
        else:
            cumulative_df['score'] = cumulative_df['score'] + scores['score']

    return cumulative_df

In [9]:
# Write one summed-score file per subgroup next to the global score files.
folder = 'scores/dash_cutoff/global'
for subgroup in ('all', 'males', 'females'):
    calc_total_glo(folder, subgroup).to_csv(f'{folder}/total_{subgroup}.csv', index=False)