In [4]:
import numpy as np
import pandas as pd
import os
import glob
import warnings
warnings.filterwarnings('ignore')

In [5]:
### Define EMRO countries and helper functions
# ISO 3166-1 alpha-3 codes used to filter the `iso3` column down to the
# WHO Eastern Mediterranean Region (EMRO).
# NOTE: Libya is 'LBY'. The previous value 'LBR' is Liberia, which is not an
# EMRO member, so Libyan rows were silently excluded from every output.
emro = ['AFG', 'ARE', 'BHR', 'DJI', 'EGY', 'IRN', 'IRQ', 'JOR',
        'KWT', 'LBN', 'LBY', 'MAR', 'OMN', 'PAK', 'PSE', 'QAT', 'SAU',
        'SDN', 'SOM', 'SYR', 'YEM', 'TUN']

# Columns removed by `emro_select` once the rows have been filtered.
drop_list = ['superregion2', 'age', 'urban', 'edu']

def select(col, val, df):
    """Return the rows of ``df`` whose value in column ``col`` equals ``val``.

    The input frame is not modified; the original row index is kept.
    """
    matches = df[col] == val
    return df[matches]

def emro_select(df: pd.DataFrame):
    """Filter ``df`` to EMRO countries and split it by the ``female`` code.

    Rows are kept only where ``age``, ``edu`` and ``urban`` all equal 999
    (presumably the "all strata" aggregate code -- confirm against the data
    dictionary). The columns in ``drop_list`` are then removed and rows are
    restricted to ``iso3`` codes listed in ``emro``.

    Returns
    -------
    tuple of three DataFrames
        Rows with ``female == 999``, ``female == 0`` and ``female == 1``,
        in that order.
    """
    aggregated = df
    for stratifier in ('age', 'edu', 'urban'):
        aggregated = select(stratifier, 999, aggregated)

    region = aggregated.drop(columns=drop_list)
    region = region[region['iso3'].isin(emro)]

    both_sexes = select('female', 999, region)
    men = select('female', 0, region)
    women = select('female', 1, region)
    return both_sexes, men, women


def get_pivots(df):
    """Reshape long-format scores into a country-by-year table.

    Pivots ``score`` with ``year`` as rows and ``iso3`` as columns (using
    ``pivot_table``'s default aggregation for duplicate cells), then
    transposes so each row is a country and each column is a year.
    """
    year_by_country = df.pivot_table(index='year', columns='iso3', values='score')
    return year_by_country.T

In [6]:
# Scores median intake against the gram-based food-group cutoffs.
def medit_score(df: pd.DataFrame, food_group: str, scoring_scheme: str) -> pd.DataFrame:
    """Add a binary ``score`` column comparing ``median`` to a food-group cutoff.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a numeric ``median`` column. It is not modified.
    food_group : str
        Key selecting the cutoff (one of the keys of the table below).
    scoring_scheme : str
        ``'hi'``  -> score 1 when ``median >= cutoff`` (higher intake scores).
        ``'low'`` -> score 1 when ``median <= cutoff`` (lower intake scores).

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with an integer ``score`` column (0 or 1). Rows whose
        ``median`` is NaN score 0 under either scheme.

    Raises
    ------
    ValueError
        If ``food_group`` has no cutoff or ``scoring_scheme`` is not
        ``'hi'``/``'low'``.
    """
    # Cutoff per food group, in the same units as the `median` column.
    cutoffs = {
        'dairy': 400,
        'fruit': 480,
        'veg': 480,
        'fish': 28.5,
        'bread': 180,
        'legumes': 28.5,
        'nuts': 168,
        'eggs': 31.4,
    }

    if food_group not in cutoffs:
        raise ValueError(f"No DASH cutoff defined for food group '{food_group}'")

    cutoff_val = cutoffs[food_group]

    if scoring_scheme == 'hi':
        meets_cutoff = df['median'] >= cutoff_val
    elif scoring_scheme == 'low':
        meets_cutoff = df['median'] <= cutoff_val
    else:
        # The message now names the values this function actually accepts.
        raise ValueError("scoring_scheme must be 'hi' or 'low'")

    # Work on a copy: callers pass filtered slices of a larger frame, and
    # assigning a column onto such a slice is unreliable (SettingWithCopy).
    scored = df.copy()
    scored['score'] = meets_cutoff.astype('int64')
    return scored


def aio_medit(df: pd.DataFrame, food_group: str, scoring_scheme: str, pivot: bool=True):
    """Filter, score and optionally pivot one food group's intake data.

    ``df`` is split by ``emro_select`` into three frames (``female`` codes
    999, 0 and 1). Each frame is scored with ``medit_score``, restricted to
    ``iso3`` codes in ``emro`` and, when ``pivot`` is true, reshaped with
    ``get_pivots``.

    Returns the three resulting frames as a tuple, in the same order as
    ``emro_select`` yields them.
    """
    def _score_subset(subset):
        # Score first, then keep only EMRO rows, then reshape if requested.
        scored = medit_score(subset, food_group, scoring_scheme)
        scored = scored[scored['iso3'].isin(emro)]
        return get_pivots(scored) if pivot else scored

    both_sexes, men, women = emro_select(df)
    return _score_subset(both_sexes), _score_subset(men), _score_subset(women)

def get_medit_scores(name: str, save: bool, v0_codes: list, scoring_scheme: str):
    """Score one food group from one or more intake CSVs and optionally save it.

    Parameters
    ----------
    name : str
        Food-group key passed to ``aio_medit``; also used in output file names.
    save : bool
        When true, write ``{name}_all.csv``, ``{name}_males.csv`` and
        ``{name}_females.csv`` under ``scores/medit_new``.
    v0_codes : list
        Paths of the CSV files whose ``median`` columns are summed.
    scoring_scheme : str
        Scheme forwarded to ``aio_medit``.

    Returns
    -------
    tuple
        The three pivoted frames returned by ``aio_medit``.

    Raises
    ------
    ValueError
        If ``v0_codes`` is empty.
    """

    def sum_csv_files(file_paths, sum_columns):
        """Sum ``sum_columns`` across files; keep other columns from the first file."""
        df_sum = None
        non_sum_columns = None
        for file in file_paths:
            df = pd.read_csv(file)
            # Separate columns for summing versus metadata.
            df_sum_cols = df[sum_columns]
            if df_sum is None:
                df_sum = df_sum_cols
                # Retain metadata columns from the first file only.
                non_sum_columns = df.drop(columns=sum_columns)
            else:
                # NOTE(review): rows are matched by position (default index),
                # so this assumes every file lists the same rows in the same
                # order -- confirm against the raw data.
                df_sum = df_sum.add(df_sum_cols, fill_value=0)
        if df_sum is None:
            # Fail with a clear message instead of pd.concat's
            # "All objects passed were None".
            raise ValueError("v0_codes must contain at least one CSV path")
        return pd.concat([non_sum_columns, df_sum], axis=1)

    total = sum_csv_files(v0_codes, sum_columns=['median'])

    all_df, males_df, females_df = aio_medit(total, food_group=name, scoring_scheme=scoring_scheme, pivot=True)

    if save:
        # Only touch the filesystem when output was requested.
        os.makedirs('scores/medit_new/global', exist_ok=True)
        all_df.to_csv(f'scores/medit_new/{name}_all.csv')
        males_df.to_csv(f'scores/medit_new/{name}_males.csv')
        females_df.to_csv(f'scores/medit_new/{name}_females.csv')

    return all_df, males_df, females_df


# Folder holding the country-level estimate CSVs. Built with os.path.join so
# the notebook runs on any OS (on Windows this yields the same
# '..\raw_data\Country-level estimates' string as before).
_cnty_dir = os.path.join('..', 'raw_data', 'Country-level estimates')

# (variable codes, food group name, scoring scheme). When several codes are
# listed, their intakes are summed before scoring.
# Meats (v09 processed, v10 unprocessed) are intentionally absent: they are
# scored separately by get_medit_meats.
_food_group_codes = [
    (['v08'], 'bread', 'hi'),
    (['v01'], 'fruit', 'hi'),
    (['v02', 'v04'], 'veg', 'hi'),
    (['v06'], 'nuts', 'hi'),
    (['v05'], 'legumes', 'hi'),
    (['v57', 'v14', 'v13'], 'dairy', 'low'),
    (['v11'], 'fish', 'hi'),
    (['v12'], 'eggs', 'hi'),
]

# List of tuples with (file paths, food group name, scoring scheme).
ryuk = [
    ([os.path.join(_cnty_dir, f'{code}_cnty.csv') for code in codes], group, scoring)
    for codes, group, scoring in _food_group_codes
]

# Loop through each configuration and generate (and save) the scores.
for file_list, food_group, scheme in ryuk:
    get_medit_scores(name=food_group, save=True, v0_codes=file_list, scoring_scheme=scheme)

In [7]:
def get_medit_meats(processed: str, unprocessed: str, save: bool = True):
    """
    Score combined processed and unprocessed meat intake and optionally save it.

    Each file's ``median`` is converted to servings (processed: 30 g per
    serving, unprocessed: 85 g per serving) and the two are added. A row
    scores 1 when the combined servings are <= 2, otherwise 0. The result is
    split with ``emro_select``, restricted to ``emro`` countries and pivoted
    with ``get_pivots``.

    All non-``median`` columns come from the processed-meat file.
    NOTE(review): the two files are combined row-by-row on the default index,
    which assumes identical row order in both -- confirm.

    Returns the (all, males, females) pivoted frames; when ``save`` is true
    they are also written to ``scores/medit_new/meats_*.csv``.
    """
    processed_df = pd.read_csv(processed)
    unprocessed_df = pd.read_csv(unprocessed)

    # Total servings = processed / 30 + unprocessed / 85.
    servings = processed_df['median'] / 30 + unprocessed_df['median'] / 85
    combined = processed_df.assign(median=servings)

    def _within_limit(total_servings):
        # 1 when at most two combined servings, else 0 (NaN also gives 0).
        return 1 if total_servings <= 2 else 0

    combined['score'] = combined['median'].apply(_within_limit)

    pivoted = []
    for subset in emro_select(combined):
        subset = subset[subset['iso3'].isin(emro)]
        pivoted.append(get_pivots(subset))
    all_df, males_df, females_df = pivoted

    if save:
        os.makedirs('scores/medit_new/global', exist_ok=True)
        outputs = {
            'scores/medit_new/meats_all.csv': all_df,
            'scores/medit_new/meats_males.csv': males_df,
            'scores/medit_new/meats_females.csv': females_df,
        }
        for out_path, frame in outputs.items():
            frame.to_csv(out_path)

    return all_df, males_df, females_df

# Score meats and write the CSVs; the returned frames are not needed here.
# Paths are built with os.path.join so the cell runs on any OS (on Windows
# the resulting strings are identical to the previous backslash literals).
_, _, _ = get_medit_meats(
    processed=os.path.join('..', 'raw_data', 'Country-level estimates', 'v09_cnty.csv'),
    unprocessed=os.path.join('..', 'raw_data', 'Country-level estimates', 'v10_cnty.csv'),
    save=True
)

In [8]:
## Sum the per-food-group score tables into one total per country.
def calc_total(path: str, suff: str):
    """Add up every ``*_{suff}.csv`` score table found in ``path``.

    The first column of each file is treated as the row key and the remaining
    columns as numeric scores. Tables are added with rows matched on that key
    and columns matched by label, so differing row order between files does
    not corrupt the totals. A key or column missing from any file yields NaN
    in the corresponding cells.

    ``total_{suff}.csv`` is skipped: it is this function's own saved output
    and would otherwise be counted again whenever the notebook is re-run.

    Returns
    -------
    pd.DataFrame or None
        The summed table with the key restored as the first column, or
        ``None`` when no input file matches.
    """
    own_output = f'total_{suff}.csv'
    score_files = sorted(
        found for found in glob.glob(f'{path}/*_{suff}.csv')
        if os.path.basename(found) != own_output
    )

    cumulative_df = None
    key_name = None

    for score_file in score_files:
        scores = pd.read_csv(score_file)
        key_col = scores.columns[0]
        scores = scores.set_index(key_col)

        if cumulative_df is None:
            cumulative_df = scores
            key_name = key_col
        else:
            # Label-aligned addition: rows by key, columns by name.
            cumulative_df = cumulative_df + scores

    if cumulative_df is None:
        return None

    # Restore the original layout: key as first column, default row index.
    return cumulative_df.rename_axis(key_name).reset_index()

# Write one total-score table per group next to the per-food-group files.
folder = 'scores/medit_new'
for i in ['all', 'males', 'females']:
    temp_df = calc_total(folder, i)
    if temp_df is None:
        # calc_total returns None when no matching file exists; fail with a
        # clear message instead of an AttributeError on None.
        raise FileNotFoundError(f"No '*_{i}.csv' score files found in '{folder}'")
    temp_df.to_csv(f'{folder}/total_{i}.csv', index=False)