In [1]:
import numpy as np 
import pandas as pd 
import os 
import glob
import warnings
warnings.filterwarnings('ignore')    ## I don't like pandas setting with copy warnings 

In [None]:
### FYI: GDD has no data for Somalia ('SOM'); it is kept in the list anyway so the
### list stays a complete roster of the region.

# ISO 3166-1 alpha-3 codes used to filter frames down to the EMRO region.
# Libya is 'LBY' ('LBR' is Liberia, which is not part of the region).
emro = ['AFG', 'ARE', 'BHR', 'DJI','EGY','IRN','IRQ','JOR',
        'KWT', 'LBN', 'LBY', 'MAR', 'OMN', 'PAK','PSE' , 'QAT','SAU',
        'SDN','SOM', 'SYR', 'YEM','TUN']

# Columns removed by `emro_select` once the stratification filters have been applied.
drop_list = ['superregion2','age','urban','edu']

def select(col, val,df):
    """Return the rows of ``df`` whose ``col`` value equals ``val``.

    The original row index is preserved; ``df`` itself is not modified.
    """
    matches = df[col] == val
    return df[matches]

def emro_select(df:pd.DataFrame):
    """Split a frame into (all, males, females) sub-frames.

    Rows are first restricted to those where 'age', 'edu' and 'urban' all equal
    999 (presumably the "all strata" sentinel -- TODO confirm against the data
    codebook), then the columns named in the module-level ``drop_list`` are
    removed. The remaining rows are split on the 'female' column:
    999 -> all, 0 -> males, 1 -> females.

    Despite the name, no country filtering happens here; callers restrict to
    the ``emro`` list themselves.
    """
    overall = df
    for stratum in ('age', 'edu', 'urban'):
        overall = select(stratum, 999, overall)
    overall = overall.drop(drop_list, axis=1)

    # One sub-frame per code of the 'female' column.
    by_sex = {code: select('female', code, overall) for code in (999, 0, 1)}
    return by_sex[999], by_sex[0], by_sex[1]



In [3]:
def get_pivots(df):
    """Reshape long-format scores into a country-by-year table.

    Expects 'iso3', 'year' and 'score' columns. The result has one row per
    'iso3' value and one column per 'year'; duplicate (iso3, year) pairs are
    averaged, which is ``pivot_table``'s default aggregation.
    """
    year_by_country = df.pivot_table(values='score', index='year', columns='iso3')
    return year_by_country.T


# Scores a food group against a fixed cutoff on the 'median' column.
def medit_score(df: pd.DataFrame, food_group: str, scoring_scheme: str) -> pd.DataFrame:
    """Return a copy of ``df`` with a 0/1 'score' column added.

    Parameters
    ----------
    df : frame with a numeric 'median' column.
    food_group : key into the cutoff table below.
    scoring_scheme : 'hi' scores 1 when median >= cutoff; 'low' scores 1 when
        median < cutoff. Missing medians score 0 under either scheme.

    Returns
    -------
    A new DataFrame. The input is left untouched, so scoring a filtered slice
    no longer writes into it (the chained-assignment pattern pandas warns about).

    Raises
    ------
    ValueError if ``food_group`` has no cutoff or ``scoring_scheme`` is not
    'hi' / 'low'.
    """
    # Cutoffs on the 'median' column. The boundary value itself counts as
    # "high": 'hi' uses >=, 'low' uses strict <.
    cutoffs = {
        'meats': 170,       # g/day, low scoring: 1 if median < 170
        'dairy': 200,       # g/day, low scoring: 1 if median < 200
        'fruit': 240,       # g/day, hi scoring: 1 if median >= 240
        'sea_food': 28.6,   # g/day, hi scoring: 1 if median >= 28.6
        'veg': 480,         # g/day, hi scoring: 1 if median >= 480
        'whole_grain': 90,  # g/day, hi scoring: 1 if median >= 90
        'legumes': 28.6,    # g/day, hi scoring: 1 if median >= 28.6
        'nuts': 84,         # g/day, hi scoring: 1 if median >= 84
        'MUFA_div_SFA': 1   # ratio of two medians (not grams), hi scoring: 1 if ratio >= 1
    }

    if food_group not in cutoffs:
        raise ValueError(f"No gram cutoff defined for food group '{food_group}'")
    cutoff_val = cutoffs[food_group]

    # Vectorized comparison; NaN compares False and therefore scores 0.
    if scoring_scheme == 'hi':
        meets_cutoff = df['median'] >= cutoff_val
    elif scoring_scheme == 'low':
        meets_cutoff = df['median'] < cutoff_val
    else:
        raise ValueError("scoring_scheme must be either 'hi' or 'low'")

    return df.assign(score=meets_cutoff.astype(int))

def aio_v2(df: pd.DataFrame, food_group: str, scoring_scheme: str, pivot: bool=True):
    """Score a frame for one food group, restricted to the EMRO countries.

    The frame is split by ``emro_select`` into (all, males, females); each part
    is scored with ``medit_score`` and then filtered to rows whose 'iso3' is in
    the module-level ``emro`` list. With ``pivot=True`` each part is reshaped
    by ``get_pivots`` into a country-by-year table.

    Returns a 3-tuple ordered (all, males, females).
    """
    results = []
    for subset in emro_select(df):
        scored = medit_score(subset, food_group, scoring_scheme)
        in_region = scored[scored['iso3'].isin(emro)]
        results.append(get_pivots(in_region) if pivot else in_region)

    all_df, males_df, females_df = results
    return all_df, males_df, females_df

def get_medit_scores(name: str, save: bool, v0_codes: list, scoring_scheme: str):
    """Score one food group from one or more CSV files and optionally save it.

    ``v0_codes`` is a list of CSV paths whose 'median' columns are added
    together before scoring. The summed frame is passed to ``aio_v2`` with
    ``pivot=True``. The output directory 'scores/medit_cutoff' is always
    created; the three CSVs are written only when ``save`` is true.

    Returns the (all, males, females) tuple produced by ``aio_v2``.
    """
    def sum_csv_files(file_paths, sum_columns):
        """Add ``sum_columns`` across files; other columns come from the first file.

        Addition aligns on the row index produced by ``read_csv``, so the files
        are assumed to list their rows in the same order -- TODO confirm.
        """
        running_sum = None
        carried_columns = None
        for csv_path in file_paths:
            frame = pd.read_csv(csv_path)
            values = frame[sum_columns]
            if running_sum is not None:
                running_sum = running_sum.add(values, fill_value=0)
                continue
            # First file: seed the sum and remember its descriptive columns.
            running_sum = values
            carried_columns = frame.drop(columns=sum_columns)
        return pd.concat([carried_columns, running_sum], axis=1)

    summed = sum_csv_files(v0_codes, sum_columns=['median'])
    all_df, males_df, females_df = aio_v2(
        summed, food_group=name, scoring_scheme=scoring_scheme, pivot=True
    )

    out_dir = 'scores/medit_cutoff'
    os.makedirs(out_dir, exist_ok=True)
    if save:
        for label, frame in (('all', all_df), ('males', males_df), ('females', females_df)):
            frame.to_csv(f'{out_dir}/{name}_{label}.csv')
    return all_df, males_df, females_df

# Folder holding the country-level CSVs. Built with os.path.join so the notebook
# also runs where '\' is not a path separator.
country_dir = os.path.join('..', 'raw_data', 'Country-level estimates')

# (file codes, food group name, scoring scheme). A group with several codes has
# the medians of those files summed before scoring.
country_sources = [
    (['v08'], 'whole_grain', 'hi'),
    (['v01'], 'fruit', 'hi'),
    (['v02', 'v04'], 'veg', 'hi'),
    (['v06'], 'nuts', 'hi'),
    (['v05'], 'legumes', 'hi'),
    (['v57', 'v14', 'v13'], 'dairy', 'low'),
    (['v09', 'v10'], 'meats', 'low'),
    (['v11'], 'sea_food', 'hi'),
]

# List of tuples with (file paths, food group name, scoring scheme).
ryuk = [
    ([os.path.join(country_dir, f'{code}_cnty.csv') for code in codes], group, direction)
    for codes, group, direction in country_sources
]

# Loop through each configuration and generate (and save) the scores.
for file_list, food_group, scheme in ryuk:
    get_medit_scores(name=food_group, save=True, v0_codes=file_list, scoring_scheme=scheme)


In [4]:
def div_n_stich(path1, path2):
    """Read two CSVs and return the first with 'median' replaced by a ratio.

    The new 'median' is the first file's 'median' divided row-by-row (aligned
    on the default ``read_csv`` index) by the second file's 'median'. Because
    the column is popped and re-attached, it ends up as the last column.
    """
    numerator_df = pd.read_csv(path1)
    denominator_df = pd.read_csv(path2)

    ratio = numerator_df.pop('median') / denominator_df.pop('median')
    numerator_df['median'] = ratio
    return numerator_df

def get_medit_scores_fat(name: str, save: bool, p1, p2, type: str):
    """Score a ratio of two CSVs' medians (``p1`` / ``p2``) for the EMRO countries.

    Parameters
    ----------
    name : food group key passed to ``aio_v2`` and used in the output file names.
    save : when true, write the three pivoted tables as CSV.
    p1, p2 : paths of the numerator and denominator CSV files.
    type : scoring scheme, 'hi' or 'low' (parameter name kept for existing callers).

    Returns the (all, males, females) tuple produced by ``aio_v2``.
    """
    ratio_df = div_n_stich(p1, p2)
    all_df, males_df, females_df = aio_v2(ratio_df, food_group=name, scoring_scheme=type, pivot=True)

    if save:
        # Forward slashes match get_medit_scores. The previous non-raw
        # f'scores\medit_cutoff\{name}...' relied on invalid escape sequences
        # and only named a file inside the folder on Windows.
        out_dir = 'scores/medit_cutoff'
        os.makedirs(out_dir, exist_ok=True)
        all_df.to_csv(f'{out_dir}/{name}_all.csv')
        males_df.to_csv(f'{out_dir}/{name}_males.csv')
        females_df.to_csv(f'{out_dir}/{name}_females.csv')
    return all_df, males_df, females_df

# Score the v28 / v27 ratio as 'MUFA_div_SFA'. Paths are joined with os.path.join
# so they resolve on any platform; the assignment to _ keeps the returned frames
# from being displayed as the cell output.
_,_,_ = get_medit_scores_fat(name='MUFA_div_SFA', save=True,
                             p1=os.path.join('..', 'raw_data', 'Country-level estimates', 'v28_cnty.csv'),
                             p2=os.path.join('..', 'raw_data', 'Country-level estimates', 'v27_cnty.csv'),
                             type='hi')

In [5]:
### Sanity check: recompute the all-genders MUFA/SFA ratio by hand to confirm
### the pipeline above is working.

mufa = pd.read_csv(os.path.join('..', 'raw_data', 'Country-level estimates', 'v28_cnty.csv'))
sfa = pd.read_csv(os.path.join('..', 'raw_data', 'Country-level estimates', 'v27_cnty.csv'))

mufa_all, mufa_m, mufa_f = emro_select(mufa)
# The third target used to be a second `sfa_m`, which overwrote the male frame
# with the female one.
sfa_all, sfa_m, sfa_f = emro_select(sfa)

# Row-wise ratio (aligned on the shared row index), re-attached to its country/year labels.
pre_all = mufa_all['median'].divide(sfa_all['median'])
allz = pd.concat([pre_all,mufa_all[['iso3','year']]],axis=1)
allz = allz[allz['iso3'].isin(emro)]
allz
# # all_score = medit_score(allz,'hi')
# all_score = pd.DataFrame({'score': (allz['median'] > 1).astype(int)})
# all_score_piv = get_pivots(all_score)
# all_score_piv

Unnamed: 0,median,iso3,year
827,0.645697,AFG,1990
1655,0.651238,AFG,1995
2483,0.664936,AFG,2000
3311,0.661792,AFG,2005
4139,0.660647,AFG,2010
...,...,...,...
1051559,1.491249,YEM,2000
1052387,1.604865,YEM,2005
1053215,1.582730,YEM,2010
1054043,1.589369,YEM,2015


It's exactly the same as `MUFA_div_SFA_all.csv`.

In [6]:
## a function to sum the scores for countries 

def calc_total(path: str, suff: str):
    """Sum every ``*_<suff>.csv`` score file in ``path`` into one frame.

    The first column of each file is treated as a label and kept from the first
    file; all remaining columns are added together. After ``read_csv`` rows
    carry a default integer index, so files are combined by row position and
    are assumed to share the same row order and columns -- TODO confirm.

    ``total_<suff>.csv`` is skipped: it matches the same glob pattern, so a
    total written into this folder by a previous run would otherwise be added
    in again. Files are processed in sorted order so the result does not depend
    on the order the file system lists them.

    Returns the summed DataFrame, or None when no score file matches.
    """
    previous_total = f'total_{suff}.csv'
    score_files = sorted(
        csv_path
        for csv_path in glob.glob(f'{path}/*_{suff}.csv')
        if os.path.basename(csv_path) != previous_total
    )

    cumulative_df = None
    for csv_path in score_files:
        temp = pd.read_csv(csv_path)

        if cumulative_df is None:
            cumulative_df = temp
        else:
            # Skip column 0 (the label column) and add the rest.
            cumulative_df.iloc[:, 1:] += temp.iloc[:, 1:]

    return cumulative_df

# Build one summed score table per group (all / males / females) from the
# per-food-group CSVs in `folder`, and write it next to them.
# NOTE(review): the output name total_<group>.csv also matches the
# '*_<group>.csv' pattern that calc_total globs for -- make sure a total left
# over from a previous run is not summed again when this cell is re-run.
folder = 'scores/medit_cutoff'  
for i in ['all', 'males', 'females']:
    temp_df = calc_total(folder, i)
    # index=False: the label column is already an ordinary column after read_csv.
    temp_df.to_csv(f'{folder}/total_{i}.csv', index=False)

So in **medit_emro** we have scores based on how countries scored within the EMRO region,

and in **medit_global** we have their scores based on how they scored across the world.

In [23]:
# NOTE: this rebinds the module-level `drop_list` that `emro_select` reads at
# call time, so from this cell onward `emro_select` no longer drops
# 'superregion2'. Re-running the earlier country-level cells after this one
# uses this shorter list unless the cell defining the original list is re-run.
drop_list = ['age','urban','edu']
# Columns that `aio_v3` keeps after scoring. There is no 'iso3' here.
keep = [
    'female','year','median','score'
]
def aio_v3(df: pd.DataFrame, food_group: str, scoring_scheme: str, pivot: bool=True):
    """Score a frame for one food group without any country filtering.

    The frame is split by ``emro_select`` into (all, males, females); each part
    is scored with ``medit_score`` and then reduced to the columns listed in
    the module-level ``keep``. With ``pivot=True`` each part is passed to
    ``get_pivots``.

    NOTE(review): ``get_pivots`` pivots on 'iso3', which ``keep`` does not
    contain, so ``pivot=True`` looks unusable here; the callers in this
    notebook pass ``pivot=False`` -- confirm before relying on the default.

    Returns a 3-tuple ordered (all, males, females).
    """
    results = []
    for subset in emro_select(df):
        # Keep only the reporting columns after scoring.
        trimmed = medit_score(subset, food_group, scoring_scheme)[keep]
        results.append(get_pivots(trimmed) if pivot else trimmed)

    all_df, males_df, females_df = results
    return all_df, males_df, females_df

def get_medit_scores(name: str, save: bool, v0_codes: list, scoring_scheme: str):
    """Score one food group from one or more CSV files and save the result.

    This definition replaces the earlier ``get_medit_scores`` in the notebook:
    it scores through ``aio_v3`` with ``pivot=False`` and writes to
    'scores/medit_cutoff/global'. ``v0_codes`` is a list of CSV paths whose
    'median' columns are added together before scoring. The output directory
    is always created; the CSVs are written only when ``save`` is true.

    Returns the (all, males, females) tuple produced by ``aio_v3``.
    """
    def sum_csv_files(file_paths, sum_columns):
        """Add ``sum_columns`` across files; other columns come from the first file.

        Addition aligns on the row index produced by ``read_csv``, so the files
        are assumed to list their rows in the same order -- TODO confirm.
        """
        running_sum = None
        carried_columns = None
        for csv_path in file_paths:
            frame = pd.read_csv(csv_path)
            values = frame[sum_columns]
            if running_sum is not None:
                running_sum = running_sum.add(values, fill_value=0)
                continue
            # First file: seed the sum and remember its descriptive columns.
            running_sum = values
            carried_columns = frame.drop(columns=sum_columns)
        return pd.concat([carried_columns, running_sum], axis=1)

    summed = sum_csv_files(v0_codes, sum_columns=['median'])
    all_df, males_df, females_df = aio_v3(
        summed, food_group=name, scoring_scheme=scoring_scheme, pivot=False
    )

    out_dir = 'scores/medit_cutoff/global'
    os.makedirs(out_dir, exist_ok=True)
    if save:
        for label, frame in (('all', all_df), ('males', males_df), ('females', females_df)):
            frame.to_csv(f'{out_dir}/{name}_{label}.csv')
    return all_df, males_df, females_df

# Folder holding the global CSVs. Built with os.path.join so the notebook also
# runs where '\' is not a path separator.
global_dir = os.path.join('..', 'raw_data', 'Global estimates')

# (file codes, food group name, scoring scheme). A group with several codes has
# the medians of those files summed before scoring.
global_sources = [
    (['v08'], 'whole_grain', 'hi'),
    (['v01'], 'fruit', 'hi'),
    (['v02', 'v04'], 'veg', 'hi'),
    (['v06'], 'nuts', 'hi'),
    (['v05'], 'legumes', 'hi'),
    (['v57', 'v14', 'v13'], 'dairy', 'low'),
    (['v09', 'v10'], 'meats', 'low'),
    (['v11'], 'sea_food', 'hi'),
]

# List of tuples with (file paths, food group name, scoring scheme).
ryuk = [
    ([os.path.join(global_dir, f'{code}_global.csv') for code in codes], group, direction)
    for codes, group, direction in global_sources
]

# Loop through each configuration and generate (and save) the scores.
for file_list, food_group, scheme in ryuk:
    get_medit_scores(name=food_group, save=True, v0_codes=file_list, scoring_scheme=scheme)


In [24]:
def div_n_stich(path1, path2):
    """Load two CSVs and return the first with 'median' turned into a ratio.

    'median' from ``path1`` is divided element-wise (aligned on the default
    ``read_csv`` index) by 'median' from ``path2``. The column is popped before
    being written back, so it becomes the last column of the returned frame.
    """
    top = pd.read_csv(path1)
    bottom = pd.read_csv(path2)

    top_values = top.pop('median')
    bottom_values = bottom.pop('median')
    top['median'] = top_values / bottom_values
    return top

def get_medit_scores_fat(name: str, save: bool, p1, p2, type: str):
    """Score a ratio of two CSVs' medians (``p1`` / ``p2``) for the global data.

    Parameters
    ----------
    name : food group key passed to ``aio_v3`` and used in the output file names.
    save : when true, write the three scored frames as CSV.
    p1, p2 : paths of the numerator and denominator CSV files.
    type : scoring scheme, 'hi' or 'low' (parameter name kept for existing callers).

    Returns the (all, males, females) tuple produced by ``aio_v3``.
    """
    ratio_df = div_n_stich(p1, p2)
    all_df, males_df, females_df = aio_v3(ratio_df, food_group=name, scoring_scheme=type, pivot=False)

    if save:
        # Forward slashes match get_medit_scores. The previous non-raw
        # f'scores\medit_cutoff\global\{name}...' relied on invalid escape
        # sequences and only named a file inside the folder on Windows.
        out_dir = 'scores/medit_cutoff/global'
        os.makedirs(out_dir, exist_ok=True)
        # The index is written, as get_medit_scores does, so every file in the
        # folder has the same column layout. calc_total adds files with
        # iloc[:, 1:], which needs the leading column to be the same in all of them.
        all_df.to_csv(f'{out_dir}/{name}_all.csv')
        males_df.to_csv(f'{out_dir}/{name}_males.csv')
        females_df.to_csv(f'{out_dir}/{name}_females.csv')
    return all_df, males_df, females_df

# Score the v28 / v27 ratio as 'MUFA_div_SFA' for the global data. Paths are
# joined with os.path.join so they resolve on any platform; the assignment to _
# keeps the returned frames from being displayed as the cell output.
_,_,_ = get_medit_scores_fat(name='MUFA_div_SFA', save=True,
                             p1=os.path.join('..', 'raw_data', 'Global estimates', 'v28_global.csv'),
                             p2=os.path.join('..', 'raw_data', 'Global estimates', 'v27_global.csv'),
                             type='hi')

## a function to sum the scores for countries 

def calc_total(path: str, suff: str):
    """Sum every ``*_<suff>.csv`` score file in ``path`` into one frame.

    The first column of each file is treated as a label and kept from the first
    file; all remaining columns are added together. After ``read_csv`` rows
    carry a default integer index, so files are combined by row position and
    are assumed to share the same row order and columns -- TODO confirm.

    ``total_<suff>.csv`` is skipped: it matches the same glob pattern, so a
    total written into this folder by a previous run would otherwise be added
    in again. Files are processed in sorted order so the result does not depend
    on the order the file system lists them.

    Returns the summed DataFrame, or None when no score file matches.
    """
    previous_total = f'total_{suff}.csv'
    score_files = sorted(
        csv_path
        for csv_path in glob.glob(f'{path}/*_{suff}.csv')
        if os.path.basename(csv_path) != previous_total
    )

    cumulative_df = None
    for csv_path in score_files:
        temp = pd.read_csv(csv_path)

        if cumulative_df is None:
            cumulative_df = temp
        else:
            # Skip column 0 (the label column) and add the rest.
            cumulative_df.iloc[:, 1:] += temp.iloc[:, 1:]

    return cumulative_df

# Build one summed score table per group (all / males / females) from the
# per-food-group CSVs in the global folder, and write it next to them.
# NOTE(review): the output name total_<group>.csv also matches the
# '*_<group>.csv' pattern that calc_total globs for -- make sure a total left
# over from a previous run is not summed again when this cell is re-run.
folder = 'scores/medit_cutoff/global'  
for i in ['all', 'males', 'females']:
    temp_df = calc_total(folder, i)
    # index=False: the leading column is already an ordinary column after read_csv.
    temp_df.to_csv(f'{folder}/total_{i}.csv', index=False)