In [1]:
import numpy as np 
import pandas as pd 
import os 
import glob
import warnings
warnings.filterwarnings('ignore')    ## I don't like pandas setting with copy warnings 

In [2]:
### FYI: GDD has no data for Somalia. I have included it in my list tho

# ISO 3166-1 alpha-3 codes of the WHO Eastern Mediterranean Region (EMRO).
# Libya is 'LBY'. The list used to contain 'LBR', which is Liberia (not an EMRO
# member), so Libya was silently missing and Liberia was included in its place.
emro = ['AFG', 'ARE', 'BHR', 'DJI', 'EGY', 'IRN', 'IRQ', 'JOR',
        'KWT', 'LBN', 'LBY', 'MAR', 'OMN', 'PAK', 'PSE', 'QAT', 'SAU',
        'SDN', 'SOM', 'SYR', 'YEM', 'TUN']

# Columns removed by emro_select once it has filtered on the age/edu/urban strata.
drop_list = ['superregion2', 'age', 'urban', 'edu']

def select(col, val, df):
    """Return the rows of ``df`` whose ``col`` entry equals ``val``."""
    keep = df[col] == val
    return df.loc[keep]

def emro_select(df: pd.DataFrame):
    """Split a country-level table into (all, males, females) frames.

    Keeps only rows where ``age``, ``edu`` and ``urban`` all equal 999, drops the
    columns named in ``drop_list``, then splits on ``female`` (999 -> all,
    0 -> males, 1 -> females).

    Despite the name, no country filtering happens here: every ``iso3`` is kept.
    NOTE(review): 999 looks like the dataset's "not stratified" code - confirm
    against the GDD codebook.
    """
    unstratified = df
    for stratum in ('age', 'edu', 'urban'):
        unstratified = select(stratum, 999, unstratified)
    unstratified = unstratified.drop(drop_list, axis=1)

    # Order matters: callers unpack this as (all, males, females).
    return tuple(select('female', sex_code, unstratified) for sex_code in (999, 0, 1))

def pos_scores(df, value_column):
    """Score rows 1 when their value is strictly below the column median, else 0.

    Used for components where a low value earns the point.

    Args:
        df: frame holding ``value_column``.
        value_column: name of the numeric column to score.

    Returns:
        A new frame equal to ``df`` plus an integer ``score`` column. ``df`` itself
        is left untouched, so scoring a filtered slice no longer triggers pandas'
        SettingWithCopy warning.

    Raises:
        ValueError: if any value cannot be ordered against the median (for
            example a NaN value, or a NaN median).
    """
    values = df[value_column]
    med = values.median()
    below = values < med
    at_or_above = values >= med

    # NaN compares False both ways, so a row in neither mask has no defined score.
    uncomparable = ~(below | at_or_above)
    if uncomparable.any():
        raise ValueError(
            f"{int(uncomparable.sum())} value(s) in column {value_column!r} cannot be "
            f"compared with the median ({med!r}); drop or fill missing values first"
        )

    return df.assign(score=below.astype('int64'))

def neg_scores(df, value_column):
    """Score rows 1 when their value is at or above the column median, else 0.

    Used for components where a high value earns the point.

    Args:
        df: frame holding ``value_column``.
        value_column: name of the numeric column to score.

    Returns:
        A new frame equal to ``df`` plus an integer ``score`` column. ``df`` itself
        is left untouched, so scoring a filtered slice no longer triggers pandas'
        SettingWithCopy warning.

    Raises:
        ValueError: if any value cannot be ordered against the median (for
            example a NaN value, or a NaN median).
    """
    values = df[value_column]
    med = values.median()
    below = values < med
    at_or_above = values >= med

    # NaN compares False both ways, so a row in neither mask has no defined score.
    uncomparable = ~(below | at_or_above)
    if uncomparable.any():
        raise ValueError(
            f"{int(uncomparable.sum())} value(s) in column {value_column!r} cannot be "
            f"compared with the median ({med!r}); drop or fill missing values first"
        )

    return df.assign(score=at_or_above.astype('int64'))

def medit_score(data: pd.DataFrame, type: str) -> pd.DataFrame:
    """Score the ``median`` column of ``data`` against each year's own median.

    Args:
        data: frame with at least ``year`` and ``median`` columns.
        type: ``'low'`` to award the point below the yearly median (``pos_scores``)
            or ``'hi'`` to award it at or above the yearly median (``neg_scores``).

    Returns:
        The rows of ``data`` grouped year by year with an added ``score`` column.
        An unknown ``type`` prints a hint and returns an empty frame. A frame with
        no rows comes back with an empty ``score`` column.
    """
    # Validate once, up front. Inside the loop the check was never reached for
    # empty data, which then crashed in pd.concat([]) instead.
    if type not in ('low', 'hi'):
        print('Fix your type')
        return pd.DataFrame()

    years = data['year'].unique()
    if len(years) == 0:
        # Nothing to score: keep the schema consistent rather than letting
        # pd.concat raise "No objects to concatenate".
        return data.assign(score=pd.Series(dtype='int64'))

    scorer = pos_scores if type == 'low' else neg_scores
    scores = [scorer(select('year', year, data), 'median') for year in years]
    return pd.concat(scores, axis=0)

def get_pivots(df):
    """Reshape long-format scores into a wide table: one row per ``iso3``, one column per ``year``.

    Cells hold the ``score`` for that country and year (``pivot_table`` averages
    duplicates by default).
    """
    year_by_country = df.pivot_table(values='score', index='year', columns='iso3')
    return year_by_country.T


In [3]:
def aio_v2(df: pd.DataFrame, type: str, pivot: bool = True):
    """Run select -> score -> EMRO filter -> (optional) pivot for all three sex groups.

    Args:
        df: raw country-level table accepted by ``emro_select``.
        type: scoring direction passed through to ``medit_score`` ('low' or 'hi').
        pivot: when True, return country-by-year tables from ``get_pivots``.

    Returns:
        Tuple ``(all, males, females)`` of scored frames restricted to ``emro``.
    """
    # Score first, on every country, so each yearly median is the worldwide one.
    groups = [medit_score(group, type) for group in emro_select(df)]

    # Only then narrow the output down to the EMRO countries.
    groups = [group[group['iso3'].isin(emro)] for group in groups]

    if pivot == True:  # noqa: E712 - kept as an equality test to match existing behaviour
        groups = [get_pivots(group) for group in groups]

    return tuple(groups)

def get_medit_scores(name: str, save: bool, v0_codes: list, type: str):
    """Sum the ``median`` column of one or more CSVs, score it, and optionally save.

    Args:
        name: file-name prefix for the saved tables (``{name}_all.csv`` etc.).
        save: when True, write the three tables under ``scores/medit_global``.
        v0_codes: paths of the CSV files whose ``median`` columns are added together.
        type: scoring direction passed to ``aio_v2`` ('low' or 'hi').

    Returns:
        Tuple ``(all, males, females)`` of pivoted score tables from ``aio_v2``.

    Raises:
        ValueError: if ``v0_codes`` is empty.
    """
    def sum_csv_files(file_paths, sum_columns):
        # Add `sum_columns` across the files; every other column is taken from the first file.
        df_sum = None
        non_sum_columns = None

        for file in file_paths:
            df = pd.read_csv(file)
            df_sum_cols = df[sum_columns]

            if df_sum is None:
                df_sum = df_sum_cols
                non_sum_columns = df.drop(columns=sum_columns)
            else:
                # Rows are matched on the default row index, i.e. by position.
                # NOTE(review): assumes all files list the same rows in the same order - confirm.
                df_sum = df_sum.add(df_sum_cols, fill_value=0)

        return pd.concat([non_sum_columns, df_sum], axis=1)

    if not v0_codes:
        # Previously surfaced as an opaque "All objects passed were None" from pd.concat.
        raise ValueError('v0_codes must contain at least one CSV path')

    total = sum_csv_files(v0_codes, sum_columns=['median'])
    all_scores, males, females = aio_v2(total, type=type, pivot=True)

    # os.path.join instead of 'scores\medit_global': "\m" and "\{" are invalid escape
    # sequences, and a hard-coded backslash only means "directory" on Windows.
    out_dir = os.path.join('scores', 'medit_global')
    os.makedirs(out_dir, exist_ok=True)

    if save:
        all_scores.to_csv(os.path.join(out_dir, f'{name}_all.csv'))
        males.to_csv(os.path.join(out_dir, f'{name}_males.csv'))
        females.to_csv(os.path.join(out_dir, f'{name}_females.csv'))
    return all_scores, males, females


In [4]:
# One entry per score component: (CSV files whose medians are summed, output name, scoring direction).
ryuk = [
    ([r'..\raw_data\Country-level estimates\v08_cnty.csv'], 'whole_grain', 'hi'),
    ([r'..\raw_data\Country-level estimates\v01_cnty.csv'], 'fruit', 'hi'),
    ([r'..\raw_data\Country-level estimates\v02_cnty.csv',
      r'..\raw_data\Country-level estimates\v04_cnty.csv'], 'veg', 'hi'),
    ([r'..\raw_data\Country-level estimates\v06_cnty.csv'], 'nuts', 'hi'),
    ([r'..\raw_data\Country-level estimates\v05_cnty.csv'], 'legumes', 'hi'),
    ([r'..\raw_data\Country-level estimates\v57_cnty.csv',
      r'..\raw_data\Country-level estimates\v14_cnty.csv',
      r'..\raw_data\Country-level estimates\v13_cnty.csv'], 'dairy', 'low'),
    ([r'..\raw_data\Country-level estimates\v09_cnty.csv',
      r'..\raw_data\Country-level estimates\v10_cnty.csv'], 'meats', 'low'),
    ([r'..\raw_data\Country-level estimates\v11_cnty.csv'], 'sea_food', 'hi'),
]

# Score and save every component; the returned tables are not needed here.
for csv_paths, component, direction in ryuk:
    _, _, _ = get_medit_scores(name=component, save=True, v0_codes=csv_paths, type=direction)

In [5]:
def div_n_stich(path1, path2):
    """Read two CSVs and return the first with ``median`` replaced by median1 / median2.

    The division is element-wise on the default row index; the ratio column ends
    up as the last column of the returned frame.
    """
    numerator_df = pd.read_csv(path1)
    denominator_df = pd.read_csv(path2)

    numerator = numerator_df.pop('median')
    denominator = denominator_df.pop('median')

    numerator_df['median'] = numerator.divide(denominator)
    return numerator_df

def get_medit_scores_fat(name: str, save: bool, p1, p2, type: str):
    """Score the ratio of two CSVs' ``median`` columns and optionally save the tables.

    Args:
        name: file-name prefix for the saved tables (``{name}_all.csv`` etc.).
        save: when True, write the three tables under ``scores/medit_global``.
        p1: path of the CSV providing the numerator.
        p2: path of the CSV providing the denominator.
        type: scoring direction passed to ``aio_v2`` ('low' or 'hi').

    Returns:
        Tuple ``(all, males, females)`` of pivoted score tables from ``aio_v2``.
    """
    total = div_n_stich(p1, p2)
    all_scores, males, females = aio_v2(total, type=type, pivot=True)

    if save:
        # os.path.join instead of 'scores\medit_global\{name}...': "\m" and "\{" are
        # invalid escape sequences, and a literal backslash is only a separator on Windows.
        out_dir = os.path.join('scores', 'medit_global')
        # Do not rely on another function having created the folder first.
        os.makedirs(out_dir, exist_ok=True)
        all_scores.to_csv(os.path.join(out_dir, f'{name}_all.csv'))
        males.to_csv(os.path.join(out_dir, f'{name}_males.csv'))
        females.to_csv(os.path.join(out_dir, f'{name}_females.csv'))
    return all_scores, males, females

# Fat-quality component: v28 divided by v27, scored so that a higher ratio earns the point.
_, _, _ = get_medit_scores_fat(
    name='MUFA_div_SFA',
    save=True,
    p1=r'..\raw_data\Country-level estimates\v28_cnty.csv',
    p2=r'..\raw_data\Country-level estimates\v27_cnty.csv',
    type='hi',
)

In [6]:
### Sanity check: recompute the all-genders MUFA/SFA scores by hand, without the helper pipeline.
### Note that no EMRO filter is applied here, so the table below lists every country.

mufa = pd.read_csv(r'..\raw_data\Country-level estimates\v28_cnty.csv')
sfa = pd.read_csv(r'..\raw_data\Country-level estimates\v27_cnty.csv')

mufa_all, mufa_m, mufa_f = emro_select(mufa)
# Was `sfa_all, sfa_m, sfa_m`: the male frame was overwritten by the female one
# and `sfa_f` was never defined.
sfa_all, sfa_m, sfa_f = emro_select(sfa)

pre_all = mufa_all['median'].divide(sfa_all['median'])
allz = pd.concat([pre_all, mufa_all[['iso3', 'year']]], axis=1)
all_score = medit_score(allz, 'hi')
all_score_piv = get_pivots(all_score)
all_score_piv

year,1990,1995,2000,2005,2010,2015,2018
iso3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AFG,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AGO,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ALB,1.0,1.0,1.0,1.0,1.0,1.0,1.0
ARE,1.0,1.0,0.0,1.0,1.0,1.0,1.0
ARG,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
WSM,0.0,0.0,0.0,0.0,0.0,0.0,0.0
YEM,1.0,1.0,1.0,1.0,1.0,1.0,1.0
ZAF,1.0,1.0,1.0,0.0,1.0,1.0,1.0
ZMB,0.0,0.0,0.0,0.0,0.0,0.0,0.0


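For the EMRO countries, this is exactly the same as the saved `MUFA_div_SFA_all.csv` (that file keeps only the EMRO rows, whereas the table above lists every country).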

In [7]:
## a function to sum the scores for countries 

def calc_total(path: str, suff: str):
    """Add up every ``*_{suff}.csv`` score table in ``path`` into one total table.

    Args:
        path: folder holding the per-component score tables.
        suff: group suffix of the files to combine (e.g. 'all', 'males', 'females').

    Returns:
        A frame whose first column is the key column of the input tables and whose
        remaining columns are the summed scores, or ``None`` when no file matches.
        A key missing from any input gets NaN totals rather than a partial sum.
    """
    # A previously written total_{suff}.csv also matches the glob; skip it so that
    # re-running the notebook does not add the old total into the new one.
    stale_total = f'total_{suff}.csv'
    score_files = sorted(
        file for file in glob.glob(f'{path}/*_{suff}.csv')
        if os.path.basename(file) != stale_total
    )
    if not score_files:
        return None

    frames = [pd.read_csv(file) for file in score_files]
    key = frames[0].columns[0]

    # Align on the key column (first column) instead of row position, so tables
    # with a different row order or a missing country cannot be added to the wrong row.
    cumulative_df = frames[0].set_index(key)
    for frame in frames[1:]:
        cumulative_df = cumulative_df.add(frame.set_index(frame.columns[0]))

    return cumulative_df.rename_axis(key).reset_index()

# Write one combined table per sex group next to the per-component score files.
folder = 'scores/medit_global'
for suffix in ('all', 'males', 'females'):
    temp_df = calc_total(folder, suffix)
    temp_df.to_csv(f'{folder}/total_{suffix}.csv', index=False)

So in **medit_emro** we have scores based on how countries scored within the EMRO region,

and in **medit_global** we have their scores based on how they scored across the whole world.