In [1]:
import numpy as np 
import pandas as pd 
import os 
import glob
import warnings
warnings.filterwarnings('ignore')    ## I don't like pandas setting with copy warnings 

In [2]:
# ISO3 codes of the 22 WHO Eastern Mediterranean Region (EMRO) members.
# Libya is 'LBY'; the previous entry 'LBR' is Liberia, which is not an EMRO
# member, so Libya was silently dropped and Liberia kept in every output.
emro = ['AFG', 'ARE', 'BHR', 'DJI', 'EGY', 'IRN', 'IRQ', 'JOR',
        'KWT', 'LBN', 'LBY', 'MAR', 'OMN', 'PAK', 'PSE', 'QAT', 'SAU',
        'SDN', 'SOM', 'SYR', 'YEM', 'TUN']
# Columns removed by emro_select once the rows have been filtered on them.
drop_list = ['superregion2', 'age', 'urban', 'edu']

def select(col, val, df):
    """Return the rows of ``df`` whose ``col`` value equals ``val``.

    The original row labels are kept; the input frame is not modified.
    """
    matches = df[col] == val
    return df.loc[matches]

def emro_select(df:pd.DataFrame):
    """Split a country-level estimates frame into three sex-specific subsets.

    Rows are first restricted to ``age == 999``, ``edu == 999`` and
    ``urban == 999`` (presumably the dataset's "all strata pooled" code --
    TODO confirm against the data dictionary), then the columns listed in
    ``drop_list`` are removed.

    Returns:
        A 3-tuple of DataFrames selected on the ``female`` column, in the
        order ``female == 999``, ``female == 0``, ``female == 1``.
    """
    pooled = df
    for stratifier in ('age', 'edu', 'urban'):
        pooled = select(stratifier, 999, pooled)
    pooled = pooled.drop(drop_list, axis=1)

    both_sexes, male_rows, female_rows = (
        select('female', code, pooled) for code in (999, 0, 1)
    )
    return both_sexes, male_rows, female_rows

In [3]:
### functions for calculating EAT_lancet scores

def eat_score(df, min_val, max_val):
    """Score each row 1 if its ``median`` lies in ``[min_val, max_val]``, else 0.

    Args:
        df: frame with ``iso3``, ``year`` and ``median`` columns.
        min_val: lower bound of the accepted range (inclusive).
        max_val: upper bound of the accepted range (inclusive).

    Returns:
        DataFrame with columns ``Country``, ``Year`` and ``EAT_Lancet_score``
        on a fresh 0..n-1 index, one row per input row. A missing (NaN)
        median scores 0. An empty input yields an empty frame that still
        carries the three columns, so downstream code can rely on the schema.
    """
    columns = ['Country', 'Year', 'EAT_Lancet_score']
    if len(df) == 0:
        return pd.DataFrame(columns=columns)

    # Series.between is inclusive on both ends and False for NaN, matching
    # the chained comparison `min_val <= median <= max_val`.
    in_range = df['median'].between(min_val, max_val)

    # to_numpy() discards the source index so the result gets a RangeIndex.
    return pd.DataFrame({
        'Country': df['iso3'].to_numpy(),
        'Year': df['year'].to_numpy(),
        'EAT_Lancet_score': in_range.to_numpy().astype('int64'),
    })

def get_pivots(df):
    """Reshape long-format scores into a Country x Year table.

    ``df`` must have ``Year``, ``Country`` and ``EAT_Lancet_score`` columns.
    Duplicate (Year, Country) pairs are combined with ``pivot_table``'s
    default aggregation (the mean). Rows of the result are countries,
    columns are years.
    """
    year_by_country = df.pivot_table(
        values='EAT_Lancet_score',
        index='Year',
        columns='Country',
    )
    return year_by_country.T

def aio (df, min, max):
    """Run the whole scoring pipeline on one estimates frame.

    The frame is split with ``emro_select``, each subset is scored with
    ``eat_score`` against ``[min, max]`` and then reshaped by ``get_pivots``.

    ``min`` and ``max`` shadow the builtins but are kept because callers
    pass them by keyword.

    Returns:
        A 3-tuple of pivoted score tables in the order produced by
        ``emro_select`` (female == 999, female == 0, female == 1).
    """
    subsets = emro_select(df)
    # Score everything first, then pivot, as two separate passes.
    scored = [eat_score(subset, min, max) for subset in subsets]
    both_sexes, male_table, female_table = [get_pivots(s) for s in scored]
    return both_sexes, male_table, female_table

def sum_csv_files(file_paths, sum_columns):
    """Read several CSV files and add up ``sum_columns`` across them.

    Args:
        file_paths: iterable of CSV paths; must contain at least one path.
        sum_columns: list of column names to sum across the files.

    Returns:
        DataFrame made of the non-summed columns of the FIRST file followed
        by the summed columns.

    Raises:
        ValueError: if ``file_paths`` is empty.

    Note:
        Rows are matched by their position in each file (the default
        RangeIndex), not by any key column, so every file is expected to
        list its rows in the same order. A row missing from one file counts
        as 0 for that file (``fill_value=0``).
    """
    file_paths = list(file_paths)
    if not file_paths:
        # Previously this surfaced as an opaque error from pd.concat.
        raise ValueError('sum_csv_files needs at least one CSV path')

    running_sum = None
    passthrough = None

    for csv_path in file_paths:
        frame = pd.read_csv(csv_path)
        to_sum = frame[sum_columns]

        if running_sum is None:
            running_sum = to_sum
            # Only the first file contributes the non-summed columns.
            passthrough = frame.drop(columns=sum_columns)
        else:
            running_sum = running_sum.add(to_sum, fill_value=0)

    return pd.concat([passthrough, running_sum], axis=1)

def calc_for_item (v0_codes:list, name:str, min, max):
    """Score one food item for the EMRO countries and write the results.

    Args:
        v0_codes: list of CSV paths whose ``median`` columns are summed
            (via ``sum_csv_files``) before scoring.
        name: item name used as the prefix of the output file names.
        min: lower bound of the accepted intake range (inclusive).
        max: upper bound of the accepted intake range (inclusive).

    Returns:
        A 3-tuple of Country x Year score tables (all, males, females),
        restricted to the countries listed in ``emro``.

    Side effects:
        Creates ``scores/eat`` if needed and writes ``{name}_all.csv``,
        ``{name}_males.csv`` and ``{name}_females.csv`` into it.
    """
    # Build the path with os.path.join instead of the literal 'scores\eat':
    # the backslash form is an invalid escape sequence and, off Windows,
    # produced files that the later 'scores/eat/*' glob could never find.
    out_dir = os.path.join('scores', 'eat')

    total = sum_csv_files(v0_codes, sum_columns=['median'])
    tables = aio(total, min=min, max=max)

    # Keep only the EMRO countries (the pivot index holds the ISO3 codes).
    tables = tuple(table[table.index.isin(emro)] for table in tables)

    os.makedirs(out_dir, exist_ok=True)
    for label, table in zip(('all', 'males', 'females'), tables):
        table.to_csv(os.path.join(out_dir, f'{name}_{label}.csv'))
    return tables



In [4]:
# Directory holding the country-level estimate files. Built with os.path.join
# so the notebook also runs where '\' is not a path separator.
_CNTY_DIR = os.path.join('..', 'raw_data', 'Country-level estimates')


def _cnty_files(*codes):
    """Return the country-level CSV paths for the given variable codes."""
    return [os.path.join(_CNTY_DIR, f'{code}_cnty.csv') for code in codes]


# One entry per food item:
#   (CSV paths whose medians are summed, output name, range min, range max)
# The tuple fields are passed to calc_for_item as v0_codes, name, min, max.
naruto = [
    (_cnty_files('v08'), 'whole_grain', 232, 464),
    (_cnty_files('v01'), 'fruit', 100, 300),
    (_cnty_files('v02'), 'veg', 200, 600),
    (_cnty_files('v06'), 'nuts', 25, 100),
    (_cnty_files('v05'), 'legumes', 50, 100),
    (_cnty_files('v27', 'v28', 'v29', 'v30', 'v31', 'v33'), 'fat', 20, 91.8),
    (_cnty_files('v03', 'v04'), 'pot_starch', 50, 100),
    (_cnty_files('v35'), 'sugar', 0, 32),
    (_cnty_files('v12'), 'eggs', 13, 25),
    (_cnty_files('v57', 'v14', 'v13'), 'dairy', 250, 500),
    (_cnty_files('v10'), 'red_meat', 14, 28),
    (_cnty_files('v11'), 'sea_food', 28, 100),
]

# Per-item score tables, in the same order as `naruto`.
a = []
m = []
f = []

for i, j, k, l in naruto:
    a_temp, m_temp, f_temp = calc_for_item(v0_codes=i, name=j, min=k, max=l)
    a.append(a_temp)
    m.append(m_temp)
    f.append(f_temp)


In [5]:
## a function to sum the scores for countries 

def calc_total(path: str, suff: str):
    """Add up every per-item score table ``*_{suff}.csv`` found in ``path``.

    Args:
        path: folder containing the per-item score CSVs.
        suff: file-name suffix selecting the subset (e.g. ``'all'``).

    Returns:
        DataFrame whose first column is the key column of the input files
        (their first column) and whose remaining columns hold the summed
        scores, or ``None`` when no matching file exists.

    Notes:
        * Files named ``total_*.csv`` are skipped. The totals are written
          into the same folder and match the same glob, so without this a
          second run would add the previous total into the new one.
        * Files are processed in sorted order so the result does not depend
          on the file-system listing order.
        * Rows are aligned on the first column rather than on row position,
          so files whose rows differ in order or number are not silently
          mis-added. A key missing from any file yields NaN for that row.
    """
    score_files = sorted(
        csv_path
        for csv_path in glob.glob(f'{path}/*_{suff}.csv')
        if not os.path.basename(csv_path).startswith('total_')
    )

    cumulative_df = None
    key_name = None

    for csv_path in score_files:
        scores = pd.read_csv(csv_path)
        scores = scores.set_index(scores.columns[0])

        if cumulative_df is None:
            cumulative_df = scores
            key_name = scores.index.name
        else:
            cumulative_df = cumulative_df.add(scores)

    if cumulative_df is None:
        return None
    # Restore the key as an ordinary first column, under its original name.
    return cumulative_df.rename_axis(key_name).reset_index()

# Write one total-score table per subset next to the per-item tables.
folder = 'scores/eat'
for i in ['all', 'males', 'females']:
    temp_df = calc_total(folder, i)
    if temp_df is None:
        # calc_total returns None when its glob matches no file; fail with a
        # clear message instead of an AttributeError on None.to_csv.
        raise FileNotFoundError(f'no *_{i}.csv score files found in {folder!r}')
    temp_df.to_csv(f'{folder}/total_{i}.csv', index=False)

Calculating the EAT-Lancet score for the global estimates

In [6]:
def select(col, val, df):
    """Return the rows of ``df`` whose ``col`` value equals ``val``.

    The original row labels are kept; the input frame is not modified.
    """
    matches = df[col] == val
    return df.loc[matches]

def not_emro_select(df:pd.DataFrame):
    """Split a global estimates frame into three sex-specific subsets.

    Rows are restricted to ``age == 999``, ``edu == 999`` and
    ``urban == 999`` (presumably the dataset's "all strata pooled" code --
    TODO confirm against the data dictionary). No columns are dropped.

    Returns:
        A 3-tuple of DataFrames selected on the ``female`` column, in the
        order ``female == 999``, ``female == 0``, ``female == 1``.
    """
    pooled = df
    for stratifier in ('age', 'edu', 'urban'):
        pooled = select(stratifier, 999, pooled)

    both_sexes, male_rows, female_rows = (
        select('female', code, pooled) for code in (999, 0, 1)
    )
    return both_sexes, male_rows, female_rows


def eat_score(df, min_val, max_val):
    """Score each row of a global frame: 1 if ``median`` is in range, else 0.

    Global variant of the scorer: every output row is labelled with the
    fixed country name ``'global'``. This definition replaces the
    country-level ``eat_score`` defined in an earlier cell once it runs.

    Args:
        df: frame with ``year`` and ``median`` columns.
        min_val: lower bound of the accepted range (inclusive).
        max_val: upper bound of the accepted range (inclusive).

    Returns:
        DataFrame with ``Country``, ``Year`` and ``EAT_Lancet_score``
        columns, one row per input row.
    """
    records = [
        {
            'Country': 'global',
            'Year': row['year'],
            'EAT_Lancet_score': 1 if min_val <= row['median'] <= max_val else 0,
        }
        for _, row in df.iterrows()
    ]
    return pd.DataFrame(records)

def get_pivots(df):
    """Reshape long-format scores into a Country x Year table.

    ``df`` must have ``Year``, ``Country`` and ``EAT_Lancet_score`` columns.
    Duplicate (Year, Country) pairs are combined with ``pivot_table``'s
    default aggregation (the mean). Rows of the result are countries,
    columns are years.
    """
    year_by_country = df.pivot_table(
        values='EAT_Lancet_score',
        index='Year',
        columns='Country',
    )
    return year_by_country.T

def aio (df, min, max):
    """Run the whole scoring pipeline on one global estimates frame.

    The frame is split with ``not_emro_select``, each subset is scored with
    ``eat_score`` against ``[min, max]`` and then reshaped by ``get_pivots``.
    This definition replaces the country-level ``aio`` defined in an earlier
    cell once it runs.

    ``min`` and ``max`` shadow the builtins but are kept because callers
    pass them by keyword.

    Returns:
        A 3-tuple of pivoted score tables in the order produced by
        ``not_emro_select`` (female == 999, female == 0, female == 1).
    """
    subsets = not_emro_select(df)
    # Score everything first, then pivot, as two separate passes.
    scored = [eat_score(subset, min, max) for subset in subsets]
    both_sexes, male_table, female_table = [get_pivots(s) for s in scored]
    return both_sexes, male_table, female_table

def sum_csv_files(file_paths, sum_columns):
    """Read several CSV files and add up ``sum_columns`` across them.

    Args:
        file_paths: iterable of CSV paths; must contain at least one path.
        sum_columns: list of column names to sum across the files.

    Returns:
        DataFrame made of the non-summed columns of the FIRST file followed
        by the summed columns.

    Raises:
        ValueError: if ``file_paths`` is empty.

    Note:
        Rows are matched by their position in each file (the default
        RangeIndex), not by any key column, so every file is expected to
        list its rows in the same order. A row missing from one file counts
        as 0 for that file (``fill_value=0``).
    """
    file_paths = list(file_paths)
    if not file_paths:
        # Previously this surfaced as an opaque error from pd.concat.
        raise ValueError('sum_csv_files needs at least one CSV path')

    running_sum = None
    passthrough = None

    for csv_path in file_paths:
        frame = pd.read_csv(csv_path)
        to_sum = frame[sum_columns]

        if running_sum is None:
            running_sum = to_sum
            # Only the first file contributes the non-summed columns.
            passthrough = frame.drop(columns=sum_columns)
        else:
            running_sum = running_sum.add(to_sum, fill_value=0)

    return pd.concat([passthrough, running_sum], axis=1)

def calc_for_item_global (v0_codes:list, name:str, min, max):
    """Score one food item on the global estimates and write the results.

    Args:
        v0_codes: list of CSV paths whose ``median`` columns are summed
            (via ``sum_csv_files``) before scoring.
        name: item name used as the prefix of the output file names.
        min: lower bound of the accepted intake range (inclusive).
        max: upper bound of the accepted intake range (inclusive).

    Returns:
        A 3-tuple of score tables (all, males, females) as produced by
        ``aio``.

    Side effects:
        Creates ``scores/eat_global`` if needed and writes
        ``{name}_all.csv``, ``{name}_males.csv`` and ``{name}_females.csv``
        into it.
    """
    # Build the path with os.path.join instead of the literal
    # 'scores\eat_global': the backslash form is an invalid escape sequence
    # and, off Windows, produced files that the later 'scores/eat_global/*'
    # glob could never find.
    out_dir = os.path.join('scores', 'eat_global')

    total = sum_csv_files(v0_codes, sum_columns=['median'])
    tables = tuple(aio(total, min=min, max=max))

    os.makedirs(out_dir, exist_ok=True)
    for label, table in zip(('all', 'males', 'females'), tables):
        table.to_csv(os.path.join(out_dir, f'{name}_{label}.csv'))
    return tables

# Directory holding the global estimate files. Built with os.path.join so
# the notebook also runs where '\' is not a path separator.
_GLOBAL_DIR = os.path.join('..', 'raw_data', 'Global estimates')


def _global_files(*codes):
    """Return the global-estimate CSV paths for the given variable codes."""
    return [os.path.join(_GLOBAL_DIR, f'{code}_global.csv') for code in codes]


# One entry per food item:
#   (CSV paths whose medians are summed, output name, range min, range max)
# The tuple fields are passed to calc_for_item_global as v0_codes, name,
# min, max.
jiraya = [
    (_global_files('v08'), 'whole_grain', 232, 464),
    (_global_files('v01'), 'fruit', 100, 300),
    (_global_files('v02'), 'veg', 200, 600),
    (_global_files('v06'), 'nuts', 25, 100),
    (_global_files('v05'), 'legumes', 50, 100),
    (_global_files('v27', 'v28', 'v29', 'v30', 'v31', 'v33'), 'fat', 20, 91.8),
    (_global_files('v03', 'v04'), 'pot_starch', 50, 100),
    (_global_files('v35'), 'sugar', 0, 32),
    (_global_files('v12'), 'eggs', 13, 25),
    (_global_files('v57', 'v14', 'v13'), 'dairy', 250, 500),
    (_global_files('v10'), 'red_meat', 14, 28),
    (_global_files('v11'), 'sea_food', 28, 100),
]

# Per-item score tables, in the same order as `jiraya`.
a = []
m = []
f = []

for i, j, k, l in jiraya:
    a_temp, m_temp, f_temp = calc_for_item_global(v0_codes=i, name=j, min=k, max=l)
    a.append(a_temp)
    m.append(m_temp)
    f.append(f_temp)

## a function to sum the per-item scores into a total score (here for the global estimates)

def calc_total(path: str, suff: str):
    """Add up every per-item score table ``*_{suff}.csv`` found in ``path``.

    Args:
        path: folder containing the per-item score CSVs.
        suff: file-name suffix selecting the subset (e.g. ``'all'``).

    Returns:
        DataFrame whose first column is the key column of the input files
        (their first column) and whose remaining columns hold the summed
        scores, or ``None`` when no matching file exists.

    Notes:
        * Files named ``total_*.csv`` are skipped. The totals are written
          into the same folder and match the same glob, so without this a
          second run would add the previous total into the new one.
        * Files are processed in sorted order so the result does not depend
          on the file-system listing order.
        * Rows are aligned on the first column rather than on row position,
          so files whose rows differ in order or number are not silently
          mis-added. A key missing from any file yields NaN for that row.
    """
    score_files = sorted(
        csv_path
        for csv_path in glob.glob(f'{path}/*_{suff}.csv')
        if not os.path.basename(csv_path).startswith('total_')
    )

    cumulative_df = None
    key_name = None

    for csv_path in score_files:
        scores = pd.read_csv(csv_path)
        scores = scores.set_index(scores.columns[0])

        if cumulative_df is None:
            cumulative_df = scores
            key_name = scores.index.name
        else:
            cumulative_df = cumulative_df.add(scores)

    if cumulative_df is None:
        return None
    # Restore the key as an ordinary first column, under its original name.
    return cumulative_df.rename_axis(key_name).reset_index()

# Write one total-score table per subset next to the per-item tables.
folder = 'scores/eat_global'
for i in ['all', 'males', 'females']:
    temp_df = calc_total(folder, i)
    if temp_df is None:
        # calc_total returns None when its glob matches no file; fail with a
        # clear message instead of an AttributeError on None.to_csv.
        raise FileNotFoundError(f'no *_{i}.csv score files found in {folder!r}')
    temp_df.to_csv(f'{folder}/total_{i}.csv', index=False)