In [2]:
import pandas as pd
import numpy as np
from IPython.display import display
from src.logConfig import init_logger
logger = init_logger('hei.log')
logger.info('Started')


Started


In [86]:
#load data
outcomes = pd.read_csv('data/numom_outcomes.tsv', sep='\t')
ffq = pd.read_csv('data/numom_ffq.tsv', sep='\t')

data = pd.merge(outcomes, ffq, on='StudyID', how='right')
print(data.shape)
data['ADD_SUG'] = data['ADD_SUG'] * 4
data['SODIUM'] = data['DT_SODI'] /1000




(8259, 844)


In [94]:
# Dictionaries of the HEI criteria for each food group
#2010,2015,2020

hei_2010 = {
    'fruit_total': {'name': 'Total Fruit', 'goal': 0.8, 'total': 5},
    'fruit_whole': {'name': 'Whole Fruit', 'goal': 0.4, 'total': 5},
    'veg': {'name': 'Vegetables', 'goal': 1.1, 'total': 5},
    'grn_bean': {'name': 'Greens and Beans', 'goal': 0.2, 'total': 5},
    'whl_grn': {'name': 'Whole Grains', 'goal': 1.5, 'total': 10},
    'dairy': {'name': 'Dairy', 'goal': 1.3, 'total': 10},
    'prot': {'name': 'Total Protein', 'goal': 2.5, 'total': 5},
    'sf_plant': {'name': 'Seafood and plant protein', 'goal': 0.3, 'total': 5},
    'fa': {'name': 'MUFA+PUFA / SFA ratio', 'goal': 2.5, 'max':1.2,'total': 10}, #minimum <= 1.2
    'rf_grn': {'name': 'Refined Grains', 'goal': 1.8, 'max':4.3, 'total': 10}, #excess = >4.3
    'sodium': {'name': 'sodium', 'goal': 1.1, 'max':2,'total': 10}, #excess = >2.0
    'empty_cal': {'name': 'empty calories (saturated fats, added sugars, alcohol)', 'goal': .19, 'total': 20} #excess = >.5
}

class HEI:
    def __init__(self, df,cals,hei_dict):
        self.data = df.copy()
        self.df = df.copy()
        self.calories = cals
        self.hei_dict = hei_dict
        self.categories = list(hei_dict.keys())
        self.hei_df = pd.DataFrame()
        self.check_for_columns()
        self.hei_scores = pd.DataFrame()
        self.energy_adjust(cals)
        
    def check_for_columns(self):
        '''
        Checks the dataframe for the columns needed to calculate the HEI
        '''
        found = []
        for key in self.categories:
            if key in self.df.columns:
                self.hei_df[key] = self.df[key]
                found.append(key)
        logger.info(f'Found columns: {found}')
    
    def energy_adjust(self, cat):
        """Adjust the FFQ data to account for energy intake"""
        self.df['KCAL_NORM'] = self.data['DT_KCAL'] / 1000
        self.hei_df['KCAL_NORM'] = self.df['KCAL_NORM']
        
        
    def hei_cols(self, name, cats=[]):
        '''
        Takes a list of columns corresponding to an HEI category
        and sums them to create a new column for scoring
        '''
        if name not in self.categories:
            logger.info(f'{name} not in hei index. call instructions() to see available categories')
            return
        if name == 'fa':
            logger.info(f'{name} is a ratio and should be calculated with hei_fa()')
            return
        if name == 'empty_cal':
            logger.info(f'{name} is multiplied by type and should be calculated with hei_sofaa()')
            return
        
        self.hei_df[name] = self.df[cats].sum(axis=1)
        logger.info(f'Created column {name} with columns {cats}')
        
    def hei_fa(self, mufa, pufa, sfa):
        '''
        Takes the Fatty acids and calculates the ratio of MUFA+PUFA / SFA
        '''
        self.hei_df['fa'] = (self.data[mufa] + self.data[pufa]) / self.data[sfa]
        logger.info('Created column for fa')
        
    def hei_sofaa(self, sug, fat, al,norm=True):
        '''
        Takes the Fatty acids and calculates the ratio of MUFA+PUFA / SFA
        '''
        alc = self.data[al]/self.data['KCAL_NORM']
        sug = self.data[sug]/self.data['KCAL_NORM']
        fat = self.data[fat]/self.data['KCAL_NORM']
        
        alc = alc.apply(lambda x: 0 if x<13 else x-13)
        self.hei_df['empty_cal'] = ((sug * 4) + (fat * 9) + (alc* 7))
        logger.info('Created column for sugars. sugars assumed to be in grams.')
        
    def hei_score(self):
        #score = np.zeros(self.df.shape[0])
        self.hei_scores = pd.DataFrame()
        for col in self.categories:
            if col == 'empty_cal':
                goal = self.hei_dict[col]['goal']
                total = self.hei_dict[col]['total']
                #score += self.hei_calc_sofaa(col, total)
                self.hei_scores.loc[:,f'hei_{col}'] = self.hei_calc_sofaa(col, total)
                continue
            if col in ['rf_grn','sodium']:
                goal = self.hei_dict[col]['goal']
                total = self.hei_dict[col]['total']
                max = self.hei_dict[col]['max']
                #score += self.hei_calc_min(col,goal, total, max)
                self.hei_scores.loc[:,f'hei_{col}'] = self.hei_calc_min(col,goal, total, max)
                continue
            
            if col == 'fa':
                goal = self.hei_dict[col]['goal']
                total = self.hei_dict[col]['total']
                self.hei_scores.loc[:,f'hei_{col}'] = self.hei_calc_fa(col, goal, total)
                continue
            
            goal = self.hei_dict[col]['goal']
            total = self.hei_dict[col]['total']
            #score += self.hei_calc_max(col, goal, total)
            self.hei_scores.loc[:,f'hei_{col}'] = self.hei_calc(col, goal, total)

        self.hei_scores['hei_score'] = self.hei_scores.sum(axis=1)
    
    def instructions(self):
        '''
        Prints the instructions HEI category names
        '''
        for key, value in self.hei_dict.items():
            print(f'{key}: {value.name}')
    
    def hei_calc(self, cat, goal, total):
        df = self.hei_df
        hei_val = (df[cat]/df['KCAL_NORM']).copy()
        hei_val = (hei_val/goal) * total
        hei_s = hei_val.apply(lambda x: total if x >= total else x)
        return hei_s
    

    def hei_calc_fa(self, cat, goal, total):
        df = self.hei_df
        hei_val = df[cat].copy()
        min = 1.2
        
        hei_val = hei_val.apply(
            lambda x: total if x >= goal else 0 if x<= min else
            total * ((x - min)/(goal-min)))
        return hei_val
    
    def hei_calc_min(self, cat, goal, total,min):
        df = self.hei_df
        hei_val = (df[cat]/df['KCAL_NORM']).copy()
        
        hei_val = hei_val.apply(
            lambda x: total if x <= goal else 0 if x>= min else
            (total * ((x - min)/(goal-min))))
        return hei_val


    def hei_calc_sofaa(self, cat, total):
        df = self.hei_df
        hei_val = (df[cat]/1000).copy()
        display(hei_val)
        
        #hei_val = hei_val.apply(lambda x: 0 if x >= .50 else x)
        hei_val = hei_val.apply(
            lambda x: total if x <= .190 else 0 if x>=.5 else 20 * ((x - .5)/-.31))
        return hei_val

In [89]:
hei_calc = HEI(data, 'DT_KCAL', hei_2010)

hei_calc.hei_cols('fruit_total', ['F_TOT'])
hei_calc.hei_cols('fruit_whole', ['F_SOLID'])
hei_calc.hei_cols('veg', ['V_TOT'])
hei_calc.hei_cols('grn_bean', ['V_DRKGR','LEGUMES'])
hei_calc.hei_cols('whl_grn', ['G_WHL'])
hei_calc.hei_cols('dairy', ['D_TOT'])
hei_calc.hei_cols('prot', ['M_MPF','M_EGG','M_NUTSD','M_SOY','LEGUMES'])
hei_calc.hei_cols('sf_plant', ['M_FISH_HI','M_FISH_LO','M_SOY','LEGUMES','M_NUTSD'])
hei_calc.hei_cols('rf_grn', ['G_NWHL'])
hei_calc.hei_cols('sodium', ['SODIUM'] )

hei_calc.hei_fa('DT_MFAT', 'DT_PFAT', 'DT_SFAT')
hei_calc.hei_sofaa('ADD_SUG', 'DFAT_SOL', 'DT_ALCO')

Found columns: []
Created column fruit_total with columns ['F_TOT']
Created column fruit_whole with columns ['F_SOLID']
Created column veg with columns ['V_TOT']
Created column grn_bean with columns ['V_DRKGR', 'LEGUMES']
Created column whl_grn with columns ['G_WHL']
Created column dairy with columns ['D_TOT']
Created column prot with columns ['M_MPF', 'M_EGG', 'M_NUTSD', 'M_SOY', 'LEGUMES']
Created column sf_plant with columns ['M_FISH_HI', 'M_FISH_LO', 'M_SOY', 'LEGUMES', 'M_NUTSD']
Created column rf_grn with columns ['G_NWHL']
Created column sodium with columns ['SODIUM']
Created column for fa


In [90]:
hei_calc.hei_score()

0       0.409521
1       0.396097
2       0.402617
3       0.502189
4       0.448003
          ...   
8254    0.352539
8255    0.448949
8256    0.244209
8257    0.334818
8258    0.343363
Length: 8259, dtype: float64

In [92]:
hei_calc.hei_scores.hei_empty_cal.describe()

count    8259.000000
mean       11.694726
std         5.119874
min         0.000000
25%         8.452684
50%        12.289426
75%        15.525899
max        20.000000
Name: hei_empty_cal, dtype: float64

In [93]:
hei_calc.hei_scores

Unnamed: 0,hei_fruit_total,hei_fruit_whole,hei_veg,hei_grn_bean,hei_whl_grn,hei_dairy,hei_prot,hei_sf_plant,hei_fa,hei_rf_grn,hei_sodium,hei_empty_cal,hei_score
0,2.830674,3.216675,3.267362,1.934294,5.466918,9.646727,3.771144,5.000000,0.229962,7.318374,3.893559,5.837332,52.413022
1,4.147392,4.543867,1.814307,3.221691,3.416513,7.182463,4.329183,3.756857,3.723882,8.988965,7.359283,6.703425,59.187827
2,4.027373,4.852985,2.907031,1.846551,1.525032,3.754751,2.365679,3.544733,5.525881,4.995448,6.441238,6.282806,48.069507
3,0.759716,0.692186,2.192177,2.049826,1.001700,3.454435,5.000000,4.877845,2.888803,9.479035,5.271491,0.000000,37.667213
4,3.411801,2.515428,1.770753,1.540273,2.365515,4.867184,3.019754,4.753776,3.222461,6.431117,6.718780,3.354650,43.971490
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8254,5.000000,5.000000,5.000000,5.000000,0.884214,2.158719,5.000000,5.000000,9.802761,10.000000,6.604848,9.513590,68.964131
8255,2.714382,3.467588,2.222150,0.487736,4.471862,4.722566,1.842619,4.517263,0.000000,3.256887,6.517423,3.293603,37.514078
8256,5.000000,5.000000,3.379656,3.133808,2.505257,3.303635,3.373507,5.000000,8.517628,2.812669,0.492999,16.502656,59.021815
8257,2.168681,3.895526,2.498385,3.911727,4.123809,10.000000,3.917312,5.000000,2.879818,10.000000,5.118563,10.656909,64.170730


In [85]:
data['HEIX12_SOFAAS']

0        5.687912
1        6.703425
2        6.282806
3        0.000000
4        3.304224
          ...    
8254    12.042590
8255     3.293603
8256    16.502656
8257     9.979617
8258    11.060429
Name: HEIX12_SOFAAS, Length: 8259, dtype: float64

In [10]:
(data['F_TOT']/(data['DT_KCAL']/1000)/0.8) * 5

0        4.856192
1        2.956110
2        0.648789
3        0.427532
4        0.846573
          ...    
8254    10.287948
8255     0.617204
8256    15.310515
8257     1.277604
8258     0.867984
Length: 8259, dtype: float64

In [56]:
(data['DT_MFAT'] + data['DT_PFAT']) / data['DT_SFAT']

0       1.229895
1       1.684105
2       1.918364
3       1.575544
4       1.618920
          ...   
8254    2.474359
8255    1.156070
8256    2.307292
8257    1.574376
8258    1.350932
Length: 8259, dtype: float64