In [2]:
import pandas as pd
import numpy as np
from IPython.display import display
from src.logConfig import init_logger
logger = init_logger('hei.log')
logger.info('Started')


Started


In [23]:
#load data
data = pd.read_csv('data/full_df.tsv', sep='\t')

  data = pd.read_csv('data/full_df.tsv', sep='\t')


In [74]:
# Dictionaries of the HEI criteria for each food group
#2010,2015,2020

hei_2010 = {
    'fruit_total': {'name': 'Total Fruit', 'goal': 0.8, 'total': 5},
    'fruit_whole': {'name': 'Whole Fruit', 'goal': 0.4, 'total': 5},
    'veg': {'name': 'Vegetables', 'goal': 1.1, 'total': 5},
    'grn_bean': {'name': 'Greens and Beans', 'goal': 0.2, 'total': 5},
    'whl_grn': {'name': 'Whole Grains', 'goal': 1.5, 'total': 10},
    'dairy': {'name': 'Dairy', 'goal': 1.3, 'total': 10},
    'prot': {'name': 'Total Protein', 'goal': 2.5, 'total': 5},
    'sf_plant': {'name': 'Seafood and plant protein', 'goal': 0.3, 'total': 5},
    'fa': {'name': 'MUFA+PUFA / SFA ratio', 'goal': 2.5, 'max':1.2,'total': 10}, #minimum <= 1.2
    'rf_grn': {'name': 'Refined Grains', 'goal': 1.8, 'max':4.3, 'total': 10}, #excess = >4.3
    'sodium': {'name': 'sodium', 'goal': 1.1, 'max':2,'total': 10}, #excess = >2.0
    'empty_cal': {'name': 'empty calories (saturated fats, added sugars, alcohol)', 'goal': .19, 'total': 20} #excess = >.5
}

class HEI:
    def __init__(self, df,cals,hei_dict):
        self.df = df
        self.calories = cals
        self.hei_dict = hei_dict
        self.categories = list(hei_dict.keys())
        self.hei_df = pd.DataFrame()
        self.check_for_columns()
        self.hei_scores = pd.DataFrame()
        
        
    def check_for_columns(self):
        '''
        Checks the dataframe for the columns needed to calculate the HEI
        '''
        found = []
        for key in self.categories:
            if key in self.df.columns:
                self.hei_df[key] = self.df[key]
                found.append(key)
        logger.info(f'Found columns: {found}')
        
    def hei_cols(self, name, cats=[],norm=True):
        '''
        Takes a list of columns corresponding to an HEI category
        and sums them to create a new column for scoring
        '''
        if name not in self.categories:
            logger.info(f'{name} not in hei index. call instructions() to see available categories')
            return
        if name == 'fa':
            logger.info(f'{name} is a ratio and should be calculated with hei_fa()')
            return
        if name == 'empty_cal':
            logger.info(f'{name} is multiplied by type and should be calculated with hei_sofaa()')
            return
            
        
        if norm:
            
            if name == 'sodium':
                ratio = self.df[self.calories] / 1000*1000
            else:
                ratio = self.df[self.calories] / 1000
            self.hei_df[name] = self.df[cats].sum(axis=1)/ratio
            logger.info(f'Created column {name} with columns {cats}')
            return
        
        self.hei_df[name] = self.df[cats].sum(axis=1)
        logger.info(f'Created column {name} with columns {cats}')
        
    def hei_fa(self, mufa, pufa, sfa):
        '''
        Takes the Fatty acids and calculates the ratio of MUFA+PUFA / SFA
        '''
        self.hei_df['fa'] = (self.df[mufa] + self.df[pufa]) / self.df[sfa]
        logger.info('Created column for fa')
        
    def hei_sofaa(self, sug, fat, al,norm=True):
        '''
        Takes the Fatty acids and calculates the ratio of MUFA+PUFA / SFA
        '''
        if norm:
            ratio = self.df[self.calories] / 1000
            alc = self.df[al] - 13
            alc = alc.apply(lambda x: 0 if x < 0 else x)
            self.hei_df['empty_cal'] = (self.df[sug] * 4 + self.df[fat] * 9 + self.df[al] * 7) / ratio
            return
        
        self.hei_df['fa'] = (self.df[sug] * 4) + ( self.df[fat] * 9) + ( self.df[al] * 7)
        logger.info('Created column for sugars. sugars assumed to be in grams.')
        
    def hei_score(self):
        #score = np.zeros(self.df.shape[0])
        for col in self.categories:
            if col == 'empty_cal':
                goal = self.hei_dict[col]['goal']
                total = self.hei_dict[col]['total']
                #score += self.hei_calc_sofaa(col, total)
                self.hei_scores.loc[:,f'hei_{col}'] = self.hei_calc_sofaa(col, total)
                continue
            if col in ['rf_grn','sodium']:
                goal = self.hei_dict[col]['goal']
                total = self.hei_dict[col]['total']
                max = self.hei_dict[col]['max']
                #score += self.hei_calc_min(col,goal, total, max)
                self.hei_scores.loc[:,f'hei_{col}'] = self.hei_calc_min(col,goal, total, max)
                continue
            
            goal = self.hei_dict[col]['goal']
            total = self.hei_dict[col]['total']
            #score += self.hei_calc_max(col, goal, total)
            self.hei_scores.loc[:,f'hei_{col}'] = self.hei_calc(col, goal, total)

        self.hei_scores['hei_score'] = self.hei_scores.sum(axis=1)
    
    def instructions(self):
        '''
        Prints the instructions HEI category names
        '''
        for key, value in self.hei_dict.items():
            print(f'{key}: {value.name}')

    
    def hei_calc(self, cat, goal, total):
        df = self.hei_df
        hei_val = df[cat].copy()
        adjust = total/goal

        hei_val = hei_val.apply(lambda x: total if x >= goal else x * adjust)
        return hei_val
    
    def hei_calc_min(self, cat, goal, total,max):
        df = self.hei_df
        hei_val = df[cat].copy()
        hei_val = hei_val.apply(lambda x: 0 if x >= max else x)
        
        hei_val = hei_val.apply(
            lambda x: total if x <= goal else total * ((x - max)/(max-goal)))
        return hei_val

    def hei_calc_max(self, cat, goal, total,min=0):
        if cat == 'fa':
            min = 1.2
        df = self.hei_df
        hei_val = df[cat].copy()
        #hei_val = hei_val.apply(lambda x: total if x >= min else x)
        
        hei_val = hei_val.apply(
            lambda x: total if x >= goal else total * ((x - min)/(goal-min)))
        return hei_val


    def hei_calc_sofaa(self, cat, total):
        df = self.hei_df
        hei_val = df[cat].copy()
        hei_val = hei_val.apply(lambda x: 0 if x >= .50 else x)
        hei_val = hei_val.apply(
            lambda x: total if x <= .190 else 20 * ((x - .5)/-.31))
        return hei_val

SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='? (1744368404.py, line 59)

In [75]:
hei_calc = HEI(data, 'DT_KCAL', hei_2010)

hei_calc.hei_cols('fruit_total', ['F_TOT'])
hei_calc.hei_cols('fruit_whole', ['F_SOLID'])
hei_calc.hei_cols('veg', ['V_TOT'])
hei_calc.hei_cols('grn_bean', ['V_DRKGR','LEGUMES'])
hei_calc.hei_cols('whl_grn', ['G_WHL'])
hei_calc.hei_cols('dairy', ['D_TOT'])
hei_calc.hei_cols('prot', ['M_MPF','M_EGG','M_NUTSD','M_SOY','LEGUMES'])
hei_calc.hei_cols('sf_plant', ['M_FISH_HI','M_FISH_LO','M_SOY','LEGUMES','M_NUTSD'])
hei_calc.hei_cols('rf_grn', ['G_NWHL'])
hei_calc.hei_cols('sodium', ['DT_SODI'] )

hei_calc.hei_fa('DT_MFAT', 'DT_PFAT', 'DT_SFAT')
hei_calc.hei_sofaa('ADD_SUG', 'DFAT_SOL', 'DT_ALCO')

Found columns: []
Created column fruit_total with columns ['F_TOT']
Created column fruit_whole with columns ['F_SOLID']
Created column veg with columns ['V_TOT']
Created column grn_bean with columns ['V_DRKGR', 'LEGUMES']
Created column whl_grn with columns ['G_WHL']
Created column dairy with columns ['D_TOT']
Created column prot with columns ['M_MPF', 'M_EGG', 'M_NUTSD', 'M_SOY', 'LEGUMES']
Created column sf_plant with columns ['M_FISH_HI', 'M_FISH_LO', 'M_SOY', 'LEGUMES', 'M_NUTSD']
Created column rf_grn with columns ['G_NWHL']
Created column sodium with columns ['DT_SODI']
Created column for fa


In [76]:
hei_calc.hei_score()

In [77]:
hei_calc.hei_scores['hei_score']

0       82.225521
1       69.838702
2       51.672500
3       57.895659
4       52.491486
          ...    
8254    81.334605
8255    50.182919
8256    94.821414
8257    72.380256
8258    64.640493
Name: hei_score, Length: 8259, dtype: float64

In [79]:
hei_calc.hei_df

Unnamed: 0,fruit_total,fruit_whole,veg,grn_bean,whl_grn,dairy,prot,sf_plant,rf_grn,sodium,fa,empty_cal
0,0.776991,0.441472,1.233178,0.132736,1.406824,2.151440,3.234812,0.618061,4.238131,2829.953144,1.229895,602.963745
1,0.472978,0.259096,0.284498,0.091852,0.365275,0.665522,1.542842,0.160665,1.463131,953.438413,1.684105,163.745790
2,0.103806,0.062543,0.103028,0.011899,0.036851,0.078633,0.190549,0.034262,0.491523,228.801295,1.918364,41.490602
3,0.068405,0.031162,0.271404,0.046142,0.084556,0.252719,1.568251,0.164701,1.086248,858.516048,1.575544,193.763916
4,0.135452,0.049932,0.096663,0.015288,0.088044,0.157001,0.374647,0.070774,0.668023,346.219558,1.618920,86.128069
...,...,...,...,...,...,...,...,...,...,...,...,...
8254,1.646072,1.309081,1.279918,0.476972,0.075499,0.159747,2.352511,0.995798,0.295191,800.097746,2.474359,309.336536
8255,0.098753,0.063078,0.111161,0.004436,0.152524,0.139598,0.209490,0.061629,0.792606,321.390118,1.156070,62.108643
8256,2.449682,2.089435,1.995771,0.336471,1.008693,1.152792,4.527589,0.860278,9.654631,5249.309131,2.307292,495.560435
8257,0.204417,0.183593,0.323804,0.092178,0.364410,1.169582,1.153875,0.410478,1.051583,906.842783,1.574376,169.939889


In [78]:
data['HEI2010_TOTAL_SCORE']

0       49.509999
1       56.829670
2       45.838866
3       34.599663
4       40.947702
          ...    
8254    71.493131
8255    34.687321
8256    56.002908
8257    62.846545
8258    59.748388
Name: HEI2010_TOTAL_SCORE, Length: 8259, dtype: float64