In [2]:
import pandas as pd
import numpy as np
from IPython.display import display
from src.logConfig import init_logger
# Notebook-wide logger used by the HEI class below. 'hei.log' is handed to the
# project helper init_logger (presumably the log file name -- confirm in
# src/logConfig).
logger = init_logger('hei.log')
# Log a start marker for this run.
logger.info('Started')


Started


In [3]:
# Read the tab-delimited source table into the `data` DataFrame.
data = pd.read_csv('data/full_df.tsv', delimiter='\t')

  data = pd.read_csv('data/full_df.tsv', sep='\t')


In [7]:
# HEI scoring criteria, one entry per food-group component.
# Only the 2010 table is defined here (the original header also mentions
# 2015 and 2020).
#
# Each row is (column key, display name, goal, total):
#   goal  -- intake threshold used by the scoring methods for a full score
#   total -- maximum points the component can contribute
hei_2010 = {
    key: {'name': label, 'goal': goal, 'total': total}
    for key, label, goal, total in (
        ('fruit_total', 'Total Fruit', 0.8, 5),
        ('fruit_whole', 'Whole Fruit', 0.4, 5),
        ('veg', 'Vegetables', 1.1, 5),
        ('grn_bean', 'Greens and Beans', 0.2, 5),
        ('whl_grn', 'Whole Grains', 1.5, 10),
        ('dairy', 'Dairy', 1.3, 10),
        ('prot', 'Total Protein', 2.5, 5),
        ('sf_plant', 'Seafood and plant protein', 0.3, 5),
        ('fa', 'MUFA+PUFA / SFA ratio', 2.5, 10),  # minimum <= 1.2
        ('rf_grn', 'Refined Grains', 1.8, 10),  # excess = > 4.3
        ('sodium', 'sodium', 1.1, 10),  # excess = > 2.0
        ('empty_cal',
         'empty calories (saturated fats, added sugars, alcohol)',
         .19, 20),  # excess = > .5
    )
}

class HEI:
    '''
    Collects the input columns for a Healthy Eating Index (HEI) calculation
    and converts them to component scores.

    df: source DataFrame holding the raw intake columns
    hei_dict: mapping of category key -> {'name', 'goal', 'total'}

    Attributes
        df: the source DataFrame passed in
        hei_dict: the criteria mapping passed in
        categories: list of the category keys in hei_dict
        hei_df: DataFrame with one column per HEI category, filled by
                check_for_columns(), hei_cols() and hei_fa()
    '''
    def __init__(self, df, hei_dict):
        self.df = df
        self.hei_dict = hei_dict
        self.categories = list(hei_dict.keys())
        self.hei_df = pd.DataFrame()
        # Pick up any columns that are already named like an HEI category.
        self.check_for_columns()

    def check_for_columns(self):
        '''
        Checks the dataframe for the columns needed to calculate the HEI

        Every category key that already exists as a column of self.df is
        copied into self.hei_df; the list of matches is logged.
        '''
        found = [key for key in self.categories if key in self.df.columns]
        for key in found:
            self.hei_df[key] = self.df[key]
        logger.info(f'Found columns: {found}')

    def hei_cols(self, name, cats=None):
        '''
        Takes a list of columns corresponding to an HEI category
        and sums them to create a new column for scoring

        name: column to create in self.hei_df (normally a category key)
        cats: list of self.df column names to sum row-wise; None or an
              empty list produces a column of zeros

        An unknown name is only logged -- the column is still created.
        '''
        # None replaces the former mutable `[]` default; behaviour is the same.
        if cats is None:
            cats = []

        if name not in self.categories:
            logger.info(f'{name} not in hei index. call instructions() to see available categories')

        self.hei_df[name] = self.df[cats].sum(axis=1)
        logger.info(f'Created column {name} with columns {cats}')

    def hei_fa(self, mufa, pufa, sfa):
        '''
        Takes the Fatty acids and calculates the ratio of MUFA+PUFA / SFA

        mufa, pufa, sfa: names of the corresponding columns in self.df
        The ratio is stored in self.hei_df['fa'].
        '''
        self.hei_df['fa'] = (self.df[mufa] + self.df[pufa]) / self.df[sfa]
        logger.info('Created column for fa')

    def instructions(self):
        '''
        Prints the HEI category keys with their display names
        '''
        for key, value in self.hei_dict.items():
            # Entries are plain dicts, so the name is looked up by key
            # (attribute access raised AttributeError).
            print(f"{key}: {value['name']}")

    def hei_calc(self, df, cat, goal, total):
        '''
        Scores a component where a higher intake is better.

        df: DataFrame holding the column to score
        cat: name of the column to score
        goal: intake at or above which the full score is awarded
        total: maximum score for the component

        Returns a Series: `total` where the value is >= goal, otherwise the
        value scaled by total/goal. Missing values stay missing.

        NOTE(review): this rule rewards higher values, so it does not fit
        components whose table entry lists an "excess" threshold.
        '''
        values = df[cat]
        adjust = total / goal

        # Vectorised form of "total if x >= goal else x * adjust"; NaN fails
        # the comparison and therefore keeps its (NaN) scaled value.
        return (values * adjust).mask(values >= goal, total)

    def hei_calc_sofaa(self, df, cat, goal, total, max_excess=.50):
        '''
        Scores the empty-calories component, where a lower intake is better.

        df: DataFrame holding the column to score
        cat: name of the column to score
        goal: intake at or below which the full score is awarded
        total: maximum score for the component
        max_excess: intake at or above which the score is 0 (default .50)

        Returns a Series: `total` for values <= goal, 0 for values
        >= max_excess, and a linear interpolation in between. Missing values
        stay missing.

        Raises ValueError when max_excess is not greater than goal.
        '''
        span = max_excess - goal
        if span <= 0:
            raise ValueError('max_excess must be greater than goal')

        # Fraction of the full score: 1 at `goal`, 0 at `max_excess`.
        # Clipping in one pass avoids the earlier two-pass bug where values
        # zeroed for exceeding the maximum were then given the full score.
        fraction = ((max_excess - df[cat]) / span).clip(lower=0, upper=1)
        return fraction * total

In [9]:
# Build the HEI helper from the loaded data and the 2010 criteria table.
hei_calc = HEI(df=data, hei_dict=hei_2010)

# Map the source columns onto the two fruit categories.
hei_calc.hei_cols(name='fruit_total', cats=['F_TOT'])
hei_calc.hei_cols(name='fruit_whole', cats=['F_WHOLE'])

Found columns: []
Created column fruit_total with columns ['F_TOT']
