# Map fiber content in ASA24 foods from FNDDS 2018

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Load the FNDDS 2018 ingredient nutrient table (the file name indicates it includes carbohydrate values)
nutrient_values = pd.read_csv('fndds_2018_ingredient_carbohydrate_values.csv')
# Display the distinct nutrient names so the exact labels used for filtering can be checked
nutrient_values['Nutrient description'].unique()

array(['Protein', 'Total Fat', 'Carbohydrate', 'Energy', 'Alcohol',
       'Water', 'Caffeine', 'Theobromine', 'Sugars, total',
       'Fiber, total dietary', 'Calcium', 'Iron', 'Magnesium',
       'Phosphorus', 'Potassium', 'Sodium', 'Zinc', 'Copper', 'Selenium',
       'Retinol', 'Vitamin A, RAE', 'Carotene, beta', 'Carotene, alpha',
       'Vitamin E (alpha-tocopherol)', 'Vitamin D (D2 + D3)',
       'Cryptoxanthin, beta', 'Lycopene', 'Lutein + zeaxanthin',
       'Vitamin C', 'Thiamin', 'Riboflavin', 'Niacin', 'Vitamin B-6',
       'Folate, total', 'Vitamin B-12', 'Choline, total',
       'Vitamin K (phylloquinone)', 'Folic acid', 'Folate, food',
       'Folate, DFE', 'Vitamin E, added', 'Vitamin B-12, added',
       'Cholesterol', 'Fatty acids, total saturated', '4:0', '6:0', '8:0',
       '10:0', '12:0', '14:0', '16:0', '18:0', '18:1', '18:2', '18:3',
       '20:4', '22:6 n-3', '16:1', '18:4', '20:1', '20:5 n-3', '22:1',
       '22:5 n-3', 'Fatty acids, total monounsaturated',
  

## Extract nutrients of interest for ingredients in diet recalls

In [4]:
# Fiber table: keep only the 'Fiber, total dietary' rows, name the value
# column after the nutrient, and discard the descriptive columns.
fiber = (
    nutrient_values
    .loc[nutrient_values['Nutrient description'].eq('Fiber, total dietary')]
    .rename(columns={'Nutrient value': 'Fiber (g)'})
    .drop(columns=['Ingredient description', 'Nutrient code', 'Nutrient description'])
    # reset_index() keeps the previous row labels in a new 'index' column
    .reset_index()
)

In [5]:
# Energy table: same shaping as the other nutrient tables, but for the
# 'Energy' rows, with the value column named 'Energy (kcal)'.
is_energy = nutrient_values['Nutrient description'] == 'Energy'
energy = (
    nutrient_values[is_energy]
    .rename(columns={'Nutrient value': 'Energy (kcal)'})
    .drop(columns=['Ingredient description', 'Nutrient code', 'Nutrient description'])
    # the previous row labels are kept in a new 'index' column
    .reset_index()
)

In [6]:
# Carbohydrate table: select the 'Carbohydrate' rows, rename the value column
# to 'Carbohydrate (g)', and drop the descriptive columns.
is_carb = nutrient_values['Nutrient description'].eq('Carbohydrate')
unused_cols = ['Ingredient description', 'Nutrient code', 'Nutrient description']
carbs = (
    nutrient_values.loc[is_carb]
    .rename(columns={'Nutrient value': 'Carbohydrate (g)'})
    .drop(columns=unused_cols)
    # the previous row labels are kept in a new 'index' column
    .reset_index()
)

In [7]:
# Combine the three per-nutrient tables into one row per ingredient.
# Join on 'Ingredient code' instead of concatenating side by side: a positional
# concat silently pairs values from different ingredients if the tables ever
# differ in row order or length. how='left' keeps every row of `fiber`
# (including its 'index' column), and validate='one_to_one' raises if an
# ingredient code appears more than once in any of the tables.
nutrients = (
    fiber
    .merge(carbs[['Ingredient code', 'Carbohydrate (g)']],
           how='left', on='Ingredient code', validate='one_to_one')
    .merge(energy[['Ingredient code', 'Energy (kcal)']],
           how='left', on='Ingredient code', validate='one_to_one')
)

In [8]:
# Remove the leftover 'index' column, then display the combined nutrient table
nutrients = nutrients.drop(columns=['index'])
nutrients

Unnamed: 0,Ingredient code,Fiber (g),Carbohydrate (g),Energy (kcal)
0,1001,0.0,0.06,717.0
1,1002,0.0,0.00,731.0
2,1003,0.0,0.00,876.0
3,1004,0.0,2.34,353.0
4,1005,0.0,2.79,371.0
...,...,...,...,...
2327,907081,0.0,2.20,106.0
2328,907961,0.0,2.92,98.0
2329,907971,0.0,6.31,281.0
2330,912695,10.3,18.82,614.0


In [9]:
# Load the ingredientized ASA24 data
# (file name suggests the recalls were already matched to FNDDS — confirm)
asa24 = pd.read_csv('asa_fndds_matched_120721.csv')

In [10]:
# Keep only rows that have a food description. Take an explicit copy so that
# columns assigned to `asa24` afterwards are written to an independent frame
# rather than to a slice of the loaded data (avoids SettingWithCopyWarning).
asa24 = asa24[asa24['Food_Description'].notna()].copy()

In [11]:
# Build a row identifier by concatenating the string form of each of these
# columns, in this order and with no separator between the parts.
id_parts = ['UserName', 'FoodCode', 'Ingredient code', 'RecallNo', 'FoodNum',
            'CodeNum', 'Occ_No', 'FoodAmt', 'Seq num']
unique_id = asa24[id_parts[0]].astype(str)
for part in id_parts[1:]:
    unique_id = unique_id + asa24[part].astype(str)
asa24['uniqueID'] = unique_id

In [12]:
# Discard rows whose uniqueID is missing, then report (rows, columns)
asa24 = asa24.dropna(subset=['uniqueID'])
asa24.shape

(34788, 113)

In [13]:
# Load the remapped food/ingredient code table. Later cells read its
# 'Ingredient code' column and rename its 'Ingredient_subcode' column.
code_remap = pd.read_csv('ingred_code_remapped_102021.csv')

In [15]:
# Restrict the recall data to the columns listed here, in this order,
# so the frame is easier to inspect.
cols = [
    'UserName',
    'uniqueID',
    'FoodCode',
    'Food_Description',
    'fndds_description',
    'WWEIA Category number',
    'WWEIA Category description',
    'RecallNo',
    'FoodAmt',
    'FoodNum',
    'Ingredient code',
    'Ingredient description',
    'Ingredient weight (g)',
]
asa24 = asa24.loc[:, cols]

In [16]:
# Outer-join the recall rows with the remap table on 'Ingredient code';
# rows from either side that have no match are kept.
asa_recode = asa24.merge(code_remap, how='outer', on='Ingredient code')

In [18]:
# Keep only the joined rows whose 'Ingredient code' appears in the remap table
in_remap = asa_recode['Ingredient code'].isin(code_remap['Ingredient code'])
asa_recode = asa_recode[in_remap]

In [19]:
# Drop the original code/description/weight columns and the '_x' variants;
# the '_y' variants and 'Ingredient_subcode' are not in this list and remain.
superseded_cols = [
    'Ingredient code',
    'Ingredient description',
    'Ingredient weight (g)',
    'Ingredient description_x',
    'Ingredient weight (g)_x',
]
asa_recode = asa_recode.drop(columns=superseded_cols)

In [20]:
# Give the remapped columns the standard names used by `asa24`
remapped_names = {
    'Ingredient_subcode': 'Ingredient code',
    'Ingredient description_y': 'Ingredient description',
    'Ingredient weight (g)_y': 'Ingredient weight (g)',
}
asa_recode = asa_recode.rename(columns=remapped_names)

In [21]:
# Keep only rows whose ingredient code is below 10,000,000
# (presumably larger codes are the ones handled through the remap table — confirm)
asa24 = asa24.loc[asa24['Ingredient code'].lt(10000000)]

In [22]:
# Stack the directly-coded rows on top of the remapped rows
frames = [asa24, asa_recode]
asa24_all = pd.concat(frames)

In [23]:
# (rows, columns) of the stacked recall data
asa24_all.shape

(45739, 13)

In [24]:
# Attach the per-ingredient nutrient values; the inner join keeps only rows
# whose 'Ingredient code' is present in both tables.
asa_nutrients = asa24_all.merge(nutrients, how='inner', on='Ingredient code')

In [25]:
# (rows, columns) after the nutrient join; compare the row count with asa24_all.shape
asa_nutrients.shape

(45739, 16)

In [26]:
# Sum 'Ingredient weight (g)' for each unique combination of the keys below.
# The descriptive and nutrient columns are included as keys so they are carried
# along in the result's index; rows with a missing value in any key are
# excluded (dropna=True).
# The built-in .sum() replaces .agg(np.sum): recent pandas emits a FutureWarning
# when a NumPy callable is passed to agg, and the result is the same.
group_keys = [
    'UserName', 'FoodCode', 'uniqueID', 'Food_Description', 'Ingredient code',
    'WWEIA Category number', 'WWEIA Category description',
    'Ingredient description', 'FoodAmt', 'FoodNum', 'RecallNo',
    'Fiber (g)', 'Carbohydrate (g)', 'Energy (kcal)',
]
ingred_sum = asa_nutrients.groupby(group_keys, dropna=True)['Ingredient weight (g)'].sum()

In [27]:
# Flatten the grouped result: the grouping keys become ordinary columns
ingred_df = ingred_sum.reset_index()

In [29]:
# Number of distinct ingredient codes in the grouped data
ingred_df['Ingredient code'].nunique()

1199

In [30]:
# For every row, the total ingredient weight of the
# (UserName, FoodCode, FoodNum, RecallNo) group it belongs to. transform
# returns a result with the same index as `ingred_sum`.
# The string 'sum' replaces np.sum: recent pandas emits a FutureWarning when a
# NumPy callable is passed to transform, and the result is the same.
ingred_wts = ingred_sum.groupby(level=['UserName', 'FoodCode', 'FoodNum', 'RecallNo']).transform('sum')

In [31]:
# Convert the per-row group totals to a one-column DataFrame so more columns can be added
ingred_wt = ingred_wts.to_frame()

In [32]:
# The column currently holds group totals, so label it accordingly
ingred_wt = ingred_wt.rename(columns={'Ingredient weight (g)': 'ingredient_weight_total'})

In [33]:
# Rebind ingred_sum to a one-column DataFrame.
# NOTE(review): after this cell ingred_sum is no longer a Series, so re-running
# earlier cells that use it without re-running the groupby may behave differently.
ingred_sum = ingred_sum.to_frame()

In [35]:
# Add the per-ingredient weight next to the group total (values align on the index)
ingred_wt = ingred_wt.assign(**{'Ingredient weight (g)': ingred_sum['Ingredient weight (g)']})

In [36]:
# Move the index levels (the grouping keys) into ordinary columns so they can be used in the calculations below
ingred_wt = ingred_wt.reset_index()

In [38]:
# Share of the group's total ingredient weight contributed by this ingredient
ingred_wt['Proportion of ingredient'] = ingred_wt['Ingredient weight (g)'].div(ingred_wt['ingredient_weight_total'])

In [39]:
# Scale the reported FoodAmt by the ingredient's share of the total weight
ingred_wt['ingredient_consumed_g'] = ingred_wt['Proportion of ingredient'].mul(ingred_wt['FoodAmt'])

In [40]:
# Fiber consumed = amount of ingredient consumed x (fiber value / 100)
# (presumably the nutrient values are per 100 g of ingredient — confirm)
fiber_fraction = ingred_wt['Fiber (g)'] / 100
ingred_wt['fiber_consumed_g'] = ingred_wt['ingredient_consumed_g'] * fiber_fraction

In [41]:
# Carbohydrate consumed = amount of ingredient consumed x (carbohydrate value / 100)
# (presumably the nutrient values are per 100 g of ingredient — confirm)
carb_fraction = ingred_wt['Carbohydrate (g)'] / 100
ingred_wt['carb_consumed_g'] = ingred_wt['ingredient_consumed_g'] * carb_fraction

In [42]:
# Energy consumed = amount of ingredient consumed x (energy value / 100)
# (presumably the nutrient values are per 100 g of ingredient — confirm)
energy_fraction = ingred_wt['Energy (kcal)'] / 100
ingred_wt['cal_consumed'] = ingred_wt['ingredient_consumed_g'] * energy_fraction

In [43]:
# Carbohydrate grams times 4 (presumably 4 kcal per gram of carbohydrate — confirm)
ingred_wt['cal_from_carb'] = ingred_wt['carb_consumed_g'].mul(4)

In [44]:
# Write the per-ingredient results to CSV. The row index is written too
# (to_csv defaults to index=True), so it appears as an unnamed first column.
ingred_wt.to_csv('ingredient_fiber_carb_weights_nndc_120721.csv')

In [None]:
# The cells below were used to determine how often each ingredient was consumed in the cohort. They are not used for this project.

In [45]:
# Count how many rows carry each ingredient description
by_description = ingred_wt.groupby('Ingredient description')
ingred_freq = by_description['Ingredient description'].count()

In [46]:
# Order from most to least frequent, then convert to a one-column DataFrame
ingred_freq = ingred_freq.sort_values(ascending=False)
ingred_freq = ingred_freq.to_frame()

In [47]:
# Label the count column 'frequency' and write the table to CSV
# (ingred_freq itself is left unchanged)
freq_table = ingred_freq.rename(columns={'Ingredient description': 'frequency'})
freq_table.to_csv('ingredient_frequency_nndc_120721.csv')