# Map fiber content in ASA24 foods from FNDDS 2018

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Load the FNDDS 2018 ingredient nutrient table (the file name refers to
# carbohydrate values) and list every nutrient description it contains.
nutrient_values = pd.read_csv(
    'data/fndds/fndds_2018/fndds_2018_ingredient_carbohydrate_values.csv'
)
nutrient_values['Nutrient description'].unique()

array(['Protein', 'Total Fat', 'Carbohydrate', 'Energy', 'Alcohol',
       'Water', 'Caffeine', 'Theobromine', 'Sugars, total',
       'Fiber, total dietary', 'Calcium', 'Iron', 'Magnesium',
       'Phosphorus', 'Potassium', 'Sodium', 'Zinc', 'Copper', 'Selenium',
       'Retinol', 'Vitamin A, RAE', 'Carotene, beta', 'Carotene, alpha',
       'Vitamin E (alpha-tocopherol)', 'Vitamin D (D2 + D3)',
       'Cryptoxanthin, beta', 'Lycopene', 'Lutein + zeaxanthin',
       'Vitamin C', 'Thiamin', 'Riboflavin', 'Niacin', 'Vitamin B-6',
       'Folate, total', 'Vitamin B-12', 'Choline, total',
       'Vitamin K (phylloquinone)', 'Folic acid', 'Folate, food',
       'Folate, DFE', 'Vitamin E, added', 'Vitamin B-12, added',
       'Cholesterol', 'Fatty acids, total saturated', '4:0', '6:0', '8:0',
       '10:0', '12:0', '14:0', '16:0', '18:0', '18:1', '18:2', '18:3',
       '20:4', '22:6 n-3', '16:1', '18:4', '20:1', '20:5 n-3', '22:1',
       '22:5 n-3', 'Fatty acids, total monounsaturated',
  

In [4]:
# Total dietary fiber rows only: rename the value column, drop the descriptive
# columns, and renumber the rows (reset_index keeps the old labels in an
# 'index' column).
fiber = (
    nutrient_values
    .loc[nutrient_values['Nutrient description'] == 'Fiber, total dietary']
    .rename(columns={'Nutrient value': 'Fiber (g)'})
    .drop(columns=['Ingredient description', 'Nutrient code', 'Nutrient description'])
    .reset_index()
)

In [5]:
# Energy rows only: rename the value column, drop the descriptive columns, and
# renumber the rows (reset_index keeps the old labels in an 'index' column).
energy = (
    nutrient_values
    .loc[nutrient_values['Nutrient description'] == 'Energy']
    .rename(columns={'Nutrient value': 'Energy (kcal)'})
    .drop(columns=['Ingredient description', 'Nutrient code', 'Nutrient description'])
    .reset_index()
)

In [6]:
# Carbohydrate rows only: rename the value column, drop the descriptive
# columns, and renumber the rows (reset_index keeps the old labels in an
# 'index' column).
carbs = (
    nutrient_values
    .loc[nutrient_values['Nutrient description'] == 'Carbohydrate']
    .rename(columns={'Nutrient value': 'Carbohydrate (g)'})
    .drop(columns=['Ingredient description', 'Nutrient code', 'Nutrient description'])
    .reset_index()
)

In [7]:
# Combine fiber, carbohydrate and energy into one row per ingredient.
# The values are joined on 'Ingredient code' rather than glued together by row
# position, so a nutrient that is missing or ordered differently for some
# ingredient can no longer be attached to the wrong ingredient.
# validate='one_to_one' raises pandas.errors.MergeError if an ingredient code
# appears more than once for a nutrient instead of silently duplicating rows.
# Column order is unchanged: fiber's columns, then carbohydrate, then energy.
nutrients = (
    fiber
    .merge(
        carbs[['Ingredient code', 'Carbohydrate (g)']],
        on='Ingredient code',
        how='left',
        validate='one_to_one',
    )
    .merge(
        energy[['Ingredient code', 'Energy (kcal)']],
        on='Ingredient code',
        how='left',
        validate='one_to_one',
    )
)

In [8]:
# Remove the leftover 'index' column (the pre-reset row labels carried over
# from the fiber frame), then display the per-ingredient nutrient table.
nutrients = nutrients.drop('index', axis=1)
nutrients

Unnamed: 0,Ingredient code,Fiber (g),Carbohydrate (g),Energy (kcal)
0,1001,0.0,0.06,717.0
1,1002,0.0,0.00,731.0
2,1003,0.0,0.00,876.0
3,1004,0.0,2.34,353.0
4,1005,0.0,2.79,371.0
...,...,...,...,...
2327,907081,0.0,2.20,106.0
2328,907961,0.0,2.92,98.0
2329,907971,0.0,6.31,281.0
2330,912695,10.3,18.82,614.0


In [9]:
# Read the ingredientized ASA24 recall data (recall rows matched to FNDDS
# ingredient codes, per the file name).
asa24 = pd.read_csv(
    'data/asa24/asa24_ingredients/asa_fndds_matched_nndc_120721.csv'
)

In [10]:
# Keep only recall rows that have a food description. The explicit copy makes
# `asa24` an independent frame, so the column assignment in the next step
# (adding 'uniqueID') does not raise pandas' SettingWithCopyWarning.
asa24 = asa24[asa24['Food_Description'].notna()].copy()

In [11]:
# Build a per-row identifier by concatenating the string form of each key
# column, in the order listed, with no separator between the parts.
id_parts = [
    'UserName',
    'FoodCode',
    'Ingredient code',
    'RecallNo',
    'FoodNum',
    'CodeNum',
    'Occ_No',
    'FoodAmt',
    'Seq num',
]
unique_id = asa24[id_parts[0]].astype(str)
for part in id_parts[1:]:
    unique_id = unique_id + asa24[part].astype(str)
asa24['uniqueID'] = unique_id

In [12]:
# Drop rows without a uniqueID, then report the remaining (rows, columns).
# NOTE(review): uniqueID is assembled from astype(str) pieces; in pandas
# versions where astype(str) renders a missing value as the text 'nan', the ID
# is never null and this filter keeps every row - confirm the intent.
asa24 = asa24.loc[asa24['uniqueID'].notna()]
asa24.shape

(34788, 113)

In [13]:
# Read the table that remaps an ingredient code to a set of sub-ingredient
# codes ('Ingredient_subcode') with their own descriptions and weights.
code_remap = pd.read_csv(
    'ingred_recode/ingred_code_remapped_102021.csv'
)

In [14]:
# Display the remap table for inspection.
code_remap

Unnamed: 0,Ingredient code,Ingredient description_x,Ingredient weight (g)_x,Ingredient_subcode,Ingredient description_y,Ingredient weight (g)_y
0,81200100,"Oil or table fat, NFS",3.0,1001,"Butter, salted",60.00
1,81200100,"Oil or table fat, NFS",3.0,4613,"Margarine-like, vegetable oil spread, 60% fat,...",40.00
2,81200100,"Oil or table fat, NFS",3.0,4044,"Oil, soybean, salad or cooking",50.00
3,81200100,"Oil or table fat, NFS",3.0,4518,"Oil, corn, industrial and retail, all purpose ...",10.00
4,81200100,"Oil or table fat, NFS",3.0,4582,"Oil, canola",15.00
...,...,...,...,...,...,...
628,21407120,"Beef, pot roast, braised or boiled, lean only ...",167.5,2047,"Salt, table, iodized",0.84
629,75311022,"Classic mixed vegetables, frozen, cooked, fat ...",100.0,11124,"Carrots, raw",25.00
630,75311022,"Classic mixed vegetables, frozen, cooked, fat ...",100.0,11052,"Beans, snap, green, raw",25.00
631,75311022,"Classic mixed vegetables, frozen, cooked, fat ...",100.0,11304,"Peas, green, raw",25.00


In [15]:
# Keep only the identifier, food, and ingredient columns listed here; every
# other column of the recall table is discarded.
cols = [
    'UserName',
    'uniqueID',
    'FoodCode',
    'Food_Description',
    'fndds_description',
    'WWEIA Category number',
    'WWEIA Category description',
    'RecallNo',
    'FoodAmt',
    'FoodNum',
    'Ingredient code',
    'Ingredient description',
    'Ingredient weight (g)',
]
asa24 = asa24.loc[:, cols]

In [16]:
# Attach the remap table to every recall row via 'Ingredient code'. The outer
# join keeps rows from both sides even when the code has no partner.
asa_recode = asa24.merge(code_remap, on='Ingredient code', how='outer')

In [17]:
# Display the merged recall/remap table for inspection.
asa_recode

Unnamed: 0,UserName,uniqueID,FoodCode,Food_Description,fndds_description,WWEIA Category number,WWEIA Category description,RecallNo,FoodAmt,FoodNum,Ingredient code,Ingredient description,Ingredient weight (g),Ingredient description_x,Ingredient weight (g)_x,Ingredient_subcode,Ingredient description_y,Ingredient weight (g)_y
0,5001,500175117010.01129121016.0529.01,75117010.0,"Onion, young green, cooked, from fresh, NS as ...","Onions, green, raw",6414,Onions,2,29.00,10,11291,"Onions, spring or scallions (includes tops and...",100.0,,,,,
1,6022,602275117010.01129121515.0525.01,75117010.0,"Onions, young green, raw","Onions, green, raw",6414,Onions,2,25.00,15,11291,"Onions, spring or scallions (includes tops and...",100.0,,,,,
2,8096,809675117010.011291277.035.01,75117010.0,"Onions, young green, raw","Onions, green, raw",6414,Onions,2,5.00,7,11291,"Onions, spring or scallions (includes tops and...",100.0,,,,,
3,9041,904175117010.0112912811.0315.01,75117010.0,"Onions, young green, raw","Onions, green, raw",6414,Onions,2,15.00,8,11291,"Onions, spring or scallions (includes tops and...",100.0,,,,,
4,7122,712275117010.011291333.0112.51,75117010.0,"Onions, young green, raw","Onions, green, raw",6414,Onions,3,12.50,3,11291,"Onions, spring or scallions (includes tops and...",100.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45737,9049,904958134680.02290141717.0575.01,58134680.0,"Tortellini, cheese-filled, no sauce","Tortellini, cheese-filled, no sauce",3204,"Pasta mixed dishes, excludes macaroni and cheese",4,75.00,17,22901,"Tortellini, pasta with cheese filling, fresh-r...",100.0,,,,,
45738,9045,904514131000.0116541224.0528.351,14131000.0,Queso Anejo (aged Mexican cheese),"Queso Anejo, aged Mexican cheese",1602,Cheese,4,28.35,12,1165,"Cheese, mexican, queso anejo",100.0,,,,,
45739,9057,905753381000.01832041616.03129.01,53381000.0,"Pie, lemon meringue","Pie, lemon meringue",5502,Cakes and pies,4,129.00,16,18320,"Pie, lemon meringue, commercially prepared",100.0,,,,,
45740,9067,906762105000.0916341919.0615.01,62105000.0,"Blueberries, dried","Blueberries, dried",6016,Dried fruits,4,15.00,19,9163,"Blueberries, dried, sweetened",100.0,,,,,


In [18]:
# Keep only the rows whose ingredient code appears in the remap table.
# isin() already returns a boolean mask, so it is used directly instead of
# being compared with True.
asa_recode = asa_recode[asa_recode['Ingredient code'].isin(code_remap['Ingredient code'])]

In [19]:
# Remove the original ingredient code/description/weight and the remap
# table's '_x' columns, leaving the sub-ingredient ('_y') columns in place.
# NOTE(review): the food-level 'Ingredient weight (g)' is discarded here and the
# remap table's sub-ingredient weight takes over its name in the next step;
# confirm those weights are on the same scale as the other ingredients of the
# food before they are summed into per-food totals.
replaced_columns = [
    'Ingredient code',
    'Ingredient description',
    'Ingredient weight (g)',
    'Ingredient description_x',
    'Ingredient weight (g)_x',
]
asa_recode = asa_recode.drop(columns=replaced_columns)

In [20]:
# Give the sub-ingredient columns the standard ingredient column names so the
# recoded rows line up with the untouched recall rows.
subcode_to_standard = {
    'Ingredient_subcode': 'Ingredient code',
    'Ingredient description_y': 'Ingredient description',
    'Ingredient weight (g)_y': 'Ingredient weight (g)',
}
asa_recode = asa_recode.rename(columns=subcode_to_standard)

In [21]:
# Keep only recall rows whose ingredient code is below 10,000,000.
# NOTE(review): presumably this removes the 8-digit codes (like those in the
# remap table) so they are represented only by their recoded sub-ingredients -
# confirm, and check whether any 8-digit code is absent from the remap table.
asa24 = asa24[asa24['Ingredient code'].lt(10000000)]

In [22]:
# Stack the untouched recall rows on top of the recoded sub-ingredient rows.
asa24_all = pd.concat([asa24, asa_recode], axis=0)

In [23]:
# Report (rows, columns) of the combined recall table.
asa24_all.shape

(45739, 13)

In [24]:
# Attach fiber, carbohydrate and energy values to each ingredient row. The
# inner join drops any row whose ingredient code has no nutrient entry.
asa_nutrients = asa24_all.merge(nutrients, on='Ingredient code', how='inner')

In [25]:
# Report (rows, columns) after attaching nutrient values; comparing the row
# count with the combined recall table shows whether the inner join dropped rows.
asa_nutrients.shape

(45739, 16)

In [26]:
# Sum the ingredient weight for every distinct combination of the keys below.
# The result is a Series with these keys as a MultiIndex; dropna=True leaves
# out any combination in which a key is missing.
# The built-in .sum() replaces .agg(np.sum): passing the NumPy callable is
# deprecated in recent pandas and emits a FutureWarning, while the result is
# the same groupby sum.
group_keys = [
    'UserName',
    'FoodCode',
    'uniqueID',
    'Food_Description',
    'Ingredient code',
    'WWEIA Category number',
    'WWEIA Category description',
    'Ingredient description',
    'FoodAmt',
    'FoodNum',
    'RecallNo',
    'Fiber (g)',
    'Carbohydrate (g)',
    'Energy (kcal)',
]
ingred_sum = asa_nutrients.groupby(group_keys, dropna=True)['Ingredient weight (g)'].sum()

In [27]:
# Flatten the grouped Series into a DataFrame: the group keys become ordinary
# columns next to the summed 'Ingredient weight (g)'.
ingred_df = ingred_sum.reset_index()

In [28]:
# Display the flattened per-ingredient table for inspection.
ingred_df

Unnamed: 0,UserName,FoodCode,uniqueID,Food_Description,Ingredient code,WWEIA Category number,WWEIA Category description,Ingredient description,FoodAmt,FoodNum,RecallNo,Fiber (g),Carbohydrate (g),Energy (kcal),Ingredient weight (g)
0,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1077.0,5804,Pudding,"Milk, whole, 3.25% milkfat, with added vitamin D",122.0,15,4,0.0,4.67,60.0,35.0
1,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1079.0,5804,Pudding,"Milk, reduced fat, fluid, 2% milkfat, with add...",122.0,15,4,0.0,4.91,50.0,37.0
2,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1082.0,5804,Pudding,"Milk, lowfat, fluid, 1% milkfat, with added vi...",122.0,15,4,0.0,5.19,43.0,17.0
3,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1085.0,5804,Pudding,"Milk, nonfat, fluid, with added vitamin A and ...",122.0,15,4,0.0,4.89,34.0,11.0
4,5001,13210300.0,500113210300.0112341515.04122.03,Custard,1123.0,5804,Pudding,"Egg, whole, raw, fresh",122.0,15,4,0.0,0.72,143.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45726,9069,92511015.0,906992511015.0146452616.02372.01,Fruit flavored drink (formerly lemonade),14645.0,7204,Fruit drinks,"Beverages, Fruit flavored drink, less than 3% ...",372.0,6,2,0.0,16.03,64.0,100.0
45727,9069,93401020.0,906993401020.014106377.022293.21,"Wine, table, white",14106.0,7504,Wine,"Alcoholic beverage, wine, table, white",2293.2,7,3,0.0,2.60,82.0,100.0
45728,9069,94000100.0,906994000100.014411211.014972.81,"Water, tap",14411.0,7702,Tap water,"Beverages, water, tap, drinking",4972.8,1,2,0.0,0.00,0.0,100.0
45729,9069,94000100.0,906994000100.014411311.013788.81,"Water, tap",14411.0,7702,Tap water,"Beverages, water, tap, drinking",3788.8,1,3,0.0,0.00,0.0,100.0


In [29]:
# Count how many distinct ingredient codes appear in the table.
ingred_df['Ingredient code'].nunique()

1199

In [30]:
# For each (UserName, FoodCode, FoodNum, RecallNo) group, total the ingredient
# weights and broadcast that total back onto every ingredient row of the group
# (transform keeps the original index and length).
# The string 'sum' replaces np.sum: passing the NumPy callable to transform is
# deprecated in recent pandas and emits a FutureWarning.
ingred_wts = ingred_sum.groupby(level=['UserName', 'FoodCode', 'FoodNum', 'RecallNo']).transform('sum')

In [31]:
# Turn the per-row group totals (a Series) into a one-column DataFrame that
# keeps the same MultiIndex.
ingred_wt = ingred_wts.to_frame()

In [32]:
# The single column still carries the Series name 'Ingredient weight (g)';
# relabel it to say that it holds the per-food total.
ingred_wt = ingred_wt.rename(columns={'Ingredient weight (g)': 'ingredient_weight_total'})

In [33]:
# Convert the grouped weight Series into a one-column DataFrame under the same
# name. NOTE(review): this rebinds `ingred_sum` from Series to DataFrame, so the
# cell cannot be run twice without re-running the groupby (DataFrame has no
# to_frame method).
ingred_sum = ingred_sum.to_frame()

In [34]:
# Display the per-ingredient weight frame (group keys are the MultiIndex).
ingred_sum

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Ingredient weight (g)
UserName,FoodCode,uniqueID,Food_Description,Ingredient code,WWEIA Category number,WWEIA Category description,Ingredient description,FoodAmt,FoodNum,RecallNo,Fiber (g),Carbohydrate (g),Energy (kcal),Unnamed: 14_level_1
5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1077.0,5804,Pudding,"Milk, whole, 3.25% milkfat, with added vitamin D",122.0,15,4,0.0,4.67,60.0,35.0
5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1079.0,5804,Pudding,"Milk, reduced fat, fluid, 2% milkfat, with added vitamin A and vitamin D",122.0,15,4,0.0,4.91,50.0,37.0
5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1082.0,5804,Pudding,"Milk, lowfat, fluid, 1% milkfat, with added vitamin A and vitamin D",122.0,15,4,0.0,5.19,43.0,17.0
5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1085.0,5804,Pudding,"Milk, nonfat, fluid, with added vitamin A and vitamin D (fat free or skim)",122.0,15,4,0.0,4.89,34.0,11.0
5001,13210300.0,500113210300.0112341515.04122.03,Custard,1123.0,5804,Pudding,"Egg, whole, raw, fresh",122.0,15,4,0.0,0.72,143.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9069,92511015.0,906992511015.0146452616.02372.01,Fruit flavored drink (formerly lemonade),14645.0,7204,Fruit drinks,"Beverages, Fruit flavored drink, less than 3% juice, not fortified with vitamin C",372.0,6,2,0.0,16.03,64.0,100.0
9069,93401020.0,906993401020.014106377.022293.21,"Wine, table, white",14106.0,7504,Wine,"Alcoholic beverage, wine, table, white",2293.2,7,3,0.0,2.60,82.0,100.0
9069,94000100.0,906994000100.014411211.014972.81,"Water, tap",14411.0,7702,Tap water,"Beverages, water, tap, drinking",4972.8,1,2,0.0,0.00,0.0,100.0
9069,94000100.0,906994000100.014411311.013788.81,"Water, tap",14411.0,7702,Tap water,"Beverages, water, tap, drinking",3788.8,1,3,0.0,0.00,0.0,100.0


In [35]:
# Put each ingredient's own weight next to its food total; pandas aligns the
# two frames on their shared MultiIndex.
ingred_wt = ingred_wt.assign(
    **{'Ingredient weight (g)': ingred_sum['Ingredient weight (g)']}
)

In [36]:
# Move the MultiIndex levels (the group keys) back into ordinary columns.
ingred_wt = ingred_wt.reset_index()

In [37]:
# Preview the first 15 rows to check totals against individual weights.
ingred_wt.head(15)

Unnamed: 0,UserName,FoodCode,uniqueID,Food_Description,Ingredient code,WWEIA Category number,WWEIA Category description,Ingredient description,FoodAmt,FoodNum,RecallNo,Fiber (g),Carbohydrate (g),Energy (kcal),ingredient_weight_total,Ingredient weight (g)
0,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1077.0,5804,Pudding,"Milk, whole, 3.25% milkfat, with added vitamin D",122.0,15,4,0.0,4.67,60.0,239.134,35.0
1,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1079.0,5804,Pudding,"Milk, reduced fat, fluid, 2% milkfat, with add...",122.0,15,4,0.0,4.91,50.0,239.134,37.0
2,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1082.0,5804,Pudding,"Milk, lowfat, fluid, 1% milkfat, with added vi...",122.0,15,4,0.0,5.19,43.0,239.134,17.0
3,5001,13210300.0,500113210300.01110000041515.04122.01,Custard,1085.0,5804,Pudding,"Milk, nonfat, fluid, with added vitamin A and ...",122.0,15,4,0.0,4.89,34.0,239.134,11.0
4,5001,13210300.0,500113210300.0112341515.04122.03,Custard,1123.0,5804,Pudding,"Egg, whole, raw, fresh",122.0,15,4,0.0,0.72,143.0,239.134,100.0
5,5001,13210300.0,500113210300.01442941515.04122.05,Custard,14429.0,5804,Pudding,"Beverages, water, tap, municipal",122.0,15,4,0.0,0.0,0.0,239.134,1.234
6,5001,13210300.0,500113210300.01933541515.04122.02,Custard,19335.0,5804,Pudding,"Sugars, granulated",122.0,15,4,0.0,99.6,385.0,239.134,37.5
7,5001,13210300.0,500113210300.0204741515.04122.04,Custard,2047.0,5804,Pudding,"Salt, table, iodized",122.0,15,4,0.0,0.0,0.0,239.134,0.4
8,5001,13411000.0,500113411000.01110000031318.0615.6252,"White sauce, milk sauce",1077.0,8412,"Dips, gravies, other sauces","Milk, whole, 3.25% milkfat, with added vitamin D",15.625,13,3,0.0,4.67,60.0,217.125,35.0
9,5001,13411000.0,500113411000.01110000031318.0615.6252,"White sauce, milk sauce",1079.0,8412,"Dips, gravies, other sauces","Milk, reduced fat, fluid, 2% milkfat, with add...",15.625,13,3,0.0,4.91,50.0,217.125,37.0


In [38]:
# Share of the food's total ingredient weight contributed by this ingredient.
ingred_wt['Proportion of ingredient'] = ingred_wt['Ingredient weight (g)'].div(
    ingred_wt['ingredient_weight_total']
)

In [39]:
# Amount of this ingredient eaten: its share of the food times the reported
# food amount ('FoodAmt').
ingred_wt['ingredient_consumed_g'] = ingred_wt['Proportion of ingredient'].mul(
    ingred_wt['FoodAmt']
)

In [40]:
# Fiber eaten from this ingredient. The division by 100 treats 'Fiber (g)' as a
# per-100 g value - presumably how FNDDS reports it; confirm.
ingred_wt['fiber_consumed_g'] = ingred_wt['ingredient_consumed_g'].mul(
    ingred_wt['Fiber (g)'].div(100)
)

In [41]:
# Carbohydrate eaten from this ingredient. The division by 100 treats
# 'Carbohydrate (g)' as a per-100 g value - presumably how FNDDS reports it;
# confirm.
ingred_wt['carb_consumed_g'] = ingred_wt['ingredient_consumed_g'].mul(
    ingred_wt['Carbohydrate (g)'].div(100)
)

In [42]:
# Energy eaten from this ingredient. The division by 100 treats 'Energy (kcal)'
# as a per-100 g value - presumably how FNDDS reports it; confirm.
ingred_wt['cal_consumed'] = ingred_wt['ingredient_consumed_g'].mul(
    ingred_wt['Energy (kcal)'].div(100)
)

In [43]:
# Calories attributed to carbohydrate: grams of carbohydrate eaten times 4
# (presumably the 4 kcal per gram conversion factor - confirm).
ingred_wt['cal_from_carb'] = ingred_wt['carb_consumed_g'].mul(4)

In [44]:
# Save the per-ingredient consumption table. to_csv writes the row index as an
# unnamed first column by default.
ingred_wt.to_csv('data/asa24/asa24_ingredients/ingredient_fiber_carb_weights_nndc_120721.csv')

In [45]:
# Number of rows in which each ingredient description occurs.
ingred_freq = (
    ingred_wt
    .groupby('Ingredient description')['Ingredient description']
    .count()
)

In [46]:
# Order ingredients from most to least frequent, then convert the counts to a
# one-column DataFrame indexed by ingredient description.
ingred_freq = ingred_freq.sort_values(ascending=False)
ingred_freq = ingred_freq.to_frame()

In [47]:
# The count column still carries the name 'Ingredient description'; label it
# 'frequency' in the exported copy only (ingred_freq itself is left unchanged).
frequency_table = ingred_freq.rename(columns={'Ingredient description': 'frequency'})
frequency_table.to_csv('data/asa24/asa24_ingredients/ingredient_frequency_nndc_120721.csv')