## Separate core and mixed foods for ingredientized food recalls

### Create a unique list of food ingredients to determine which FNDDS ingredients need to be further ingredientized

In [1]:
# Import packages
import numpy as np
import pandas as pd

In [2]:
# Read the matched ASA24/FNDDS ingredient table (per the file name) into a DataFrame
asa_fndds = pd.read_csv(
    filepath_or_buffer='data/asa24/asa24_ingredients/asa_fndds_matched_101121.csv',
)

In [3]:
# Count the distinct ingredient descriptions (missing values are not counted)
asa_fndds.loc[:, 'Ingredient description'].nunique()

1338

In [4]:
# Keep only the first row encountered for each distinct ingredient description
asa_fndds_unique_ingred = asa_fndds.loc[
    ~asa_fndds.duplicated(subset=['Ingredient description'], keep='first')
]

In [5]:
# Write the de-duplicated ingredient table to disk.
# `index` is documented as a boolean; pass False explicitly (rather than relying on
# None being falsy) so the row index is left out of the CSV.
asa_fndds_unique_ingred.to_csv(
    'data/asa24/asa24_ingredients/asa_fndds_unique_ingred_101121.csv',
    index=False,
)

In [9]:
# Dimensions of the de-duplicated table as (rows, columns)
(len(asa_fndds_unique_ingred.index), len(asa_fndds_unique_ingred.columns))

(1338, 110)

### The following code separates FNDDS mixed foods from single-ingredient (core) foods, based on whether a FoodCode contains more than one ingredient

In [3]:
# Rows whose sequence number is 2 or higher; a FoodCode owning such a row
# lists more than one ingredient
mixed_food = asa_fndds[asa_fndds['Seq num'].ge(2)]
# Number of such rows
mixed_food.shape[0]

15574

In [4]:
# Distinct FoodCodes that have more than one ingredient
pd.unique(mixed_food['FoodCode'])


array([21500100., 75226111., 58106512., ..., 71601040., 93301030.,
       26319121.])

In [5]:
# Every row (first ingredient included) of any FoodCode flagged as multi-ingredient
mixed = asa_fndds.loc[asa_fndds['FoodCode'].isin(mixed_food['FoodCode'])]
mixed

Unnamed: 0,UserName,FoodCode,Food_Description,fndds_description,FoodAmt,Seq num,Ingredient code,Ingredient description,Ingredient weight (g),CARB,...,F_CITMLB,F_OTHER,D_TOTAL,D_MILK,D_YOGURT,D_CHEESE,FoodComp,Modified,Retention code,Moisture change (%)
16,5002,21500100.0,"Ground beef, less than 80% lean, cooked (forme...","Ground beef, cooked",79.0,1,23578,"Beef, ground, 75% lean meat / 25% fat, patty, ...",40.000,0.000,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
17,5002,21500100.0,"Ground beef, less than 80% lean, cooked (forme...","Ground beef, cooked",79.0,2,23573,"Beef, ground, 80% lean meat / 20% fat, patty, ...",30.000,0.000,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
18,5002,21500100.0,"Ground beef, less than 80% lean, cooked (forme...","Ground beef, cooked",79.0,3,23568,"Beef, ground, 85% lean meat / 15% fat, patty, ...",15.000,0.000,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
19,5002,21500100.0,"Ground beef, less than 80% lean, cooked (forme...","Ground beef, cooked",79.0,4,23563,"Beef, ground, 90% lean meat / 10% fat, patty, ...",15.000,0.000,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
20,5002,21500100.0,"Ground beef, less than 80% lean, cooked (forme...","Ground beef, cooked",79.0,5,2047,"Salt, table, iodized",0.840,0.000,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34771,9057,93301030.0,Bloody Mary,Bloody Mary,360.0,5,2055,"Horseradish, prepared",5.000,11.952,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
34775,9069,26319121.0,"Shrimp, baked or broiled, made with butter","Shrimp, baked or broiled, made with butter",145.0,1,15149,"Crustaceans, shrimp, mixed species, raw (may c...",453.600,1.711,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,2753,-20.0
34776,9069,26319121.0,"Shrimp, baked or broiled, made with butter","Shrimp, baked or broiled, made with butter",145.0,2,2047,"Salt, table, iodized",3.000,1.711,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,-20.0
34777,9069,26319121.0,"Shrimp, baked or broiled, made with butter","Shrimp, baked or broiled, made with butter",145.0,3,1001,"Butter, salted",14.000,1.711,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0,-20.0


In [6]:
# Collapse to one row per ingredient description (the first occurrence is kept);
# note that this rebinds `mixed` to the de-duplicated frame
mixed = mixed.drop_duplicates(subset=['Ingredient description'], keep='first')

In [8]:
# Rows whose FoodCode never appears among the multi-ingredient FoodCodes
core = asa_fndds.loc[~asa_fndds['FoodCode'].isin(mixed_food['FoodCode'])]
core

Unnamed: 0,UserName,FoodCode,Food_Description,fndds_description,FoodAmt,Seq num,Ingredient code,Ingredient description,Ingredient weight (g),CARB,...,F_CITMLB,F_OTHER,D_TOTAL,D_MILK,D_YOGURT,D_CHEESE,FoodComp,Modified,Retention code,Moisture change (%)
0,5001,75117010.0,"Onion, young green, cooked, from fresh, NS as ...","Onions, green, raw",29.0,1,11291,"Onions, spring or scallions (includes tops and...",100.0,2.1460,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
1,6022,75117010.0,"Onions, young green, raw","Onions, green, raw",25.0,1,11291,"Onions, spring or scallions (includes tops and...",100.0,1.8350,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
2,8096,75117010.0,"Onions, young green, raw","Onions, green, raw",5.0,1,11291,"Onions, spring or scallions (includes tops and...",100.0,0.3670,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
3,9041,75117010.0,"Onions, young green, raw","Onions, green, raw",15.0,1,11291,"Onions, spring or scallions (includes tops and...",100.0,1.1010,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
4,7122,75117010.0,"Onions, young green, raw","Onions, green, raw",12.5,1,11291,"Onions, spring or scallions (includes tops and...",100.0,0.9175,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34759,9050,95310600.0,Red Bull Energy Drink,Energy drink (Red Bull),240.0,1,14154,"Beverages, Energy drink, RED BULL",100.0,26.2560,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
34766,9052,53202000.0,"Cookie, almond","Cookie, almond",30.0,1,18192,"Cookies, shortbread, commercially prepared, plain",100.0,19.3500,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
34772,9057,53381000.0,"Pie, lemon meringue","Pie, lemon meringue",129.0,1,18320,"Pie, lemon meringue, commercially prepared",100.0,60.8880,...,0.0000,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0
34773,9067,62105000.0,"Blueberries, dried","Blueberries, dried",15.0,1,9163,"Blueberries, dried, sweetened",100.0,12.0000,...,0.3105,0.0,0.0,0.0,0.0,0.0,1.0,,0,0.0


In [9]:
# Number of distinct FoodCodes among the single-ingredient (core) rows
core.loc[:, 'FoodCode'].nunique()

867