## Ingredientize ASA24 food records using FNDDS ingredients

In [1]:
#import packages
import numpy as np
import pandas as pd

In [2]:
# Load data: the FNDDS 2018 table and the ASA24 recall records, read in that order
fndds, asa24 = (
    pd.read_csv(csv_path)
    for csv_path in ('fndds2018.csv', 'fl100_recalls_qcd.csv')
)

In [3]:
# Drop the two WWEIA category columns from the FNDDS table (modifies fndds in place)
fndds.drop(
    ['WWEIA Category number', 'WWEIA Category description'],
    axis='columns',
    inplace=True,
)

# Relabel the remaining columns by position. The first becomes 'FoodCode' so it
# matches the ASA24 key used for merging; the second becomes 'fndds_description'.
# NOTE(review): this assumes exactly eight columns remain, in this order --
# confirm against the fndds2018.csv header.
fndds.columns = [
    'FoodCode',
    'fndds_description',
    'Seq num',
    'Ingredient code',
    'Ingredient description',
    'Ingredient weight (g)',
    'Retention code',
    'Moisture change (%)',
]

In [4]:
# Remove rows whose FoodCode is 9 (per the original note: food codes that were
# not present in ASA24 recalls -- TODO confirm what code 9 denotes)
asa24 = asa24.loc[asa24['FoodCode'].ne(9)]

In [5]:
# Give the ASA24 food description its own name so it is not confused with the
# FNDDS food description once the two datasets are merged
asa24 = asa24.rename(columns={'Food_Description': 'asa_description'})

In [6]:
# Ingredientize the ASA24 food records: join every recall row to the FNDDS
# ingredient rows that share its FoodCode, giving one row per ingredient.
# The join is an inner join, so recall rows with no FNDDS match are dropped.
asa_fndds = asa24.merge(fndds, how='inner', on='FoodCode')

In [8]:
# Rearrange columns: the columns of interest come first, followed by every
# other column in its existing order
cols_to_order = [
    'UserName',
    'FoodCode',
    'asa_description',
    'fndds_description',
    'Seq num',
    'Ingredient code',
    'Ingredient description',
]
new_columns = cols_to_order + [
    col for col in asa_fndds.columns if col not in cols_to_order
]
asa_fndds = asa_fndds.loc[:, new_columns]

In [9]:
# Distinct ASA24 food descriptions before the merge and after it
asa_set = {*asa24['asa_description'].unique()}
asa_fndds_set = {*asa_fndds['asa_description'].unique()}

In [10]:
# Descriptions present in the ASA24 recalls but absent from the merged table
food_desc_diff = asa_set - asa_fndds_set
# NOTE(review): the number printed is a count of distinct food descriptions,
# although the message calls them "foodcodes"
print(len(food_desc_diff), 'foodcodes not mapping from ASA24 to FNDDS2018')

483 foodcodes not mapping from ASA24 to FNDDS2018


In [11]:
# Turn the set of unmapped descriptions into a one-column DataFrame.
# The column is named in the constructor rather than by assigning .columns
# afterwards: an empty set yields a frame with zero columns, and assigning a
# one-element column list to it raises a length-mismatch ValueError.
food_desc_diff = pd.DataFrame(
    list(food_desc_diff),
    columns=['missing_food_descriptions'],
)

In [12]:
# Plain Python list of the unmapped descriptions
food_list = food_desc_diff.loc[:, 'missing_food_descriptions'].to_list()

In [13]:
# Boolean mask over the recall rows whose description is in the unmapped list
missing_foods = asa24['asa_description'].isin(food_list)
# Keep those rows, reduced to one row per distinct description
fndds_missing = asa24.loc[missing_foods].drop_duplicates(subset='asa_description')

In [17]:
# Export the FoodCode / description pairs that did not map to FNDDS
fndds_missing_foods = fndds_missing.loc[:, ['FoodCode', 'asa_description']]
# to_csv documents `index` as a bool; pass False (not None) to omit the row
# index from the file. The header row is still written.
fndds_missing_foods.to_csv('fndds_missing_foods.csv', index=False, header=True)