## Deconvolute ingredient codes that are represented by 8-digit food codes, so that each one can be broken down into its underlying ingredients (ingredientization)

In [1]:
from pathlib import Path

import pandas as pd

In [3]:
# Read both input tables from the working directory, in this order:
#   fndds      <- fndds2018.csv
#   asa_fndds  <- asa_fndds_matched_120721.csv
fndds, asa_fndds = (
    pd.read_csv(csv_name)
    for csv_name in ('fndds2018.csv', 'asa_fndds_matched_120721.csv')
)

In [4]:
# Keep only the rows whose 'Ingredient code' is above 10000000, i.e. the
# ingredient entries that are written as 8-digit foodcodes.
is_foodcode = asa_fndds['Ingredient code'].gt(10000000)
asa_fndds = asa_fndds.loc[is_foodcode]

In [5]:
# Show (rows, columns) of asa_fndds as a quick size check at this point.
asa_fndds.shape

(3821, 112)

In [6]:
# Collapse to one row per distinct 'Ingredient code'; when a code occurs on
# several rows, only its first occurrence is kept.
# NOTE(review): the Series printed beneath this cell (Name: FoodCode,
# Length: 165) cannot come from this assignment, which displays nothing --
# it looks like leftover output from another cell; re-run and clear it.
asa_fndds = asa_fndds.drop_duplicates(subset=['Ingredient code'], keep='first')

381      75226111
852      75403200
1174     58145112
1175     58145112
1432     41301990
           ...   
34546    58104730
34632    53452420
34633    53452420
34642    27347250
34738    58100520
Name: FoodCode, Length: 165, dtype: int64

In [6]:
# Trim fndds to the columns needed for matching and relabel its two code
# columns in ONE statement:
#   'Ingredient code' -> 'Ingred_code'
#   'Food code'       -> 'Ingredient code'  (the join key used in the next
#                                            merge with asa_fndds)
# The name 'Ingredient code' changes meaning here. Doing the drop and both
# renames atomically means the frame can never be left half-renamed, and the
# renames cannot be applied in the wrong order or twice by re-running
# individual cells (which would silently create two 'Ingred_code' columns).
fndds = (
    fndds
    .drop(columns=[
        'Main food description',
        'WWEIA Category number',
        'WWEIA Category description',
        'Seq num',
        'Retention code',
        'Moisture change (%)',
    ])
    .rename(columns={
        'Ingredient code': 'Ingred_code',
        'Food code': 'Ingredient code',
    })
)

In [9]:
# Attach to every asa_fndds row the fndds rows that share its
# 'Ingredient code'. This is an inner join, so rows without a match on
# either side are dropped.
asa_recode = asa_fndds.merge(fndds, how='inner', on='Ingredient code')

In [10]:
# This is the first write into ingred_recode/. DataFrame.to_csv does not
# create missing parent directories, so make sure the folder exists; on a
# fresh checkout this cell would otherwise fail.
Path('ingred_recode').mkdir(parents=True, exist_ok=True)
asa_recode.to_csv('ingred_recode/asa_recode.csv')

In [12]:
# Of the matched rows, keep those whose 'Ingred_code' is itself above
# 10000000, i.e. get only the 8-digit foodcodes.
needs_remap = asa_recode['Ingred_code'].gt(10000000)
asa_code_remap = asa_recode.loc[needs_remap]

In [13]:
# One row per distinct 'Ingred_code': mark every repeat of a code after its
# first occurrence and keep only the unmarked rows.
is_repeat = asa_code_remap.duplicated(subset='Ingred_code')
asa_code_remap_unique = asa_code_remap.loc[~is_repeat]

### The following code iterates through the ingredient codes that contain an 8-digit foodcode and merges them with the parent foodcode to find the embedded ingredient codes. Some ingredient codes require several mapping iterations.

In [15]:
# Save the de-duplicated codes for review. The DataFrame index is written as
# the first, unnamed column (pandas' default, spelled out here).
asa_code_remap_unique.to_csv(
    'ingred_recode/unique_codes_to_map_102021.csv',
    index=True,
)

In [16]:
def expand_embedded_foodcodes(parent, recipes, key, level):
    """Look up the codes in ``parent[key]`` as food codes and expand them.

    Parameters
    ----------
    parent : pandas.DataFrame
        Frame whose ``key`` column holds the codes to look up.
    recipes : pandas.DataFrame
        Lookup table with the food code in 'Ingredient code' and the code it
        expands to in 'Ingred_code'. It is not modified.
    key : str
        Name of the join column in ``parent``.
    level : int
        Number of this expansion round; used to tag the lookup's remaining
        columns so that repeated merges never produce clashing names.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``parent`` with ``recipes`` on ``key``: rows of
        ``parent`` with no matching food code are dropped, matching rows are
        repeated once per lookup row. The newly found codes are in
        'Ingred_code'; every other lookup column is named '<column>_lvl<level>'.
    """
    lookup = recipes.rename(columns={'Ingredient code': key})
    extra_cols = [col for col in lookup.columns if col not in (key, 'Ingred_code')]
    lookup = lookup.rename(columns={col: f'{col}_lvl{level}' for col in extra_cols})
    return parent.merge(lookup, how='inner', on=key)


# Join column used by each round, in order (asa_recode2 ... asa_recode6).
SUBCODE_COLUMNS = [
    'Ingredient_subcode',
    'Ingredient_subcode2',
    'Ingredient_subcode3',
    'Ingredient_subcode4',
    'Ingredient_subcode5',
]

# Each round takes the previous result, relabels its 'Ingred_code' column as
# that round's join column, and merges it with fndds again.
#
# Why the lookup columns are level-tagged: the same fndds table is merged onto
# the result five more times. If fndds carries any column besides its two code
# columns (not visible here -- confirm), the default '_x'/'_y' suffixes repeat
# after a few rounds; pandas >= 2.0 raises MergeError for that, and older
# versions write duplicate column names. Only those extra column names differ
# from the earlier CSV layout; file names, join columns and rows are unchanged.
#
# fndds itself is left untouched (no in-place renames), so this cell can be
# re-run safely.
frames = [asa_recode]
for level, key in enumerate(SUBCODE_COLUMNS, start=2):
    # Keep the relabelled parent under its usual name, as the original
    # in-place renames did.
    frames[-1] = frames[-1].rename(columns={'Ingred_code': key})
    expanded = expand_embedded_foodcodes(frames[-1], fndds, key, level)
    # Written before the next relabel, so each file carries the newly found
    # codes under 'Ingred_code'.
    expanded.to_csv(f'ingred_recode/asa_recode{level}.csv')
    frames.append(expanded)

asa_recode, asa_recode2, asa_recode3, asa_recode4, asa_recode5, asa_recode6 = frames

# The number of rounds is fixed; flag it if the deepest level still contains
# 8-digit foodcodes that would need a further round.
still_nested = int(asa_recode6['Ingred_code'].gt(10000000).sum())
if still_nested:
    print(f'WARNING: {still_nested} rows in asa_recode6 still hold an 8-digit '
          'foodcode; another mapping iteration is needed.')

In [36]:
# All CSV files written above were combined manually to create the final mappings file: ingred_code_remapped_102021.csv