## The following code is used to deconvolute ingredient codes that are represented by 8-digit food codes for proper ingredientization

In [1]:
import pandas as pd

In [2]:
# Load data
fndds = pd.read_csv('data/fndds/fndds_2018/fndds2018.csv')
asa_fndds = pd.read_csv('data/asa24/asa24_ingredients/asa_fndds_matched_120721.csv')

In [3]:
# Extract all ingredient codes represented as 8-digit foodcodes
asa_fndds = asa_fndds[asa_fndds['Ingredient code']>10000000]

In [4]:
asa_fndds.shape

(3821, 110)

In [5]:
asa_fndds = asa_fndds.drop_duplicates(subset ='Ingredient code')
asa_fndds.FoodCode.astype(int)

381      75226111
852      75403200
1174     58145112
1175     58145112
1432     41301990
           ...   
34546    58104730
34632    53452420
34633    53452420
34642    27347250
34738    58100520
Name: FoodCode, Length: 165, dtype: int64

In [6]:
fndds.drop(columns=['Main food description', 'WWEIA Category number', 'WWEIA Category description', 'Seq num', 'Retention code', 'Moisture change (%)'], inplace=True)

In [7]:
fndds.rename(columns={'Ingredient code':'Ingred_code'}, inplace = True)

In [8]:
fndds.rename(columns={'Food code':'Ingredient code'}, inplace = True)

In [9]:
asa_recode = pd.merge(asa_fndds, fndds, on='Ingredient code')

In [10]:
asa_recode.to_csv('ingred_recode/asa_recode.csv')

In [11]:
asa_recode[asa_recode.Ingred_code > 10000000]

Unnamed: 0,UserName,FoodCode,Food_Description,fndds_description,FoodAmt,Seq num,Ingredient code,Ingredient description_x,Ingredient weight (g)_x,UserID,...,D_MILK,D_YOGURT,D_CHEESE,FoodComp,Modified,Retention code,Moisture change (%),Ingred_code,Ingredient description_y,Ingredient weight (g)_y
0,5005,75226111.0,"Peppers, hot, cooked, NS as to form, fat added...","Hot peppers, cooked",17.62500,3,81200100,"Oil or table fat, NFS",3.0,59ec1000-a384-4f54-823c-b35659d5e8f0,...,0.000000,0.0,0.00000,1.0,,0,-4.0,81100000,"Table fat, NFS",50.0
1,5005,75226111.0,"Peppers, hot, cooked, NS as to form, fat added...","Hot peppers, cooked",17.62500,3,81200100,"Oil or table fat, NFS",3.0,59ec1000-a384-4f54-823c-b35659d5e8f0,...,0.000000,0.0,0.00000,1.0,,0,-4.0,82101000,"Vegetable oil, NFS",50.0
10,5008,58145112.0,"Macaroni or noodles with cheese, from boxed mi...","Macaroni or noodles with cheese, made from pac...",217.00000,4,81100000,"Table fat, NFS",56.0,4502f149-b5d2-478c-bc5a-e2a6075f421c,...,0.006510,0.0,0.24304,1.0,,0,-2.0,81100500,"Butter, NFS",60.0
11,5008,58145112.0,"Macaroni or noodles with cheese, from boxed mi...","Macaroni or noodles with cheese, made from pac...",217.00000,4,81100000,"Table fat, NFS",56.0,4502f149-b5d2-478c-bc5a-e2a6075f421c,...,0.006510,0.0,0.24304,1.0,,0,-2.0,81102000,"Margarine, NFS",40.0
13,5014,41301990.0,"Chickpeas, dry, cooked, NS as to fat added in ...","Chickpeas, NFS",31.68750,1,41302010,"Chickpeas, from dried, fat added",100.0,49ef5108-c910-4811-bbde-643e01abe475,...,0.000000,0.0,0.00000,1.0,,0,0.0,82101000,"Vegetable oil, NFS",7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
441,7016,51300150.0,"Bread, whole wheat, NS as to 100%, made from h...","Bread, whole wheat, made from home recipe or p...",40.54169,1,51300140,"Bread, whole wheat, made from home recipe or p...",100.0,b50999f0-6e91-4247-af0c-7a762885698a,...,0.016217,0.0,0.00000,1.0,,306,-9.0,11100000,"Milk, NFS",122.0
448,7028,75143000.0,"Lettuce, salad with assorted vegetables includ...","Lettuce, salad with assorted vegetables includ...",36.50000,1,75114000,"Mixed salad greens, raw",70.0,61cc1612-8144-4aeb-b148-ec705189f35d,...,0.000000,0.0,0.00000,1.0,,0,0.0,75113000,"Lettuce, raw",50.0
449,7028,75143000.0,"Lettuce, salad with assorted vegetables includ...","Lettuce, salad with assorted vegetables includ...",36.50000,1,75114000,"Mixed salad greens, raw",70.0,61cc1612-8144-4aeb-b148-ec705189f35d,...,0.000000,0.0,0.00000,1.0,,0,0.0,75113060,"Lettuce, Boston, raw",25.0
459,8045,53452420.0,"Pastry, puff, custard or cream filled, iced or...","Pastry, puff, custard or cream filled, iced or...",64.99950,2,13210300,Custard,22.0,436bf63a-9c25-47bd-9356-a8e8e881f5c0,...,0.052000,0.0,0.00000,1.0,,0,0.0,11100000,"Milk, NFS",488.0


In [12]:
asa_code_remap = asa_recode[asa_recode.Ingred_code > 10000000]

In [13]:
asa_code_remap_unique = asa_code_remap.drop_duplicates(subset = 'Ingred_code')

In [14]:
asa_code_remap_unique

Unnamed: 0,UserName,FoodCode,Food_Description,fndds_description,FoodAmt,Seq num,Ingredient code,Ingredient description_x,Ingredient weight (g)_x,UserID,...,D_MILK,D_YOGURT,D_CHEESE,FoodComp,Modified,Retention code,Moisture change (%),Ingred_code,Ingredient description_y,Ingredient weight (g)_y
0,5005,75226111.0,"Peppers, hot, cooked, NS as to form, fat added...","Hot peppers, cooked",17.625,3,81200100,"Oil or table fat, NFS",3.0,59ec1000-a384-4f54-823c-b35659d5e8f0,...,0.0,0.0,0.0,1.0,,0,-4.0,81100000,"Table fat, NFS",50.0
1,5005,75226111.0,"Peppers, hot, cooked, NS as to form, fat added...","Hot peppers, cooked",17.625,3,81200100,"Oil or table fat, NFS",3.0,59ec1000-a384-4f54-823c-b35659d5e8f0,...,0.0,0.0,0.0,1.0,,0,-4.0,82101000,"Vegetable oil, NFS",50.0
10,5008,58145112.0,"Macaroni or noodles with cheese, from boxed mi...","Macaroni or noodles with cheese, made from pac...",217.0,4,81100000,"Table fat, NFS",56.0,4502f149-b5d2-478c-bc5a-e2a6075f421c,...,0.00651,0.0,0.24304,1.0,,0,-2.0,81100500,"Butter, NFS",60.0
11,5008,58145112.0,"Macaroni or noodles with cheese, from boxed mi...","Macaroni or noodles with cheese, made from pac...",217.0,4,81100000,"Table fat, NFS",56.0,4502f149-b5d2-478c-bc5a-e2a6075f421c,...,0.00651,0.0,0.24304,1.0,,0,-2.0,81102000,"Margarine, NFS",40.0
26,5019,32130265.0,"Egg omelet or scrambled egg, with ham or bacon...","Egg omelet or scrambled egg, with meat, NS as ...",170.0,4,99992230,Breakfast meat as ingredient in omelet,15.0,e3a9d3ee-329e-4736-96e7-fead3c7fe69f,...,0.153,0.0,0.0,1.0,,0,0.0,25220710,Chorizo,10.0
27,5019,32130265.0,"Egg omelet or scrambled egg, with ham or bacon...","Egg omelet or scrambled egg, with meat, NS as ...",170.0,4,99992230,Breakfast meat as ingredient in omelet,15.0,e3a9d3ee-329e-4736-96e7-fead3c7fe69f,...,0.153,0.0,0.0,1.0,,0,0.0,25221830,Turkey or chicken sausage,10.0
46,5059,58100120.0,Burrito with beef and beans,Burrito with meat and beans,238.0,4,27116350,"Stewed seasoned ground beef, Mexican style",444.0,6ccc6caa-e77f-4491-b9f4-52df3c1b11e2,...,0.0,0.0,0.0,1.0,,770,0.0,21500000,"Ground beef, raw",908.0
69,6039,58160132.0,Rice with beans and chicken W/O FAT,"Beans and rice, with meat",358.5,1,41104250,Pinto beans with meat,45.0,a8f38458-bcd3-496c-b3d8-e182328f7f42,...,0.0,0.0,0.0,1.0,,0,0.0,41104010,"Pinto beans, from dried, fat added",86.0
70,6039,58160132.0,Rice with beans and chicken W/O FAT,"Beans and rice, with meat",358.5,1,41104250,Pinto beans with meat,45.0,a8f38458-bcd3-496c-b3d8-e182328f7f42,...,0.0,0.0,0.0,1.0,,0,0.0,89901002,"Ham, for use with vegetables",7.0
71,6039,58160132.0,Rice with beans and chicken W/O FAT,"Beans and rice, with meat",358.5,1,41104250,Pinto beans with meat,45.0,a8f38458-bcd3-496c-b3d8-e182328f7f42,...,0.0,0.0,0.0,1.0,,0,0.0,89901004,"Beef, for use with vegetables",7.0


### Following code is to iterate through the ingredient codes that contain an 8-digit foodcode and merge with the parent foodcode to find embedded ingredient codes, some ingredient codes require mapping iterations

In [15]:
asa_code_remap_unique.to_csv('ingred_recode/unique_codes_to_map_102021.csv')

In [16]:
asa_recode.rename(columns={'Ingred_code':'Ingredient_subcode'}, inplace=True)

In [17]:
fndds.rename(columns={'Ingredient code':'Ingredient_subcode'}, inplace=True)

In [18]:
asa_recode2 = pd.merge(asa_recode, fndds, on='Ingredient_subcode')

In [19]:
asa_recode2.to_csv('ingred_recode/asa_recode2.csv')

In [20]:
asa_recode2.rename(columns={'Ingred_code':'Ingredient_subcode2'}, inplace=True)

In [21]:
fndds.rename(columns={'Ingredient_subcode':'Ingredient_subcode2'}, inplace=True)

In [22]:
asa_recode3 = pd.merge(asa_recode2, fndds, on='Ingredient_subcode2')

In [23]:
asa_recode3.to_csv('ingred_recode/asa_recode3.csv')

In [24]:
asa_recode3.rename(columns={'Ingred_code':'Ingredient_subcode3'}, inplace=True)

In [25]:
fndds.rename(columns={'Ingredient_subcode2':'Ingredient_subcode3'}, inplace=True)

In [26]:
asa_recode4 = pd.merge(asa_recode3, fndds, on='Ingredient_subcode3')

In [27]:
asa_recode4.to_csv('ingred_recode/asa_recode4.csv')

In [28]:
asa_recode4.rename(columns={'Ingred_code':'Ingredient_subcode4'}, inplace=True)

In [29]:
fndds.rename(columns={'Ingredient_subcode3':'Ingredient_subcode4'}, inplace=True)

In [30]:
asa_recode5 = pd.merge(asa_recode4, fndds, on='Ingredient_subcode4')

In [31]:
asa_recode5.to_csv('ingred_recode/asa_recode5.csv')

In [32]:
asa_recode5.rename(columns={'Ingred_code':'Ingredient_subcode5'}, inplace=True)

In [33]:
fndds.rename(columns={'Ingredient_subcode4':'Ingredient_subcode5'}, inplace=True)

In [34]:
asa_recode6 = pd.merge(asa_recode5, fndds, on='Ingredient_subcode5')

In [35]:
asa_recode6.to_csv('ingred_recode/asa_recode6.csv')

In [36]:
# All csv files combined manually to create the final mappings: ingred_code_remapped_102021.csv