# **The nutrients of the ingredients**

## Configuration:

Import necessary entities:

In [1]:
import os
import sys

sys.path.append(
    os.path.abspath(
        os.path.join(
            os.getcwd(),
            os.pardir,
        ),
    ),
)

from json import load
from typing import Any
from pandas import (
    DataFrame,
    read_csv,
    set_option,
)

from src.models.fdc import FDC

Prepare *Pandas* dataframe output:

In [2]:
set_option("display.width", 120, )
set_option("display.max_columns", None, )
set_option("display.expand_frame_repr", False, )

## Preprocessing:

Create a dictionary for `load()` function calling:

In [3]:
load_json_params: dict[str, str] = {
    "file": "food.json",

    "file_path": "../data/json/",
}

Read the file `food.json` data:

In [4]:
with open(
    encoding="utf-8",
    file=load_json_params["file_path"] + load_json_params["file"],
) as file:
    food_data: dict[str, list[str]] = load(file, )
    ingredients: list[str] = food_data["ingredients"]

Check `ingredients` list:

In [5]:
ingredients

['cod',
 'fig',
 'egg',
 'gin',
 'ham',
 'oat',
 'nut',
 'pea',
 'rum',
 'rye',
 'soy',
 'tea',
 'sage',
 'port',
 'date',
 'beef',
 'beet',
 'beer',
 'bran',
 'brie',
 'bass',
 'crab',
 'bean',
 'clam',
 'feta',
 'duck',
 'corn',
 'dill',
 'kiwi',
 'leek',
 'lime',
 'kale',
 'mint',
 'pear',
 'orzo',
 'plum',
 'okra',
 'pork',
 'seed',
 'sake',
 'rice',
 'tofu',
 'tuna',
 'veal',
 'yuca',
 'wine',
 'lamb',
 'apple',
 'basil',
 'bread',
 'bacon',
 'chive',
 'chard',
 'chile',
 'curry',
 'clove',
 'cumin',
 'anise',
 'grape',
 'honey',
 'goose',
 'guava',
 'mango',
 'lemon',
 'melon',
 'olive',
 'peach',
 'pecan',
 'pasta',
 'onion',
 'poppy',
 'prune',
 'quail',
 'squid',
 'thyme',
 'trout',
 'vodka',
 'midori',
 'almond',
 'barley',
 'brandy',
 'cashew',
 'celery',
 'carrot',
 'caviar',
 'capers',
 'bulgur',
 'butter',
 'cherry',
 'banana',
 'coffee',
 'endive',
 'fennel',
 'garlic',
 'lentil',
 'hummus',
 'kirsch',
 'ginger',
 'lychee',
 'mussel',
 'mezcal',
 'orange',
 'peanut',
 'p

Create a `Food Data Central` representative:

In [6]:
fdc: FDC = FDC()

## The nutrients of the ingredients:

Fetch ingredients data:

In [7]:
ingredients_data: list[
    dict[str, Any] | None
] = await fdc.fetch_ingredients_data(ingredients, )

Check `ingredients_data` list:

In [8]:
ingredients_data

[{'totalHits': 813,
  'currentPage': 1,
  'totalPages': 813,
  'pageList': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  'foodSearchCriteria': {'query': 'cod',
   'generalSearchInput': 'cod',
   'pageNumber': 1,
   'numberOfResultsPerPage': 50,
   'pageSize': 1,
   'requireAllWords': False},
  'foods': [{'fdcId': 2099857,
    'description': 'COD',
    'dataType': 'Branded',
    'gtinUpc': '015292210037',
    'publishedDate': '2021-10-28',
    'brandOwner': "Morey's Seafood International",
    'brandName': "MOREY'S",
    'ingredients': 'COD, WATER, SUN-DRIED TOMATOES, BLACK OLIVES, PARMESAN CHEESE (PARTIALLY SKIM MILK, CULTURES, SALT, ENZYMES), MODIFIED CORN STARCH, SALT, DEHYDRATED GARLIC, CORN SYRUP SOLIDS, TOMATO POWDER, DEHYDRATED TOMATO, DEHYDRATED PARSLEY, MALTODEXTRIN, DEHYDRATED SUN-DRIED TOMATO, LEMON JUICE, GUAR GUM, NATURAL FLAVOR, OLIVE OIL BLEND (CANOLA OIL AND OLIVE OIL), BASIL, POTATO STARCH.',
    'marketCountry': 'United States',
    'foodCategory': 'Frozen Fish & Seafood',
    'm

Parse ingredients nutrients data:

In [9]:
ingredients_nutrients_data: list[
    list[list[str | None, float, str]]
] = FDC.parse_ingredients_nutrients_data(ingredients_data, )

Check `ingredients_nutrients_data` list:

In [10]:
ingredients_nutrients_data

[[['Protein', 12.4, 'G'],
  ['Total lipid (fat)', 2.94, 'G'],
  ['Carbohydrate, by difference', 5.29, 'G'],
  ['Energy', 100, 'KCAL'],
  ['Total Sugars', 0.0, 'G'],
  ['Fiber, total dietary', 0.6, 'G'],
  ['Calcium, Ca', 35.0, 'MG'],
  ['Iron, Fe', 0.85, 'MG'],
  ['Sodium, Na', 418, 'MG'],
  ['Vitamin A, IU', 118, 'IU'],
  ['Vitamin C, total ascorbic acid', 3.5, 'MG'],
  ['Cholesterol', 32.0, 'MG'],
  ['Fatty acids, total trans', 0.0, 'G'],
  ['Fatty acids, total saturated', 0.59, 'G']],
 [['Protein', 0.0, 'G'],
  ['Total lipid (fat)', 0.0, 'G'],
  ['Carbohydrate, by difference', 65.0, 'G'],
  ['Energy', 250, 'KCAL'],
  ['Total Sugars', 65.0, 'G'],
  ['Fiber, total dietary', 0.0, 'G'],
  ['Calcium, Ca', 15.0, 'MG'],
  ['Iron, Fe', 0.0, 'MG'],
  ['Potassium, K', 105, 'MG'],
  ['Sodium, Na', 5.0, 'MG'],
  ['Vitamin D (D2 + D3), International Units', 0.0, 'IU'],
  ['Sugars, added', 60.0, 'G'],
  ['Cholesterol', 0.0, 'MG'],
  ['Fatty acids, total trans', 0.0, 'G'],
  ['Fatty acids, total s

Standardize ingredients nutrients data:

In [11]:
std_ingredients_nutrients_data: list[
    list[list[str, float]]
] = FDC.standardize_ingredients_nutrients_data(ingredients_nutrients_data, )

Check `ingredients_nutrients_data` list:

In [12]:
std_ingredients_nutrients_data

[[['protein', 12.4],
  ['fat', 2.94],
  ['carbohydrate', 5.29],
  ['sugars', 0.0],
  ['fiber', 0.6],
  ['calcium', 0.035],
  ['iron', 0.00085],
  ['sodium', 0.418],
  ['vitamin a', 3.54e-05],
  ['vitamin c', 0.0035],
  ['cholesterol', 0.032],
  ['saturated fat', 0.59]],
 [['protein', 0.0],
  ['fat', 0.0],
  ['carbohydrate', 65.0],
  ['sugars', 65.0],
  ['fiber', 0.0],
  ['calcium', 0.015],
  ['iron', 0.0],
  ['potassium', 0.105],
  ['sodium', 0.005],
  ['vitamin d', 0.0],
  ['cholesterol', 0.0],
  ['saturated fat', 0.0]],
 [['protein', 6.41],
  ['fat', 28.8],
  ['carbohydrate', 57.7],
  ['sugars', 54.5],
  ['fiber', 3.2],
  ['calcium', 0.128],
  ['iron', 0.0],
  ['sodium', 0.176],
  ['vitamin c', 0.0],
  ['cholesterol', 0.032],
  ['saturated fat', 16.0],
  ['vitamin a', 0.0]],
 [['protein', 0.0],
  ['fat', 0.0],
  ['carbohydrate', 0.0],
  ['sugars', 0.0],
  ['fiber', 0.0],
  ['calcium', 0.0],
  ['iron', 4e-05],
  ['magnesium', 0.0],
  ['phosphorus', 0.004],
  ['potassium', 0.002],
  ['

Simplify ingredients nutrients data:

In [13]:
std_ingredients_nutrients_data: list[
    dict[str, float]
] = FDC.get_standardized_ingredients_nutrients_data(
    "food.json",
    "../data/json/",
    std_ingredients_nutrients_data,
)

Check `ingredients_nutrients_data` list:

In [14]:
std_ingredients_nutrients_data

[{'fat': 2.94,
  'iron': 0.00085,
  'zinc': 0,
  'fiber': 0.6,
  'sugars': 0.0,
  'sodium': 0.418,
  'niacin': 0,
  'folate': 0,
  'copper': 0,
  'biotin': 0,
  'iodine': 0,
  'protein': 12.4,
  'calcium': 0.035,
  'choline': 0,
  'thiamin': 0,
  'chloride': 0,
  'selenium': 0,
  'chromium': 0,
  'potassium': 0,
  'vitamin c': 0.0035,
  'magnesium': 0,
  'vitamin e': 0,
  'vitamin a': 3.54e-05,
  'manganese': 0,
  'vitamin d': 0,
  'vitamin k': 0,
  'phosphorus': 0,
  'riboflavin': 0,
  'vitamin b 6': 0,
  'molybdenum': 0,
  'cholesterol': 0.032,
  'vitamin b 12': 0,
  'carbohydrate': 5.29,
  'saturated fat': 0.59,
  'pantothenic acid': 0},
 {'fat': 0.0,
  'iron': 0.0,
  'zinc': 0,
  'fiber': 0.0,
  'sugars': 65.0,
  'sodium': 0.005,
  'niacin': 0,
  'folate': 0,
  'copper': 0,
  'biotin': 0,
  'iodine': 0,
  'protein': 0.0,
  'calcium': 0.015,
  'choline': 0,
  'thiamin': 0,
  'chloride': 0,
  'selenium': 0,
  'chromium': 0,
  'potassium': 0.105,
  'vitamin c': 0,
  'magnesium': 0,
  

Convert standardized ingredients nutrients data to *Pandas* dataframe:

In [15]:
df: DataFrame = FDC.get_standardized_ingredients_nutrients_dataframe(
    ingredients,
    std_ingredients_nutrients_data,
)

Check `df` *Pandas* dataframe:

In [16]:
df.head()

Unnamed: 0,fat,iron,zinc,fiber,sugars,sodium,niacin,folate,copper,biotin,iodine,protein,calcium,choline,thiamin,chloride,selenium,chromium,potassium,vitamin c,magnesium,vitamin e,vitamin a,manganese,vitamin d,vitamin k,phosphorus,riboflavin,vitamin b 6,molybdenum,cholesterol,vitamin b 12,carbohydrate,saturated fat,pantothenic acid,name
0,2.94,0.00085,0.0,0.6,0.0,0.418,0.0,0.0,0.0,0.0,0,12.4,0.035,0.0,0.0,0,0.0,0,0.0,0.0035,0.0,0.0,3.5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0,0.032,0.0,5.29,0.59,0.0,cod
1,0.0,0.0,0.0,0.0,65.0,0.005,0.0,0.0,0.0,0.0,0,0.0,0.015,0.0,0.0,0,0.0,0,0.105,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,65.0,0.0,0.0,fig
2,28.8,0.0,0.0,3.2,54.5,0.176,0.0,0.0,0.0,0.0,0,6.41,0.128,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.032,0.0,57.7,16.0,0.0,egg
3,0.0,4e-05,4e-05,0.0,0.0,0.001,1.3e-05,0.0,2.1e-05,0.0,0,0.0,0.0,0.0,6e-06,0,0.0,0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,4e-06,1e-06,0,0.0,0.0,0.0,0.0,0.0,gin
4,3.87,0.00084,0.00184,0.0,1.54,1.149,0.006055,1e-06,0.000124,0.0,0,19.04,0.008,0.0756,0.000426,0,3.4e-05,0,0.371,0.0,0.02,0.00022,1.3e-05,0.0,7e-07,0.0,0.276,0.000191,0.000411,0,0.054,4e-07,1.58,1.285,0.0,ham


Create a dictionary for `read_csv()` function calling:

In [17]:
read_csv_params: dict[str, str] = {
    "file": "daily_nutrients_norm.tsv",

    "file_path": "../data/datasets/raw/",
}

Read the file `daily_nutrients_norm.tsv` data to a *Pandas* dataframe:

In [18]:
daily_nutrients_norm: DataFrame = read_csv(
    read_csv_params["file_path"] + read_csv_params["file"],
    sep='\t',
)

Check `daily_nutrients_norm` *Pandas* dataframe:

In [19]:
daily_nutrients_norm.head()

Unnamed: 0,name,number (g)
0,Fat,78.0
1,Fiber,28.0
2,Sugars,50.0
3,Iron,0.018
4,Zinc,0.011


Transform the values of `df` *Pandas* dataframe into `%` of the daily nutritions norm:

In [20]:
for _, row in daily_nutrients_norm.iterrows():
    df[row["name"].lower()] = round(
        df[row["name"].lower()] * 100 / row["number (g)"],
        3,
    )

Check `df` *Pandas* dataframe:

In [21]:
df

Unnamed: 0,fat,iron,zinc,fiber,sugars,sodium,niacin,folate,copper,biotin,iodine,protein,calcium,choline,thiamin,chloride,selenium,chromium,potassium,vitamin c,magnesium,vitamin e,vitamin a,manganese,vitamin d,vitamin k,phosphorus,riboflavin,vitamin b 6,molybdenum,cholesterol,vitamin b 12,carbohydrate,saturated fat,pantothenic acid,name
0,3.769,4.722,0.000,2.143,0.00,18.174,0.000,0.00,0.000,0.0,0.0,24.80,2.692,0.000,0.000,0.0,0.000,0.0,0.000,3.889,0.000,0.000,3.933,0.000,0.0,0.000,0.00,0.000,0.000,0.0,10.667,0.000,1.924,2.950,0.00,cod
1,0.000,0.000,0.000,0.000,130.00,0.217,0.000,0.00,0.000,0.0,0.0,0.00,1.154,0.000,0.000,0.0,0.000,0.0,2.234,0.000,0.000,0.000,0.000,0.000,0.0,0.000,0.00,0.000,0.000,0.0,0.000,0.000,23.636,0.000,0.00,fig
2,36.923,0.000,0.000,11.429,109.00,7.652,0.000,0.00,0.000,0.0,0.0,12.82,9.846,0.000,0.000,0.0,0.000,0.0,0.000,0.000,0.000,0.000,0.000,0.000,0.0,0.000,0.00,0.000,0.000,0.0,10.667,0.000,20.982,80.000,0.00,egg
3,0.000,0.222,0.364,0.000,0.00,0.043,0.081,0.00,2.333,0.0,0.0,0.00,0.000,0.000,0.500,0.0,0.000,0.0,0.043,0.000,0.000,0.000,0.000,0.000,0.0,0.000,0.32,0.308,0.059,0.0,0.000,0.000,0.000,0.000,0.00,gin
4,4.962,4.667,16.727,0.000,3.08,49.957,37.844,0.25,13.778,0.0,0.0,38.08,0.615,13.745,35.500,0.0,61.455,0.0,7.894,0.000,4.762,1.467,1.444,0.000,3.5,0.000,22.08,14.692,24.176,0.0,18.000,16.667,0.575,6.425,0.00,ham
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,2.859,0.000,0.000,0.000,0.00,13.217,0.000,0.00,0.000,0.0,0.0,35.80,0.000,0.000,0.000,0.0,0.000,0.0,0.000,1.222,0.000,0.000,0.000,0.000,0.0,0.000,0.00,0.000,0.000,0.0,18.000,0.000,0.975,4.450,0.00,pork tenderloin
299,3.359,0.500,0.727,0.357,0.60,16.913,1.781,0.50,2.111,0.0,0.0,1.42,0.769,0.673,2.000,0.0,0.727,0.0,0.894,0.000,0.714,0.667,0.111,0.000,0.0,0.500,1.60,4.077,1.471,0.0,1.667,0.000,1.924,3.195,0.00,poultry sausage
300,0.000,0.000,0.000,0.000,22.00,0.000,0.000,0.00,0.000,0.0,0.0,0.00,3.231,0.000,0.000,0.0,0.000,0.0,6.191,11.333,0.000,0.000,0.000,0.000,66.0,0.000,0.00,0.000,0.000,0.0,0.000,0.000,4.473,0.000,0.00,pomegranate juice
301,0.013,18.889,1.091,5.714,19.20,0.174,8.125,3.25,15.556,0.0,0.0,4.00,1.077,5.455,16.667,0.0,1.273,0.0,9.128,4.444,4.048,1.267,0.111,2.609,0.0,0.083,6.24,4.615,4.529,0.0,0.000,0.000,6.327,0.000,7.94,jerusalem artichoke


Create a dictionary for `to_csv()` method calling:

In [22]:
to_csv_params: dict[str, str] = {
    "file": "ingredients_nutrients_percentage.csv",

    "file_path": "../data/datasets/processed/",
}

Save the transformed *Pandas* dataframe:

In [23]:
df.to_csv(to_csv_params["file_path"] + to_csv_params["file"], )