In [67]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, widgets
import requests
from urllib.parse import quote
import regex as re

import config # Imports a private API Key to test the functions

# Team Casimir Funk Nutritional Content Notebook
In this notebook, we create our nutritional content functions. We then add the nutritional data to our food prices dataset.

In [70]:
def handle_query(query, food_df_dict, api_key, num_results = 10):
    '''
    Description
    --------------------------------------------------
    This is a function to handle the Food Query
    for our user. It sends one search request to the
    USDA FoodData Central API, stores the results in
    food_df_dict['food_df'] and displays the first
    ten columns of the results.
    
    Inputs
    --------------------------------------------------
    + query : string; keywords to search the USDA 
            FoodData Central API for the user's
            desired food product
    + food_df_dict : dictionary; allows for food_df
            to be implemented and updated in 
            different functions (mutated in place)
    + api_key : string; user's FoodData Central API
            key
    + num_results : integer; the number of results 
            the user wants in their query
    
    Outputs
    --------------------------------------------------
    + food_df is displayed (first 10 columns) and
      stored under food_df_dict['food_df']
    + If the API does not answer with status 200, the
      status code is printed, nothing is displayed and
      food_df_dict is left unchanged
    '''
    # Percent-encode the keywords so spaces and symbols are URL-safe.
    encoded_query = quote(query)
    page_number = 1
    page_size = num_results

    url = f'https://api.nal.usda.gov/fdc/v1/foods/search?api_key={api_key}&query={encoded_query}&pageSize={page_size}&pageNumber={page_number}'

    response = requests.get(url)

    # Stop here on a failed request: there is no payload to parse, and
    # continuing would reference `data` before it has been assigned.
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return

    data = response.json()
    food_df = pd.DataFrame(data['foods'])
    
    # Share the full result with other functions through the dictionary;
    # only the first ten columns are shown to keep the output readable.
    food_df_dict['food_df'] = food_df
    display(food_df.iloc[:, :10])

In [72]:
def get_nutritional_content(food_df, idx = 0, computation = False):
    '''
    Description
    --------------------------------------------------
    Shows, for one food product, a table of its
    details (detail_df) and a table of its nutritional
    contents (nutritional_df), or returns the latter.
    
    Inputs
    --------------------------------------------------
    + food_df : pandas dataframe; contains all the
            details & nutritional information for the
            different food products (must have a
            'foodNutrients' column)
    + idx : integer; row label of the food product
            the user wants to look at
    + computation : boolean; if False, detail_df &
            nutritional_df are displayed (used by the
            widget); if True, nutritional_df is
            returned instead (used by the project's
            data analysis section)
    
    Outputs
    --------------------------------------------------
    computation = False : 
    + detail_df & nutritional_df are displayed
    computation = True : 
    + nutritional_df : pandas dataframe; nutritional
            content of a single food product of 
            interest
    '''
    # Detail table: keep only the descriptive fields present in food_df.
    detail_fields = ('fdcId', 'description', 'brandOwner', 'brandName', 'marketCountry')
    present_fields = [name for name in detail_fields if name in food_df.columns]
    # The selected row becomes a one-column frame labelled by idx; relabel it.
    detail_df = pd.DataFrame(food_df.loc[idx, present_fields]).rename(columns = {idx : 'Details'})

    # Nutrient table: expand the row's 'foodNutrients' entry into a frame and
    # keep only the nutrient fields that are actually there.
    nutrient_rows = pd.DataFrame(food_df.loc[idx, 'foodNutrients'])
    nutrient_fields = ('nutrientName', 'value', 'unitName', 'percentDailyValue')
    nutritional_df = nutrient_rows[[name for name in nutrient_fields if name in nutrient_rows.columns]]

    if computation:
        return nutritional_df

    display(detail_df)
    display(nutritional_df)

In [74]:
def interactive_query(api_key):
    '''
    Description
    --------------------------------------------------
    This is a function that makes handle_query()
    interactive. It displays a text box for the query
    and a slider (1 to 20) for the number of results.

    Inputs
    --------------------------------------------------
    + api_key : string; user's FoodData Central API
            key
    
    Outputs
    --------------------------------------------------
    + food_df_dict : dictionary; allows for food_df
            to be implemented and updated in 
            different functions. handle_query() fills
            in its 'food_df' entry each time the
            widget runs a successful query.
    '''
    food_df_dict = {}
    widget = interactive(handle_query, 
                               food_df_dict = widgets.fixed(food_df_dict),
                               # continuous_update=False: run the query when the user
                               # presses Enter or leaves the box, instead of sending
                               # one API request per keystroke.
                               query = widgets.Text(description='Enter Query Here',
                                                    continuous_update=False),  
                               num_results = (1, 20, 1),
                               api_key = widgets.fixed(api_key)
                        )
    display(widget)
    return food_df_dict

In [76]:
def interactive_get_nc(food_df_dict):
    '''
    Description
    --------------------------------------------------
    This is a function that makes 
    get_nutritional_content() interactive.

    Inputs
    --------------------------------------------------
    + food_df_dict : dictionary; allows for food_df
            to be implemented and updated in 
            different functions. Its 'food_df' entry
            is the dataframe the slider indexes into.
    
    Outputs
    --------------------------------------------------
    + Interactive widget is displayed
    + If food_df_dict has no 'food_df' entry yet, or
      that dataframe has no rows, a message is printed
      and no widget is shown
    '''
    food_df = food_df_dict.get('food_df')

    # Without at least one row there is nothing to index: the slider's
    # maximum (len - 1) would fall below its minimum of 0.
    if food_df is None or len(food_df) == 0:
        print("No food results available: run a query that returns results first.")
        return

    widget = interactive(get_nutritional_content, food_df = widgets.fixed(food_df), 
                  idx = widgets.IntSlider(value = 0, min = 0, max = len(food_df) - 1, step = 1, description='Food Index'), 
                         computation = widgets.fixed(False)
                        )
    display(widget)

In [121]:
# Load the store price table and label each row by its food item.
price = (
    pd.read_csv('./data/food_prices_dummy.csv')
    .set_index('Food item')
)
# Preview the first rows only.
price.head()

Unnamed: 0_level_0,GTIN/UPC,Price at TJs,Quantity,Unit,Brand/Type at TJs,GTIN/UPC.1,Price at Safeway,Quantity,Unit.1,Brand/Type at Safeway,GTIN/UPC.2,Price at Berkeley Bowl ($),Quantity.1,Unit.2,Brand/Type at Berkeley Bowl,GTIN/UPC.3,Price at Whole Foods,Quantity.2,Unit.3,Brand/Type at Whole Foods
Food item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Bread,,$3.49,24.0,oz,TJs Sourdough Sandwich Bread,,$4.99,24.0,oz,Signature SELECT Bread Long Sliced Loaf San Fr...,,7.69,20.0,oz,Oroweat Organic Thin-Sliced Rustic White Bread...,,$5.99,12.0,oz,Soft & Delicious White Sandwich Bread
Rice,,$3.29,3.0,lb,TJs Jasmine Rice,21130500000.0,$6.79,5.0,lb,Mahatma Jasmine Thai Fragrant Long Grain Rice ...,,2.09,1.0,lb,White Basmati Rice,,$3.69,1.0,lb,Organic White Long Grain Rice
Pasta,,$0.99,1.0,lb,TJs Orzo Italian Pasta,,$1.50,16.0,oz,Signature SELECT Pasta Penne Rigate Box,,2.59,16.0,oz,Soft & Delicious White Sandwich Bread,,$1.59,16.0,oz,"Pasta, Spaghetti"
Apples,,$1.29,1.0,each,Honeycrisp Apples,,$2,1.0,each,Honeycrisp apples,,1.59,1.0,lb,,,$2.49,1.0,lb,Honeycrisp Apples
Tomatoes,,$2.99,1.0,lb,TJs Campari Tomatoes,,$3.49,1.0,lb,Red roma tomato,,1.59,1.0,lb,Roma Tomatoes,,$2.99,1.0,lb,Roma Tomato


In [80]:
# Empty container for price dataframes.
# NOTE(review): presumably plays the same role as food_df_dict (sharing a
# dataframe between functions); nothing in the visible cells fills it yet.
price_df_dict = dict()

In [103]:
# Store names, taken from the part after "Brand/Type at " in each
# column whose label starts with "Brand".
brands = [
    re.findall(r'Brand/Type at (.+)', col)[0]
    for col in price.filter(regex = '^Brand').columns
]
brands

['TJs', 'Safeway', 'Berkeley Bowl', 'Whole Foods']

In [119]:
# Each store contributes five columns to `price`
# (GTIN/UPC, price, quantity, unit, brand/type).
num_sub_df_cols = 5
# Number of per-store column groups in the table (whole groups only).
num_sub_dfs = price.shape[1] // num_sub_df_cols
num_sub_dfs

4

In [125]:
# First five columns of the table, i.e. the first store's group of columns.
price.iloc[:, 0:5]

Unnamed: 0_level_0,GTIN/UPC,Price at TJs,Quantity,Unit,Brand/Type at TJs
Food item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bread,,$3.49,24.0,oz,TJs Sourdough Sandwich Bread
Rice,,$3.29,3.0,lb,TJs Jasmine Rice
Pasta,,$0.99,1.0,lb,TJs Orzo Italian Pasta
Apples,,$1.29,1.0,each,Honeycrisp Apples
Tomatoes,,$2.99,1.0,lb,TJs Campari Tomatoes
Lettuce,,$3.49,1.0,each,Romaine Hearts
Eggs,,$4.99,1.0,doz,Pasture Raised Large Brown Eggs
Milk,,$4.99,64.0,Fl Oz,Organic Reduced Fat Milk
Frozen Shrimp,,$11.99,1.0,lb,Wild Raw Argentinian Red Shrimp
Chicken,,$2.99,1.0,lb,Organic Chicken Drumsticks
