https://github.com/nutritionix/library-python
https://github.com/leetrout/python-nutritionix

In [95]:
import requests
import json
import pandas as pd
import numpy as np

## The code below documents the building of the initial data tables through API requests to the nutrionix API, which we are using the free "hacker" plan for. It also includes methods to make new queries to the API, get those results, convert them into data frames, and append them to the original results. 

### API Limits:
* /v2/natural/...
    * 200 per day
* /v2/items
    * 50 per day 

In [2]:
headers = {
    'x-app-id': "677b211d", 
    'x-app-key': "742d50f2d169f8cc88df795d499f9ff9",
    'Content-Type': 'application/json'
          }

In [64]:
# example of accessing exercise endpoint
exercise = "https://trackapi.nutritionix.com/v2/natural/exercise"
query1 = {"query": "running"}
response1 = requests.post(exercise, headers=headers, json=query1)
running = json.loads(response1.content.decode("utf-8"))

# using exercise information to calculate calories for user
running_cals = running['exercises'][0]['nf_calories']
running_time = running['exercises'][0]['duration_min']
def calc_cals(user_time):
    return running_cals/running_time*user_time

In [5]:
# example of accessing nutrients endpoint
nutrients = "https://trackapi.nutritionix.com/v2/natural/nutrients"
query2 = {"query": "pizza"}
response2 = requests.post(nutrients, headers=headers, json=query2)
pizza = json.loads(response2.content.decode("utf-8"))

In [3]:
# example of accessing instant endpoint
"""
THIS IS THE MAIN FOOD ENDPOINT WE WILL BE USING

Each api call will search the string inputted in the query and return two lists, 
a "common" list and a "branded" list of foods. 
Each list will contain 20 of the top search items for that inputted query string. 

In total, we can get 40 results for one API call to the instant endpoint.

Furthermore, for each of those 20 "common" results, we will need to do an additional
call to the nutrients endpoint to get the calorie amount for that food.

This is not necessary for branded foods, as they will have calories already included .
"""
instant = "https://trackapi.nutritionix.com/v2/search/instant"
query3 = {"query": "big mac"}
query = {"time": "0100", 'age': '20'}
response3 = requests.post(instant, headers=headers, json=query3)
big_mac = json.loads(response3.content.decode("utf-8"))

# getting calories of branded food from result of instant endpoint
print(big_mac['branded'][0])
big_mac_cals = big_mac['branded'][0]['nf_calories']
print(big_mac_cals)

{'food_name': 'Big Mac', 'serving_unit': 'burger', 'nix_brand_id': '513fbc1283aa2dc80c000053', 'brand_name_item_name': "McDonald's Big Mac", 'serving_qty': 1, 'nf_calories': 540, 'photo': {'thumb': 'https://d2eawub7utcl6.cloudfront.net/images/nix-apple-grey.png', 'highres': None, 'is_user_uploaded': False}, 'brand_name': "McDonald's", 'region': 1, 'brand_type': 1, 'nix_item_id': '513fc9e73fe3ffd40300109f', 'locale': 'en_US'}
540


In [221]:
# example getting calories of common foods from result of instant endpoint, using nutrients endpoint
big_mac_food_name = big_mac['common'][0]['food_name']
query4 = {"query": big_mac_food_name}
response4 = requests.post(nutrients, headers=headers, json=query4)
big_mac_common4 = json.loads(response4.content.decode("utf-8"))
big_mac_common4

{'foods': [{'food_name': 'big mac',
   'brand_name': None,
   'serving_qty': 1,
   'serving_unit': 'burger',
   'serving_weight_grams': 212,
   'nf_calories': 540,
   'nf_total_fat': 28,
   'nf_saturated_fat': 10,
   'nf_cholesterol': 80,
   'nf_sodium': 950,
   'nf_total_carbohydrate': 45,
   'nf_dietary_fiber': 3,
   'nf_sugars': 9,
   'nf_protein': 25,
   'nf_potassium': None,
   'nf_p': None,
   'full_nutrients': [{'attr_id': 203, 'value': 25},
    {'attr_id': 204, 'value': 28},
    {'attr_id': 205, 'value': 45},
    {'attr_id': 208, 'value': 540},
    {'attr_id': 269, 'value': 9},
    {'attr_id': 291, 'value': 3},
    {'attr_id': 301, 'value': 100},
    {'attr_id': 303, 'value': 4.5},
    {'attr_id': 307, 'value': 950},
    {'attr_id': 318, 'value': 500},
    {'attr_id': 401, 'value': 1.2},
    {'attr_id': 601, 'value': 80},
    {'attr_id': 605, 'value': 1},
    {'attr_id': 606, 'value': 10}],
   'nix_brand_name': "McDonald's",
   'nix_brand_id': '513fbc1283aa2dc80c000053',
   'ni

In [48]:
# getting nutrition information for branded foods using item endpoint, but it's not working right now
big_mac_nix_id = big_mac['branded'][0]['nix_item_id']
big_mac_nix_id
item = "https://trackapi.nutritionix.com/v2/search/item"
query4 = {"nix_item_id":'513fc9e73fe3ffd40300109f'}
response4 = requests.get(item, headers=headers, json=query4)
big_mac_info = json.loads(response4.content.decode("utf-8"))
big_mac_info

{'message': '"value" must contain at least one of [upc, nix_item_id]',
 'id': 'ba37df55-51a3-4ae4-a95e-0b1badfedc1e'}

### Other attempts of pulling data with larger text queries

In [88]:
query5 = {"query": "big mac pizza caesar salad skittles cheesecake"}
response5 = requests.post(instant, headers=headers, json=query5)
multiple = json.loads(response5.content.decode("utf-8"))

In [96]:
foods = "asparagus apples alfalfa acorn squash almond arugala artichoke applesauce ahi tuna albacore Apple juice Avocado roll Bruscetta bacon black beans bagels baked beans BBQ bison barley beer bisque bluefish bread broccoli buritto babaganoosh Cabbage cake carrots carne asada celery cheese chicken catfish chips chocolate chowder clams coffee cookies corn cupcakes crab curry cereal chimichanga dates dips duck dumplings donuts eggs enchilada eggrolls English muffins edimame eel sushi fajita falafel fish franks fondu French toast French dip Garlic ginger gnocchi goose granola grapes green beans Guancamole gumbo grits Graham crackers ham halibut hamburger cheeseburgers bacon cheeseburgers honey huenos rancheros hash browns hot dogs haiku roll hummus ice cream Irish stew Indian food Italian bread jambalaya jelly jam jerky jalapeño kale kabobs ketchup kiwi kidney beans kingfish lobster Lamb Linguine Lasagna Meatballs Moose Milk Milkshake Noodles Ostrich Pizza Pepperoni Porter Pancakes Quesadilla Quiche Reuben Spinach Spaghetti Tater tots Toast Venison Waffles Wine Walnuts Yogurt Ziti Zucchini soup".lower()
print(len(foods.split(" ")))
foods_query = {"query": foods}
foods_response = requests.post(instant, headers=headers, json=foods_query)
foods = json.loads(foods_response.content.decode("utf-8"))

158


### Writing dictionary to a json file

In [225]:
# Writing a dictionary into a json file
with open("foods.json", 'w') as file:
    json.dump(foods,file)
with open("big_mac.json", 'w') as file:
    json.dump(big_mac,file)
with open("multiple.json", 'w') as file:
    json.dump(multiple,file)

### Building Tables from JSON API results

In [244]:
# for "branded" foods, keep the fields: ['food_name', 'brand_name_item_name', 'brand_name', 'serving_unit', 'serving_qty', 'nf_calories']
brand_cols = ['food_name', 'brand_name_item_name', 'brand_name', 'serving_unit', 'serving_qty', 'nf_calories']
def build_branded_table(result):
    brand_cols = ['food_name', 'brand_name_item_name', 'brand_name', 'serving_unit', 'serving_qty', 'nf_calories']
    table = []
    for i in range(len(result['branded'])):
        row = []
        for col in brand_cols:
            row.append(result['branded'][i][col])
        table.append(row)
    branded = pd.DataFrame(table, columns = brand_cols)
    branded.drop_duplicates(subset = 'food_name', keep=False, inplace=True)
    return branded

In [237]:
"""
Rewriting this code so that it can take in a huge list of search results, where each search result 
is a dictionary with 'common' and 'branded', and within each of those is a list of individual
results, from which i can pull a "food_name". 

Ultimately, the goal is to iterate through all lists and pull out every single "food_name" in 
every search result, and put them into a string separated by a comma and a space, like ", ". 

However, as I am designing this method, I want it to primarily work for the typical case of
building a table from the results of a user's query. Which means on a regular basis it would
take in just a list of results as would be typical of the value of the 'common' key in a single
api query result. 

Externally, I'll need to write a bigger function that uses this as a helper function that
only inputs a list of individual food results
"""
# for "common" foods, keep the fields: ['food_name', 'tag_name', 'serving_unit', 'serving_qty']

"""
Input: a list of individual food results, each of which is in dictionary format.
If coming from a single API instant query, then simply input query_response['common'].
Else, reformat data to be in a full list form.
"""
def build_common_table(result):
    common_cols = ['food_name', 'tag_name','serving_unit', 'serving_qty', 'nf_calories']
    common_fields = ['food_name', 'tag_name','serving_unit', 'serving_qty']
    table = []
    food_names = []
    for i in range(len(result)): 
        result_keys = result[i].keys()
        row = []
        for col in common_fields:
            if col in result_keys:
                row.append(result[i][col])
            else:
                row.append("?")
        table.append(row)
        food_names.append(result[i]['food_name'])
        
    table = np.array(table)
    calories = common_api_get_cals(food_names)
    print(table.shape)
    print(calories.shape)
    
    full_table = np.concatenate((table, calories.T), axis=1)
    common = pd.DataFrame(full_table, columns = common_cols)
    return common

The reason I make the below edit to common_api_get_calls and return the calories in a dictionary format rather than simply a list of calories is because I'm running into this error where each input result is unique, but the nutrients API does not have an output for every single input, thus meaning that I am only getting 2188 calorie numbers, when in fact I inputted 2223 different types of foods. However, due to the nature of the API input query and how it is just one string with each item separated by ", " there's no way to distinguish which result row returned something meaningful and which ones didn't, at least in the current form, so I needed to edit this method to return a dictionary. That way, a new list of calories can be built such that if the calorie count for a particular food didn't come back, we just have a "?" there. 

This can be dealth with later down the line, in enforcing the schema to not take in rows that do not have calorie counts.

In [239]:
def common_api_get_cals(food_names_list):
    nutrients = "https://trackapi.nutritionix.com/v2/natural/nutrients"
    food_names_string = ", ".join(food_names_list)
    query = {'query': food_names_string}
    response = requests.post(nutrients, headers=headers, json=query)
    response_json = json.loads(response.content.decode("utf-8"))
    calories_dict = dict()
    for item in response_json['foods']:
        calories_dict[item['food_name']] = item['nf_calories']
#     calories = np.array([[item['nf_calories'] for item in response_json['foods']]])
    calories = np.array([[calories_dict[name] if name in calories_dict else "?" for name in food_names_list]])
    return calories

In [128]:
# just checking if my build_common_table works
trial_table = build_common_table(food_list_jsons[12]['common'])
trial_table.head()

Unnamed: 0,food_name,tag_name,serving_unit,serving_qty,nf_calories
0,apple,apple,"medium (3"" dia)",1,94.64
1,apples,apple,"medium (3"" dia)",1,94.64
2,apple pie,Apple pie,"piece (1/8 of 9"" dia)",1,296.25
3,applepear,Asian pear,"fruit 2-1/4"" high x 2-1/2"" dia",1,51.24
4,appletini,appletini,cocktail,1,149.11


In [260]:
"""
ONLY USED TO BUILD THE ORIGINAL TABLES, NOT USED AT A LATER TIME, DEC 7, 2019
THIS CODE IS DEPRECATED

Currently I've made three different queries to the instant endpoint, I don't want to waste that data so
I'm just going to build those tables now and append them to form two large dataframes, common and branded,
and then I will do more comprehensive querying and build more items into the tables.

all_queries = [big_mac,multiple,foods]
"""
def build_temp_curr_tables():
    big_mac_common = build_common_table(big_mac)
    big_mac_branded = build_branded_table(big_mac)
    multiple_common = build_common_table(multiple)
    multiple_branded = build_branded_table(multiple)
    foods_common = build_common_table(foods)
    foods_branded = build_branded_table(foods)
    
    common = big_mac_common.append(multiple_common,ignore_index=True).append(foods_common,ignore_index=True)
    branded = big_mac_branded.append(multiple_branded,ignore_index=True).append(foods_branded,ignore_index=True)
    return common, branded

In [265]:
def add_to_curr_tables(new_common,new_branded):
    og_common = pd.read_csv("common.csv")
    full_common = og_common.append(new_common,ignore_index=True)
    full_common.to_csv("common.csv",index=False)
    print("common.csv updated")
    
    og_branded = pd.read_csv("branded.csv")
    full_branded = og_branded.append(new_branded,ignore_index=True)
    full_branded.to_csv("branded.csv",index=False)
    print("branded.csv updated")

### Sun, Dec 8, 2019, getting new data by making query for each search item in list "foods"

In [28]:
foods = "asparagus apples alfalfa acorn squash almond arugala artichoke applesauce ahi tuna albacore Apple juice Avocado roll Bruscetta bacon black beans bagels baked beans BBQ bison barley beer bisque bluefish bread broccoli buritto babaganoosh Cabbage cake carrots carne asada celery cheese chicken catfish chips chocolate chowder clams coffee cookies corn cupcakes crab curry cereal chimichanga dates dips duck dumplings donuts eggs enchilada eggrolls English muffins edimame eel sushi fajita falafel fish franks fondu French toast French dip Garlic ginger gnocchi goose granola grapes green beans Guancamole gumbo grits Graham crackers ham halibut hamburger cheeseburgers bacon cheeseburgers honey huenos rancheros hash browns hot dogs haiku roll hummus ice cream Irish stew Indian food Italian bread jambalaya jelly jam jerky jalapeño kale kabobs ketchup kiwi kidney beans kingfish lobster Lamb Linguine Lasagna Meatballs Moose Milk Milkshake Noodles Ostrich Pizza Pepperoni Porter Pancakes Quesadilla Quiche Reuben Spinach Spaghetti Tater tots Toast Venison Waffles Wine Walnuts Yogurt Ziti Zucchini soup".lower()
food_list = foods.split(" ")

def search_foods(food_list):
    instant = "https://trackapi.nutritionix.com/v2/search/instant"
    headers = {
    'x-app-id': "677b211d", 
    'x-app-key': "742d50f2d169f8cc88df795d499f9ff9",
    'Content-Type': 'application/json'
          }
    search_results = []
    for food_item in food_list:
        search_query = {'query': food_item}
        response = requests.post(instant, headers=headers, json=search_query)
        response_json = json.loads(response.content.decode("utf-8"))
        search_results.append(response_json)
    return search_results

In [30]:
food_list_jsons = search_foods(food_list)

In [249]:
import functools

def build_branded_table_from_list_of_jsons(jsons_list):
    df_results = []
    for json_result in jsons_list:
        df_results.append(build_branded_table(json_result))
    full_table = functools.reduce(lambda x,y:x.append(y,ignore_index=True), df_results)
    # drop duplicate rows
    full_table.drop_duplicates(subset='food_name',keep=False, inplace=True)
    return full_table    

In [251]:
new_branded_table = build_branded_table_from_list_of_jsons(food_list_jsons)
print(len(new_branded_table))
new_branded_table.head()

2398


Unnamed: 0,food_name,brand_name_item_name,brand_name,serving_unit,serving_qty,nf_calories
0,Fresh Asparagus,Giorgio Fresh Asparagus,Giorgio,oz,3.0,20.0
1,Asparagus,Fresh 1 Asparagus,Fresh 1,kilogram,1.0,191.0
2,Asparagus & Pea,Saladworks Asparagus & Pea,Saladworks,oz,1.0,40.0
3,Asparagus Spears,Great Value Asparagus Spears,Great Value,cup,0.75,20.0
4,Asparagus Medley,Safeway Farms Asparagus Medley,Safeway Farms,cup,1.0,20.0


In order to utilize my build_common_table method, I need to input a single list of all the individual food result dictionaries. Currently, food_list_jsons is a list of 155 items, each of which is a result of a particular query. Each of these 155 result items is a dictionary of two keys, one of which is 'common'. After accessing the value of that 'common' key, we get a list of individual food results. So I need to extract those.

In [229]:
def build_common_table_from_list_of_jsons(jsons_list):
    food_results = [item for sublist in jsons_list for item in sublist['common']]
    new_list = []
    # below is to eliminate duplicates
    for d in food_results:
        if d not in new_list:
            new_list.append(d)
            
    table = build_common_table(new_list)
    return table

In [240]:
new_common_table = build_common_table_from_list_of_jsons(food_list_jsons)
print(len(new_common_table))
new_common_table.head()

(2223, 4)
(1, 2223)
2223


Unnamed: 0,food_name,tag_name,serving_unit,serving_qty,nf_calories
0,asparagus,asparagus,spears,5,16.5
1,asparagus dip,asparagus dip,cup,1,622.06
2,asparagus soup,cream of asparagus soup,cup (8 fl oz),1,161.2
3,asparagus stalk,asparagus stalks,spears,5,16.5
4,asparagus pizza,white asparagus pizza,medium slice,1,306.91


Sun, Dec 8, 2019  
Both the new common and the new branded table have been built, so I can now combine it with the previous stuff I had. All my new methods ensure no duplicates, but the old data that I'm adding onto definitely will still have duplicates. So we'll definitely need to ensure in creating our sql tables that
1) There are no duplicate rows
2) There are no rows that have a "?" in the calories column

### Getting Exercise Data

Users have the option either 
1. simply inputting an exercise
2. adding extra information
    * sex
    * weight_kg
    * height_cm
    * age
    
Taking action 1 will always attempt to query our SQL database before it resorts to using the API. Taking action 2 will always use the API and will never store that data in the SQL database.

In [272]:
# example of accessing exercise endpoint
"""
Takes in a string query like "running", or several queries together in one string separated by ", "
like "running, tennis, hiking"
"""
def get_exercise(exercise_input, gender=None, weight_kg=None,height_cm=None,age=None):
    exercise = "https://trackapi.nutritionix.com/v2/natural/exercise"
    if gender is not None and weight_kg is not None and height_cm is not None and age is not None:
        query = {"query": exercise_input, 'gender':gender,'weight_kg':weight_kg,'height_cm':height_cm,'age':age}
    else:
        query = {"query": exercise_input}
    response = requests.post(exercise, headers=headers, json=query)
    results = json.loads(response.content.decode("utf-8"))
    return results

In [367]:
def get_baseline_exercises():
    gym_exercises = "Squat,Leg press,Lunge,Deadlift,Leg extension,Leg curl,Standing calf raise,Seated calf raise,Hip adductor,Bench press,Chest fly,Push-up,Pull-down,Pull-up,Bent-over row,Upright row,Shoulder press,Shoulder fly,Lateral raise,Shoulder shrug,Pushdown,Triceps extension,Biceps curl,Crunch,Russian twist,Leg raise,Back extension"
    gym_exercises = gym_exercises.lower().split(",")
    gym_exercises = ", ".join(gym_exercises)
    sports = "Archery Badminton Cricket Bowling Boxing Curling Tennis Skateboarding Surfing Hockey Figure skating Yoga Fencing Fitness Gymnastics Karate Volleyball Weightlifting Basketball Baseball Rugby Wrestling High jumping Hang gliding Car racing Cycling Running Table tennis Fishing Judo Climbing Billiards Pool Shooting Horse racing Horseback riding Golf Football Soccer American football Swimming"
    sports = sports.lower().split(" ")
    remove = ['figure', 'skating','high', 'jumping','hang','gliding','car','racing','table','tennis','horse','racing','horseback','riding','american','football']
    for sprt in remove:
        sports.remove(sprt)
    add = ["figure skating",'high jumping', 'hang gliding', 'table tennis', 'horseback riding', 'american football']
    for sprt in add:
        sports.append(sprt)
    sports = ", ".join(sports)
    dance = 'waltz,tango,cha cha,rumba,samba,mambo,quickstep,jive,flamenco,swing,tap,salsa,capoiera,ballet,ballroom'
    dance = dance.split(',')
    dance = ", ".join(dance)
    other = ['walk','run','climb','rock climb','hiking','speed walking','jogging']
    other = ", ".join(other)
    exercises = gym_exercises+", "+sports+", "+dance+", "+other
    return get_exercise(exercises)

In [None]:
baseline_exercises = get_baseline_exercises()

In [387]:
# we want to keep the fields 'name', 'duration_min', 'met', 'nf_calories'
def build_exercise_table(exercises):
    table = []
    exercise_fields = ['name','duration_min','met','nf_calories']
    for i in range(len(exercises['exercises'])):
        row = []
        for col in exercise_fields:
            row.append(exercises['exercises'][i][col])
        table.append(row)
    exercise_table = pd.DataFrame(table, columns = exercise_fields)
    return exercise_table

In [388]:
exercise_table = build_exercise_table(baseline_exercises)

In [390]:
exercise_table.to_csv('exercises.csv',index=False)