### Goal:
- Use the USDA FoodData Central (FDC) API to pull the nutritional content of 10 foods

### Skills Used:
- Web Scraping/APIs
- Data Wrangling
- Data Visualization
- Data structures (JSON)
- Data frames
- Summarizing data

In [None]:
# import packages
import pandas as pd
import numpy as np
import requests
from pandas.io.json import json_normalize #special package in pandas
import json
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# FoodData Central API key -- paste your own key between the quotes
apiKey = ""

# query-string parameters sent along with every API request
params = dict(api_key=apiKey)

In [None]:
# Sample call so we can inspect the structure of the JSON the API returns.
# NOTE(review): current FoodData Central docs appear to list the search route
# as /fdc/v1/foods/search with a 'query' field -- confirm that this older
# route/field pair is still served before relying on it.
response = requests.post(
    r'https://api.nal.usda.gov/fdc/v1/search',
    params=params,
    json={'generalSearchInput': "apple"},
    timeout=30,  # requests waits indefinitely on a stalled connection otherwise
)

# stop here with the HTTP status (e.g. a rejected API key) instead of failing
# later on a confusing missing-key error
response.raise_for_status()

# decode the JSON body into Python objects
item = response.json()

In [None]:
# pretty-print the response so the nesting of the JSON is readable
print(json.dumps(item, indent=2))

In [None]:
# search terms to send to the API -- one request is made per term
food_list = [
    "pizza",
    "fried chicken",
    "apple",
    "banana",
    "bread",
    "smoothie",
    "fries",
    "fish",
    "shrimp",
    "crab",
]

In [None]:
# Short labels for the verbose nutrient names used in the API response.
NUTRIENT_LABELS = {
    'Carbohydrate, by difference': 'Carbs',
    'Fiber, total dietary': 'Fiber',
    'Total lipid (fat)': 'Fat',
    'Sodium, Na': 'Sodium',
    'Fatty acids, total saturated': 'Sat_Fatty_Acids',
    'Calcium, Ca': 'Calcium',
    'Iron, Fe': 'Iron',
    'Sugars, total including NLEA': 'Sugar',
    'Cholesterol': 'Chol',
    'Fatty acids, total trans': 'Trans_Fatty_Acids',
    'Vitamin C, total ascorbic acid': 'VitaminC',
    'Vitamin A, IU': 'VitaminA',
    'Potassium, K': 'Potassium',
    'Fatty acids, total polyunsaturated': 'Unsat_Fatty_Acids',
}

# Nutrients (after relabelling) that become columns of the final table.
NUTRIENTS_TO_KEEP = ['Energy', 'Carbs', 'Protein', 'Fiber', 'Fat']


def _fetch_food_matches(food):
    """Search the API for `food` and return the list of matching food records.

    Returns an empty list when the response carries no 'foods' entry.
    Raises requests.HTTPError when the API answers with an error status.
    """
    # send API request - this should look familiar! From Module 8 - Web Scraping/APIs
    response = requests.post(
        r'https://api.nal.usda.gov/fdc/v1/search',
        params=params,
        json={'generalSearchInput': food},
        timeout=30,  # requests waits indefinitely on a stalled connection otherwise
    )
    # surface a rejected key / bad request here rather than as a KeyError later
    response.raise_for_status()
    return response.json().get('foods', [])


def _summarize_food(matches, food):
    """Average the kept nutrients over all `matches` into a one-row table.

    The returned frame has a 'Food' column holding `food` plus one column per
    nutrient in NUTRIENTS_TO_KEEP that was present in the matches.
    Returns None when the matches contain no usable nutrient rows, so that a
    food without data is never filled with another food's values.
    """
    # one frame per match - this should look familiar! From Module 5 - Data Structures
    frames = [
        pd.json_normalize(match['foodNutrients'])
        for match in matches
        if match.get('foodNutrients')
    ]
    if not frames:
        return None

    # stack all matches once - this should look familiar! From Module 4 - Data Wrangling
    nutrients = pd.concat(frames, ignore_index=True)

    # Assign the relabelled column back: a chained `.replace(..., inplace=True)`
    # on `df[col]` is not guaranteed to update the frame.
    nutrients['nutrientName'] = nutrients['nutrientName'].replace(NUTRIENT_LABELS)

    # One row per nutrient: the mean value across every match.
    # NOTE(review): rows named 'Energy' may be reported in more than one unit;
    # this average ignores the unit -- confirm against the response.
    averages = nutrients.groupby('nutrientName')['value'].mean().reset_index(name='Value')

    # keep only the nutrients we want to compare
    averages = averages[averages['nutrientName'].isin(NUTRIENTS_TO_KEEP)]
    if averages.empty:
        return None

    # tag the rows with the food name, then turn nutrients into columns
    return (
        averages.assign(Food=food)
        .pivot_table(index=['Food'], columns=['nutrientName'], values='Value')
        .reset_index()
    )


# one single-row table per food that returned data
food_tables = []

# counter shows which food word is being processed in the loop
for counter, food in enumerate(food_list, start=1):
    print(f"[{counter}/{len(food_list)}] {food}")

    summary = _summarize_food(_fetch_food_matches(food), food)
    if summary is None:
        print(f"    no nutrient data returned for {food!r} - skipped")
        continue
    food_tables.append(summary)

if not food_tables:
    raise RuntimeError("none of the foods in food_list returned nutrient data")

# combine every food into one table; ignore_index gives each food its own row label
all_foods = pd.concat(food_tables, ignore_index=True)

In [None]:
# display the combined dataframe (in a notebook, the value of a cell's last
# expression is rendered as its output)
all_foods

In [None]:
# RQ1: Is there an association between the amount of carbs and fiber in a food?
# Draw one point per food: carbs on the x-axis, fiber on the y-axis
ax = all_foods.plot.scatter(x='Carbs', y='Fiber')

# Label both axes
ax.set_xlabel("Carbohydrates")
ax.set_ylabel("Fiber")

# Put the research question above the chart
ax.set_title("RQ1: Is there an association between the amount of carbs and fiber in a food?")


In [None]:
# RQ2: Which food has the most carbs?
# Draw a bar chart with one bar per food
ax = all_foods.plot.bar(x='Food', y='Carbs')

# Label both axes
ax.set_xlabel("Food")
ax.set_ylabel("Carbs")

# Put the research question above the chart
ax.set_title("RQ2: Which food has the most carbs?")

In [None]:
# RQ3: Which food has the most fat?
# Create a bar chart with one bar per food
all_foods.plot.bar(x='Food',y='Fat')

# Add axis labels (the y-axis shows the 'Fat' column, so label it as fat)
plt.xlabel("Food")
plt.ylabel("Fat")

# add title
plt.title("RQ3: Which food has the most fat?")
