# Instructions
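This notebook downloads JSON data from [USDA FoodData Central](https://fdc.nal.usda.gov/download-datasets.html). The dataset pulled is "Foundation Foods"; for each food item only the food portions and a selected set of nutrients are kept. The data is then processed and exported to a JSON file (`usda_foundation_foods.json`) that can be loaded into CosmosDB; this notebook does not write to CosmosDB itself.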

### Parameters

In [106]:
# --- Remote source -----------------------------------------------------------
# Zip archive to download, and the JSON member to read once it is extracted.
zip_url = "https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_json_2023-10-26.zip"
json_file = "foundationDownload.json"

# --- Local working paths -----------------------------------------------------
# Where the downloaded archive is written, and the directory it is unpacked into.
zip_file_path = "data.zip"
extract_path = "extracted_data"

In [107]:
import pandas as pd
import requests
import zipfile
import json

### Download and unzip USDA foundation foods json file

In [108]:
# Download the zip file.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() fails fast on an HTTP error so an error page is never
# written to disk and later mistaken for a zip archive.
response = requests.get(zip_url, timeout=60)
response.raise_for_status()
with open(zip_file_path, "wb") as file:
    file.write(response.content)

# Extract the contents of the zip file
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
    zip_ref.extractall(extract_path)

# Path to the extracted JSON file
json_file_path = f"{extract_path}/{json_file}"

# Read the JSON file. The encoding is named explicitly (UTF-8 is the JSON
# interchange default) so parsing does not depend on the platform's locale.
with open(json_file_path, "r", encoding="utf-8") as file:
    data = json.load(file)

# Build one row per food item and keep only the columns used further down.
df = pd.DataFrame(data["FoundationFoods"])
df = df[["fdcId", "description", "foodPortions", "foodNutrients"]]
df.head(10)

Unnamed: 0,fdcId,description,foodPortions,foodNutrients
0,321358,"Hummus, commercial","[{'id': 118804, 'value': 2.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2219707, 'nutr..."
1,321360,"Tomatoes, grape, raw","[{'id': 118808, 'value': 5.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2219983, 'nutr..."
2,321611,"Beans, snap, green, canned, regular pack, drai...","[{'id': 118859, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2220526, 'nutr..."
3,323121,"Frankfurter, beef, unheated","[{'id': 118987, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2227684, 'nutr..."
4,323294,"Nuts, almonds, dry roasted, with salt added","[{'id': 119012, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2228441, 'nutr..."
5,323505,"Kale, raw","[{'id': 119057, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2229543, 'nutr..."
6,323604,"Egg, whole, raw, frozen, pasteurized","[{'id': 119060, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2229844, 'nutr..."
7,323697,"Egg, white, raw, frozen, pasteurized","[{'id': 119063, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2230115, 'nutr..."
8,323793,"Egg, white, dried","[{'id': 119069, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2230379, 'nutr..."
9,324317,"Onion rings, breaded, par fried, frozen, prepa...","[{'id': 119129, 'value': 1.0, 'measureUnit': {...","[{'type': 'FoodNutrient', 'id': 2232163, 'nutr..."


### Extract food portions and nutrients for each food item in the JSON file

In [110]:
# Nutrient numbers to keep; every other nutrient record is dropped below.
nutrients = [
    "298", "606", "646", "645", "601", "307", "205",
    "291", "203", "328", "301", "303", "306", "208",
    "269.3"
]


def _simplify_portions(portions):
    """Reduce each raw portion record to gram weight, value, modifier and unit.

    A missing "modifier" key becomes an empty string; the unit is taken from
    the nested measureUnit abbreviation.
    """
    simplified = []
    for portion in portions:
        simplified.append({
            'gramWeight': portion["gramWeight"],
            'value': portion["value"],
            'modifier': portion.get("modifier", ''),
            'uom': portion["measureUnit"]["abbreviation"],
        })
    return simplified


def _select_nutrients(raw_nutrients):
    """Flatten nutrient records, then keep tracked nutrients with amount > 0.

    A record without an "amount" key is treated as amount 0 and is therefore
    filtered out.
    """
    flattened = []
    for entry in raw_nutrients:
        flattened.append({
            'number': entry["nutrient"]["number"],
            'name': entry["nutrient"]["name"],
            'uom': entry["nutrient"]["unitName"],
            'amount': entry.get("amount", 0),
        })
    return [
        record for record in flattened
        if record["amount"] > 0 and (record["number"] in nutrients)
    ]


df["foodPortions"] = df["foodPortions"].apply(_simplify_portions)
df["foodNutrients"] = df["foodNutrients"].apply(_select_nutrients)

df.head(10)

Unnamed: 0,fdcId,description,foodPortions,foodNutrients
0,321358,"Hummus, commercial","[{'gramWeight': 33.9, 'value': 2.0, 'modifier'...","[{'number': '606', 'name': 'Fatty acids, total..."
1,321360,"Tomatoes, grape, raw","[{'gramWeight': 49.7, 'value': 5.0, 'modifier'...","[{'number': '301', 'name': 'Calcium, Ca', 'uom..."
2,321611,"Beans, snap, green, canned, regular pack, drai...","[{'gramWeight': 129.0, 'value': 1.0, 'modifier...","[{'number': '301', 'name': 'Calcium, Ca', 'uom..."
3,323121,"Frankfurter, beef, unheated","[{'gramWeight': 48.6, 'value': 1.0, 'modifier'...","[{'number': '205', 'name': 'Carbohydrate, by d..."
4,323294,"Nuts, almonds, dry roasted, with salt added","[{'gramWeight': 135.0, 'value': 1.0, 'modifier...","[{'number': '645', 'name': 'Fatty acids, total..."
5,323505,"Kale, raw","[{'gramWeight': 20.6, 'value': 1.0, 'modifier'...","[{'number': '203', 'name': 'Protein', 'uom': '..."
6,323604,"Egg, whole, raw, frozen, pasteurized","[{'gramWeight': 28.4, 'value': 1.0, 'modifier'...","[{'number': '306', 'name': 'Potassium, K', 'uo..."
7,323697,"Egg, white, raw, frozen, pasteurized","[{'gramWeight': 28.4, 'value': 1.0, 'modifier'...","[{'number': '306', 'name': 'Potassium, K', 'uo..."
8,323793,"Egg, white, dried","[{'gramWeight': 7.0, 'value': 1.0, 'modifier':...","[{'number': '203', 'name': 'Protein', 'uom': '..."
9,324317,"Onion rings, breaded, par fried, frozen, prepa...","[{'gramWeight': 20.2, 'value': 1.0, 'modifier'...","[{'number': '306', 'name': 'Potassium, K', 'uo..."


### Export to json

In [None]:
# Write the processed frame as a JSON array with one object per row
# (orient='records'), which keeps the nested portion and nutrient lists.
output_json_path = 'usda_foundation_foods.json'
df.to_json(path_or_buf=output_json_path, orient='records')