# Classifying Menu Items as Vegetarian or Not

In [2]:
import os 
import pandas as pd
import requests
import requests_cache
from string import ascii_lowercase as lc_alphabet


requests_cache.install_cache('api_cache', expire_after=86400)  # Cache expires after 1 day (86400 seconds)

#### Create a Test Dataset

##### Plan: request every recipe from TheMealDB together with its ingredients, join each one into a string of the form `[name] : [ingredients]`, and use those strings to test two 'medium' language models.

Getting the data from TheMealDB by requesting the recipes for one letter of the alphabet at a time.

In [3]:
# TheMealDB API key: "1" is the key provided by them; replace it with your own key if you have one
api_key = "1"

In [4]:
# Try a single letter ("a") first, before looping over the whole alphabet.
# The key comes from `api_key` instead of being repeated inside the URL.
url = f"https://www.themealdb.com/api/json/v1/{api_key}/search.php?f=a"

In [5]:
response = requests.get(url)

In [None]:
data = response.json()
data

In [None]:
len(data["meals"])

In [None]:
# writing a for loop to loop through all the letters

In [None]:
lc_alphabet

In [None]:
all_recipes = []

In [None]:
# Request the meals for every first letter a-z and collect them in one list.
# The list is (re)created in this cell so that running the cell a second time
# does not append the same meals again.
all_recipes = []

for letter in lc_alphabet:

    url = f"https://www.themealdb.com/api/json/v1/{api_key}/search.php?f={letter}"
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()

        # "meals" can come back as null (None) -- presumably when no meal
        # starts with this letter -- and extend(None) would raise.
        if data["meals"] is not None:
            all_recipes.extend(data["meals"])

    else:
        print(f"{letter} did not work")


In [None]:
letter

In [None]:
len(all_recipes) # why are there two left out? should be 303

In [None]:
food_df = pd.DataFrame(all_recipes)
food_df

In [None]:
# Keep only the strIngredient* columns and replace missing values with "".
# fillna is chained (returning a new frame) rather than applied in place to
# the filtered result, which avoids pandas' copy-vs-view warnings.
ingredients_df = food_df.filter(regex="strIngredient.*").fillna("")
ingredients_df.head()

In [None]:
# Join each row's ingredients into one comma-separated string.
# Blank / whitespace-only slots are skipped; joining them as well would leave
# runs of empty items ("a,b,,,,,") in the string used for testing.
food_df["all_ingredients"] = ingredients_df.apply(
    lambda row: ",".join(x.strip() for x in row if x.strip()), axis=1
)
food_df.head()

In [None]:
food_df["test_string"] = food_df["strMeal"] + " : " + food_df["all_ingredients"]

In [None]:
food_df.head()

In [None]:
filename = "themealdb_recipes.csv"
food_df.to_csv(filename, index = False)

## Build your training set

One way to build the training set is to query the Spoonacular API once for every allowed value of the `cuisine` parameter:

- get all the recipes from all cuisines
- use the recipe IDs to then fetch the ingredient and diet information
- do some string editing to get a training string


In [6]:
# Probe request: look at the shape of a complexSearch response for one cuisine.
# The API key is read from the SPOONACULAR_API_KEY environment variable so it
# is not stored in the notebook (a KeyError here means the variable is unset).
spoonacular_key = os.environ["SPOONACULAR_API_KEY"]
url = f"https://api.spoonacular.com/recipes/complexSearch?apiKey={spoonacular_key}&cuisine=italian"
response = requests.get(url)
data = response.json()
data.keys()

dict_keys(['results', 'offset', 'number', 'totalResults'])

In [None]:
data["totalResults"]

In [None]:
data["results"]

In [None]:
# (redacted) scratch note that contained a hard-coded Spoonacular API key -- the exposed key should be rotated

In [7]:
# Values to send as the `cuisine` query parameter.
# Multi-word names are already percent-encoded (%20 = space) because each
# value is interpolated directly into the request URL.
cuisines = [
    "african", "asian", "american", "british", "cajun", "caribbean", 
    "chinese", "eastern%20european", "european", "french", "german", 
    "greek", "indian", "irish", "italian", "japanese", "jewish", 
    "korean", "latin%20american", "mediterranean", "mexican", 
    "middle%20eastern", "nordic", "southern", "spanish", "thai", 
    "vietnamese"
]

In [8]:
# Collect the complexSearch results of every cuisine into one list.
#
# * The API key is read from the SPOONACULAR_API_KEY environment variable
#   instead of being embedded in the URL.
# * Results are fetched page by page: asking for `number={totalResults}` in a
#   single request does not work once a cuisine has more results than the
#   per-request cap, so the remainder would be dropped without any error.
# * raise_for_status() stops on an HTTP error (e.g. exhausted quota) rather
#   than failing later with a confusing KeyError on the error payload.
spoonacular_key = os.environ["SPOONACULAR_API_KEY"]
page_size = 100   # largest `number` accepted per request, per the Spoonacular docs -- confirm for your plan
max_offset = 900  # largest `offset` accepted, per the Spoonacular docs -- confirm for your plan

all_cuisines = []
for cuisine in cuisines:

    offset = 0
    while offset <= max_offset:
        page_url = (
            "https://api.spoonacular.com/recipes/complexSearch"
            f"?apiKey={spoonacular_key}&cuisine={cuisine}"
            f"&number={page_size}&offset={offset}"
        )
        response = requests.get(page_url)
        response.raise_for_status()
        data = response.json()

        all_cuisines.extend(data["results"])

        # stop when this cuisine is exhausted (empty page or all results seen)
        offset += page_size
        if not data["results"] or offset >= data["totalResults"]:
            break
        
        

In [9]:
pd.DataFrame(all_cuisines)["id"].drop_duplicates() # 948 unique recipes

0        632003
1        653275
2        716426
3        642129
4        798400
         ...   
1231     664847
1232     664828
1233     655903
1234     664708
1235    1096211
Name: id, Length: 948, dtype: int64

In [10]:
str(all_cuisines[0]["id"])

'632003'

In [11]:
len(all_cuisines)

1236

In [12]:
# Create the list of unique recipe IDs (as strings) to pass to the bulk endpoint.
# dict.fromkeys removes duplicates while keeping first-seen order, replacing
# the `not in list` check that rescanned the whole list for every recipe.
all_ids = list(dict.fromkeys(str(recipe["id"]) for recipe in all_cuisines))

id_string = ",".join(all_ids)
id_string

'632003,653275,716426,642129,798400,716311,646043,716217,644826,716364,665379,660231,647875,660290,637440,663177,652078,632822,648910,632835,641128,1096250,633088,648916,638642,643775,648506,641908,663126,641111,638649,1697543,634710,650484,663166,632983,632896,663113,634965,649077,642468,664830,652026,649199,637391,716338,640234,658482,649031,661740,632854,638369,661072,645541,1096217,651180,641565,663169,647874,1095994,1095955,643255,632881,649030,638389,647830,645634,632026,632901,645474,648758,651911,1096225,663149,637908,663078,735820,638496,660493,632796,646425,664835,658058,633649,646123,632806,649036,637264,654313,1095772,642941,637426,656777,642283,654373,653008,632862,663090,661117,648470,663074,638382,1132339,1046982,642540,639392,644581,633837,660395,638552,638764,636573,642695,1096277,775621,637897,673457,662376,637265,657226,649141,641627,634888,1095894,1697611,641845,638549,664011,660273,610281,662391,991010,650377,651190,637102,650546,1066893,1096060,665146,659058,66073

In [13]:
len(all_ids)

948

In [14]:
# first half of the IDs (474 of the 948); the second half is requested separately further down
id_string = ",".join(all_ids[:474])

In [15]:
# Get the full recipe information (including ingredients) for the IDs in id_string.
# The API key is read from the SPOONACULAR_API_KEY environment variable so it
# is not stored in the notebook.
spoonacular_key = os.environ["SPOONACULAR_API_KEY"]
url = f"https://api.spoonacular.com/recipes/informationBulk?apiKey={spoonacular_key}&ids={id_string}"

response = requests.get(url)
# fail here on an HTTP error instead of later, when indexing into an error payload
response.raise_for_status()
data = response.json()

In [16]:
print("Quota Left Today:", response.headers.get("X-API-Quota-Left"))

Quota Left Today: -200.33


In [17]:
print("Quota Left Today:", response.headers.get("X-API-Quota-Left"))

Quota Left Today: -200.33


In [18]:
response.status_code

200

In [28]:
data[0]["extendedIngredients"][2]["nameClean"]

'green pepper'

### NEXT STEP: HAVE TO GET ALL 948

In [30]:
len(all_ids[474:])

474

In [29]:
# second half of the IDs (index 474 onward)
id_string = ",".join(all_ids[474:])

In [31]:
# Get the full recipe information (including ingredients) for the IDs in id_string.
# The request must use `id_string` (the second half of the IDs); a single
# hard-coded ID here would return one recipe instead of the remaining ones.
# The API key is read from the SPOONACULAR_API_KEY environment variable so it
# is not stored in the notebook.
spoonacular_key = os.environ["SPOONACULAR_API_KEY"]
url = f"https://api.spoonacular.com/recipes/informationBulk?apiKey={spoonacular_key}&ids={id_string}"

response = requests.get(url)
# fail here on an HTTP error instead of later, when indexing into an error payload
response.raise_for_status()
data_2 = response.json()

In [33]:
len(data_2)

474

In [34]:
data_2[0].keys()

dict_keys(['vegetarian', 'vegan', 'glutenFree', 'dairyFree', 'veryHealthy', 'cheap', 'veryPopular', 'sustainable', 'lowFodmap', 'weightWatcherSmartPoints', 'gaps', 'preparationMinutes', 'cookingMinutes', 'aggregateLikes', 'healthScore', 'creditsText', 'license', 'sourceName', 'pricePerServing', 'extendedIngredients', 'id', 'title', 'readyInMinutes', 'servings', 'sourceUrl', 'image', 'imageType', 'summary', 'cuisines', 'dishTypes', 'diets', 'occasions', 'winePairing', 'instructions', 'analyzedInstructions', 'originalId', 'spoonacularScore', 'spoonacularSourceUrl'])

In [35]:
# Append the second batch to the first one.
# NOTE: extend() mutates `data` in place, so running this cell twice adds
# data_2 a second time -- run it exactly once after both requests.
data.extend(data_2)

In [36]:
len(data) # contains all the recipe bulk info for each id

948

In [99]:
# need to make new data structure of id and ingredients and then 
data[0]["vegan"]

True

In [40]:
data[0]["extendedIngredients"]#[2]["nameClean"]

[{'id': 16063,
  'aisle': 'Pasta and Rice',
  'image': 'black-eyed-peas.jpg',
  'consistency': 'SOLID',
  'name': 'black-eyed peas',
  'nameClean': 'blackeyed peas',
  'original': '2 cups dried black-eyed peas',
  'originalName': 'dried black-eyed peas',
  'amount': 2.0,
  'unit': 'cups',
  'meta': ['dried'],
  'measures': {'us': {'amount': 2.0, 'unitShort': 'cups', 'unitLong': 'cups'},
   'metric': {'amount': 344.0, 'unitShort': 'g', 'unitLong': 'grams'}}},
 {'id': 11124,
  'aisle': 'Produce',
  'image': 'sliced-carrot.png',
  'consistency': 'SOLID',
  'name': 'carrots',
  'nameClean': 'carrot',
  'original': '2 cups sliced carrots',
  'originalName': 'sliced carrots',
  'amount': 2.0,
  'unit': 'cups',
  'meta': ['sliced'],
  'measures': {'us': {'amount': 2.0, 'unitShort': 'cups', 'unitLong': 'cups'},
   'metric': {'amount': 256.0, 'unitShort': 'g', 'unitLong': 'grams'}}},
 {'id': 11333,
  'aisle': 'Produce',
  'image': 'green-pepper.jpg',
  'consistency': 'SOLID',
  'name': 'bell pe

In [100]:
# Build one record per recipe: its id, the vegetarian / vegan flags and the
# list of ingredient names. Each ingredient contributes its "nameClean" value,
# or its "name" value when "nameClean" is None.
recipe_ingredients = []

for entry in data:

    names = [
        item["name"] if item["nameClean"] is None else item["nameClean"]
        for item in entry["extendedIngredients"]
    ]

    recipe_ingredients.append(
        {
            "id": entry["id"],
            "vegetarian": entry["vegetarian"],
            "vegan": entry["vegan"],
            "ingredients": names,
        }
    )
        
        

In [101]:
len(recipe_ingredients)

948

In [102]:
# need to form two dataframes and then merge them on id
#
# The same recipe can be returned for more than one cuisine, so keep one row
# per recipe. Duplicates are dropped on "id" -- the key used for the merge --
# so rows that share an id cannot multiply in the merged frame, and the
# de-duplication is chained rather than done in place.
all_recipes = pd.DataFrame(all_cuisines).drop_duplicates(subset="id")
all_recipes.head()

Unnamed: 0,id,title,image,imageType
0,632003,African Bean Soup,https://img.spoonacular.com/recipes/632003-312...,jpg
1,653275,North African Chickpea Soup,https://img.spoonacular.com/recipes/653275-312...,jpg
2,716426,"Cauliflower, Brown Rice, and Vegetable Fried Rice",https://img.spoonacular.com/recipes/716426-312...,jpg
3,642129,Easy To Make Spring Rolls,https://img.spoonacular.com/recipes/642129-312...,jpg
4,798400,Spicy Black-Eyed Pea Curry with Swiss Chard an...,https://img.spoonacular.com/recipes/798400-312...,jpg


In [103]:
all_recipes.shape

(948, 4)

In [104]:
id_ingredients = pd.DataFrame(recipe_ingredients)
id_ingredients.head()

Unnamed: 0,id,vegetarian,vegan,ingredients
0,632003,True,True,"[blackeyed peas, carrot, green pepper, onion, ..."
1,653275,True,True,"[canola oil, garlic, onion, leek, canned chick..."
2,716426,True,True,"[grape seed oil, coconut oil, spring onions, g..."
3,642129,False,False,"[fresh basil, butter lettuce, chili pepper, fi..."
4,798400,True,True,"[blackeyed peas, curry powder, garam masala, e..."


In [105]:
id_ingredients.shape

(948, 4)

In [106]:
# Inner-join the search results with the per-recipe ingredient records on the
# shared "id" column.
merged_df = pd.merge(all_recipes, id_ingredients, on="id")
merged_df.head()

Unnamed: 0,id,title,image,imageType,vegetarian,vegan,ingredients
0,632003,African Bean Soup,https://img.spoonacular.com/recipes/632003-312...,jpg,True,True,"[blackeyed peas, carrot, green pepper, onion, ..."
1,653275,North African Chickpea Soup,https://img.spoonacular.com/recipes/653275-312...,jpg,True,True,"[canola oil, garlic, onion, leek, canned chick..."
2,716426,"Cauliflower, Brown Rice, and Vegetable Fried Rice",https://img.spoonacular.com/recipes/716426-312...,jpg,True,True,"[grape seed oil, coconut oil, spring onions, g..."
3,642129,Easy To Make Spring Rolls,https://img.spoonacular.com/recipes/642129-312...,jpg,False,False,"[fresh basil, butter lettuce, chili pepper, fi..."
4,798400,Spicy Black-Eyed Pea Curry with Swiss Chard an...,https://img.spoonacular.com/recipes/798400-312...,jpg,True,True,"[blackeyed peas, curry powder, garam masala, e..."


In [107]:
# convert list of ingrd to string
# Convert each list of ingredients to one comma-separated string.
# Only lists are joined: if this cell is run again the column already holds
# strings, and joining a string would put a comma between every character.
merged_df["ingredients"] = merged_df["ingredients"].apply(
    lambda x: ",".join(x) if isinstance(x, list) else x
)

In [108]:
merged_df["train_string"] = merged_df["title"] + " : " + merged_df["ingredients"]
merged_df.head()

Unnamed: 0,id,title,image,imageType,vegetarian,vegan,ingredients,train_string
0,632003,African Bean Soup,https://img.spoonacular.com/recipes/632003-312...,jpg,True,True,"blackeyed peas,carrot,green pepper,onion,peanu...","African Bean Soup : blackeyed peas,carrot,gree..."
1,653275,North African Chickpea Soup,https://img.spoonacular.com/recipes/653275-312...,jpg,True,True,"canola oil,garlic,onion,leek,canned chickpeas,...","North African Chickpea Soup : canola oil,garli..."
2,716426,"Cauliflower, Brown Rice, and Vegetable Fried Rice",https://img.spoonacular.com/recipes/716426-312...,jpg,True,True,"grape seed oil,coconut oil,spring onions,garli...","Cauliflower, Brown Rice, and Vegetable Fried R..."
3,642129,Easy To Make Spring Rolls,https://img.spoonacular.com/recipes/642129-312...,jpg,False,False,"fresh basil,butter lettuce,chili pepper,fish s...","Easy To Make Spring Rolls : fresh basil,butter..."
4,798400,Spicy Black-Eyed Pea Curry with Swiss Chard an...,https://img.spoonacular.com/recipes/798400-312...,jpg,True,True,"blackeyed peas,curry powder,garam masala,eggpl...",Spicy Black-Eyed Pea Curry with Swiss Chard an...


In [109]:
merged_df.head()


Unnamed: 0,id,title,image,imageType,vegetarian,vegan,ingredients,train_string
0,632003,African Bean Soup,https://img.spoonacular.com/recipes/632003-312...,jpg,True,True,"blackeyed peas,carrot,green pepper,onion,peanu...","African Bean Soup : blackeyed peas,carrot,gree..."
1,653275,North African Chickpea Soup,https://img.spoonacular.com/recipes/653275-312...,jpg,True,True,"canola oil,garlic,onion,leek,canned chickpeas,...","North African Chickpea Soup : canola oil,garli..."
2,716426,"Cauliflower, Brown Rice, and Vegetable Fried Rice",https://img.spoonacular.com/recipes/716426-312...,jpg,True,True,"grape seed oil,coconut oil,spring onions,garli...","Cauliflower, Brown Rice, and Vegetable Fried R..."
3,642129,Easy To Make Spring Rolls,https://img.spoonacular.com/recipes/642129-312...,jpg,False,False,"fresh basil,butter lettuce,chili pepper,fish s...","Easy To Make Spring Rolls : fresh basil,butter..."
4,798400,Spicy Black-Eyed Pea Curry with Swiss Chard an...,https://img.spoonacular.com/recipes/798400-312...,jpg,True,True,"blackeyed peas,curry powder,garam masala,eggpl...",Spicy Black-Eyed Pea Curry with Swiss Chard an...


In [110]:
filename = "spoonacular_train_data.csv"
merged_df.to_csv(filename, index = False)