# Classifying Menu Items as Vegetarian or Not

In [2]:
import os 
import pandas as pd
import requests
import requests_cache
from string import ascii_lowercase as lc_alphabet


requests_cache.install_cache('api_cache', expire_after=86400)  # Cache expires after 1 day (86400 seconds)

#### Create a Test Dataset

##### The plan is to request data for all of their recipes and the ingredients in them, then join each recipe into a string of the form [name] : [desc] and use those strings to test two 'medium' language models.

Getting data from TheMealDB by scraping recipes for each letter of the alphabet.

In [None]:
#key provided by them
api_key = "1"  # Replace with your actual API key

In [None]:
# testing one alphabet
url = "https://www.themealdb.com/api/json/v1/1/search.php?f=a"

In [None]:
response = requests.get(url)

In [None]:
data = response.json()
data

In [None]:
len(data["meals"])

In [None]:
# writing a for loop to loop through all the letters

In [None]:
lc_alphabet

In [None]:
all_recipes = []

In [None]:
# Ask TheMealDB for the meals listed under each lowercase letter and pool
# them all into all_recipes.
for letter in lc_alphabet:

    # Use the api_key defined above instead of repeating the key in the URL.
    url = f"https://www.themealdb.com/api/json/v1/{api_key}/search.php?f={letter}"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"{letter} did not work")
        continue

    data = response.json()

    # "meals" can come back as null (None) for a letter; skip those letters.
    meals = data["meals"]
    if meals is not None:
        all_recipes.extend(meals)


In [None]:
letter

In [None]:
len(all_recipes) # why are there two left out? should be 303

In [None]:
food_df = pd.DataFrame(all_recipes)
food_df

In [None]:
# Keep only the strIngredient* columns and replace missing values with "" so
# every cell is a string that can be joined later.
# fillna is chained (returning a new frame) rather than applied in place:
# in-place mutation of a frame derived from another frame can trigger
# SettingWithCopyWarning.
ingredients_df = food_df.filter(regex=("strIngredient.*")).fillna("")
ingredients_df.head()

In [None]:
# Collapse each recipe's ingredient columns into one comma-separated string.
# Blank slots are skipped so the result contains only real ingredient names
# (no runs of trailing commas), matching the format of the training strings.
food_df["all_ingredients"] = ingredients_df.apply(
    lambda row: ",".join(name.strip() for name in row if name and name.strip()),
    axis=1,
)
food_df.head()

In [None]:
# Model input for each meal: "<meal name> : <comma-separated ingredients>".
food_df["test_string"] = food_df["strMeal"].str.cat(food_df["all_ingredients"], sep=" : ")

In [None]:
food_df.head()

In [None]:
# Write the TheMealDB test set to disk, leaving out the row index.
filename = "themealdb_recipes.csv"
food_df.to_csv(path_or_buf=filename, index=False)

## Build your training set

One way I can think of to scrape a dataset is to query every allowed value of the cuisine parameter:

- get all the recipes from all cuisines
- use the recipe IDs to then fetch ingredient and diet information
- do some string editing to build a training string


In [None]:
# Probe complexSearch with a single cuisine to see what the response looks like.
url = (
    "https://api.spoonacular.com/recipes/complexSearch"
    "?apiKey=1bbc1401ba7d48729507a4e73d397ab5"
    "&cuisine=italian"
)
response = requests.get(url)
data = response.json()
data.keys()

In [None]:
data["totalResults"]

In [None]:
data["results"]

In [None]:
#?apiKey=1bbc1401ba7d48729507a4e73d397ab5&

In [None]:
# Cuisine values passed to the search endpoint. Multi-word names are already
# percent-encoded (%20), so no entry contains whitespace and a plain split()
# recovers the list.
cuisines = """
    african asian american british cajun caribbean
    chinese eastern%20european european french german
    greek indian irish italian japanese jewish
    korean latin%20american mediterranean mexican
    middle%20eastern nordic southern spanish thai
    vietnamese
""".split()

In [None]:
# Collect every recipe the search endpoint lists for each cuisine.
#
# The original asked for number={totalResults} in a single call, but the
# endpoint caps how many results one call returns, so larger cuisines were
# silently truncated. Page through the results with `offset` instead.
#
# NOTE(review): the fallback key below is a credential committed to the
# notebook. Set SPOONACULAR_API_KEY in the environment, then rotate and
# remove the literal.
spoonacular_key = os.environ.get("SPOONACULAR_API_KEY", "1bbc1401ba7d48729507a4e73d397ab5")
search_url = "https://api.spoonacular.com/recipes/complexSearch"
page_size = 100   # per-call maximum for `number` per the spoonacular docs (confirm)
max_offset = 900  # maximum accepted `offset` per the spoonacular docs (confirm)

all_cuisines = []
for cuisine in cuisines:

    offset = 0
    while offset <= max_offset:
        # Cuisine names are already percent-encoded (e.g. "eastern%20european"),
        # so they are interpolated as-is rather than passed through params=,
        # which would encode them a second time.
        page_url = (
            f"{search_url}?apiKey={spoonacular_key}"
            f"&cuisine={cuisine}&number={page_size}&offset={offset}"
        )
        response = requests.get(page_url)
        # Fail loudly on quota/auth errors instead of a KeyError on the payload.
        response.raise_for_status()
        data = response.json()

        results = data["results"]
        all_cuisines.extend(results)
        offset += len(results)

        # Stop on an empty page or once every reported result has been read.
        if not results or offset >= data["totalResults"]:
            break
        
        

In [None]:
pd.DataFrame(all_cuisines)["id"].drop_duplicates() # 948 unique reciepes

In [None]:
str(all_cuisines[0]["id"])

In [None]:
len(all_cuisines)

In [None]:
# Create the list of unique recipe ids (as strings) to pass into the bulk endpoint.
# dict.fromkeys drops repeats while keeping first-seen order, in one pass,
# instead of scanning the growing list once per recipe.
all_ids = list(dict.fromkeys(str(recipe["id"]) for recipe in all_cuisines))

id_string = ",".join(all_ids)
id_string

In [None]:
len(all_ids)

In [None]:
id_string = ",".join(all_ids[:474])

In [None]:
# Request the full recipe records for every id currently held in id_string.
url = (
    "https://api.spoonacular.com/recipes/informationBulk"
    "?apiKey=1bbc1401ba7d48729507a4e73d397ab5"
    f"&ids={id_string}"
)

response = requests.get(url)
data = response.json()

In [None]:
print("Quota Left Today:", response.headers.get("X-API-Quota-Left"))

In [None]:
print("Quota Left Today:", response.headers.get("X-API-Quota-Left"))

In [None]:
response.status_code

In [None]:
data[0]["extendedIngredients"][2]["nameClean"]

### NEXT STEP: HAVE TO GET ALL 948

In [None]:
len(all_ids[474:])

In [None]:
id_string = ",".join(all_ids[474:])

In [None]:
# Get the recipe records for the ids in id_string (the second half of all_ids,
# built in the previous cell).
# The URL previously hard-coded ids=632003, so only one recipe was fetched and
# the rest of the batch never reached `data`.
url = f"https://api.spoonacular.com/recipes/informationBulk?apiKey=1bbc1401ba7d48729507a4e73d397ab5&ids={id_string}"

response = requests.get(url)
data_2 = response.json()

In [None]:
len(data_2)

In [None]:
data_2[0].keys()

In [None]:
data.extend(data_2)

In [None]:
len(data) # contains all the recipe bulk info for each id

In [None]:
# need to make new data structure of id and ingredients and then 
data[0]["vegan"]

In [None]:
data[0]["extendedIngredients"]#[2]["nameClean"]

In [None]:
# Reduce every bulk-info record to its id, diet flags and ingredient names.
# For each ingredient the cleaned name is used; the raw name is the fallback
# when nameClean is None.
recipe_ingredients = [
    {
        "id": recipe["id"],
        "vegetarian": recipe["vegetarian"],
        "vegan": recipe["vegan"],
        "ingredients": [
            ingredient["name"] if ingredient["nameClean"] is None else ingredient["nameClean"]
            for ingredient in recipe["extendedIngredients"]
        ],
    }
    for recipe in data
]
        
        

In [None]:
len(recipe_ingredients)

In [None]:
# First of the two frames to merge on id: the search results, with exact
# duplicate rows removed.
all_recipes = pd.DataFrame(all_cuisines).drop_duplicates()
all_recipes.head()

In [None]:
all_recipes.shape

In [None]:
id_ingredients = pd.DataFrame(recipe_ingredients)
id_ingredients.head()

In [None]:
id_ingredients.shape

In [None]:
# Attach the diet flags and ingredient lists to the search results by recipe id.
merged_df = pd.merge(all_recipes, id_ingredients, on="id")
merged_df.head()

In [None]:
# Turn each list of ingredient names into a single comma-separated string.
merged_df["ingredients"] = merged_df["ingredients"].map(",".join)

In [None]:
# Model input for each recipe: "<title> : <comma-separated ingredients>".
merged_df["train_string"] = merged_df["title"].str.cat(merged_df["ingredients"], sep=" : ")
merged_df.head()

In [None]:
merged_df.head()


In [None]:
# Write the merged training set to disk, leaving out the row index.
filename = "spoonacular_train_data.csv"
merged_df.to_csv(path_or_buf=filename, index=False)

In [3]:
# Load the hand-labelled menu items.
# NOTE(review): absolute path on one user's machine; this cell only runs there.
menu_items_path = "/Users/anushasubramanian/Desktop/Menu_items.csv"
df = pd.read_csv(menu_items_path)
df.head()

Unnamed: 0,Title,Description,Manual,Price
0,LASUNI GOBI,Florets Of Cauliflower Marinated In Lemon And ...,Vegetarian,18
1,VEGETABLE SAMOSA,Crispy Turnovers Stuffed With Seasoned Potatoe...,Vegetarian,18
2,ALOO TIKKIYA,Grilled Spicy Potato Cakes Topped With Pomegra...,Vegetarian,18
3,AMRITSARI SEEKH,"Marinated Paneer, Grilled Peppers, Tomato Sauce",Vegetarian,22
4,HARA BHARA KABAB,"Grilled Spinach Patties Stuffed With Paneer, G...",Vegetarian,18


In [4]:
# Model input for each menu item: "<Title> : <Description>".
df["test_str"] = df["Title"].str.cat(df["Description"], sep=" : ")
df.head()

Unnamed: 0,Title,Description,Manual,Price,test_str
0,LASUNI GOBI,Florets Of Cauliflower Marinated In Lemon And ...,Vegetarian,18,LASUNI GOBI : Florets Of Cauliflower Marinated...
1,VEGETABLE SAMOSA,Crispy Turnovers Stuffed With Seasoned Potatoe...,Vegetarian,18,VEGETABLE SAMOSA : Crispy Turnovers Stuffed Wi...
2,ALOO TIKKIYA,Grilled Spicy Potato Cakes Topped With Pomegra...,Vegetarian,18,ALOO TIKKIYA : Grilled Spicy Potato Cakes Topp...
3,AMRITSARI SEEKH,"Marinated Paneer, Grilled Peppers, Tomato Sauce",Vegetarian,22,"AMRITSARI SEEKH : Marinated Paneer, Grilled Pe..."
4,HARA BHARA KABAB,"Grilled Spinach Patties Stuffed With Paneer, G...",Vegetarian,18,HARA BHARA KABAB : Grilled Spinach Patties Stu...


In [5]:
# Write the menu items with their test strings.
# index=False matches the other to_csv calls in this notebook; the frame
# already carries an "Unnamed: 0" column from the input file, so writing the
# index as well would add a second redundant counter column.
df.to_csv("Menu_items2.csv", index=False)