# AutoChef

## Prepare environment and process data

Data Source: https://www.kaggle.com/datasets/irkaal/foodcom-recipes-and-reviews?resource=download

Data Source v2: https://app.roboflow.com/bens-workspace-3xdyh/fridge-detection-aymme/browse?queryText=&pageSize=50&startingIndex=0&browseQuery=true

In [12]:
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
import kagglehub
import pandas as pd
import numpy
import re
import PIL.Image
from ultralytics import YOLO
import torch
import cv2
import requests

In [None]:
!pip install torch torchvision torchaudio

In [None]:


processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = AutoModelForZeroShotImageClassification.from_pretrained("openai/clip-vit-base-patch32")

In [None]:


path = kagglehub.dataset_download("irkaal/foodcom-recipes-and-reviews")

print("Path to dataset files:", path)

In [None]:
recipes = pd.read_csv(path + "/recipes.csv")

In [None]:
print(recipes.shape)
display(recipes.head())
print(recipes.columns)

In [None]:

#Convert R-style vector strings to Python lists for 'RecipeIngredientParts' column
def r_vector_to_list(s):
    # Remove c( and )
    s = s.strip()
    s = re.sub(r'^c\(|\)$', '', s)
    # Split by comma, strip quotes and whitespace
    return [item.strip().strip('"').strip("'") for item in s.split(',')]

recipes['RecipeIngredientParts'] = recipes['RecipeIngredientParts'].apply(r_vector_to_list)

## Core functionality

In [None]:
all_ingredients = recipes['RecipeIngredientParts'].explode().unique().tolist()
print(len(all_ingredients))

Use CLIP to match images to ingredients

In [None]:
test_image = PIL.Image.open("fridge_test.jpg")

batch_size = 100
ingredient_scores = []

for i in range(0, len(all_ingredients), batch_size):
    batch_ingredients = all_ingredients[i:i + batch_size]
    inputs = processor(text=batch_ingredients, images=test_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    scores = outputs.logits_per_image[0].detach().cpu().numpy()
    ingredient_scores.extend(zip(batch_ingredients, scores))

In [None]:
# Get the ingredients present in the image, sorted by score
ingredient_scores.sort(key=lambda x: x[1], reverse=True)
top_ingredients = [(ingredient, score) for ingredient, score in ingredient_scores if score > 20]
print("Top ingredients in the image:")
for ingredient, score in top_ingredients:
    print(f"{ingredient}: {score:.4f}")



Use the dataset to find recipes that match a set of ingredients

## Take 2 - use YOLO for a simplified approach

In [11]:

print("CUDA Available:", torch.cuda.is_available())
print("Device Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

print(torch.__version__)
print(torch.version.cuda)


CUDA Available: True
Device Name: NVIDIA GeForce GTX 1660
2.3.1+cu118
11.8
Torch: 2.3.1+cu118
Torchvision: 0.18.1+cu118
CUDA available: True
tensor([0], device='cuda:0')


In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="8zoDBiAT84ugEQEfOSDf")
project = rf.workspace("bens-workspace-3xdyh").project("food-item-detection-fggyf-j86bp")
version = project.version(1)
dataset = version.download("yolov8")


### Core functionality

In [2]:
# Load your trained model
model = YOLO('best_v2.pt')

# Load an image
img = cv2.imread('fridge_test_2.jpg')

# Run inference
results = model(img)

# Print detected objects
for result in results:
    boxes = result.boxes
    for box in boxes:
        class_idx = int(box.cls)
        class_name = model.names[class_idx]
        confidence = float(box.conf)
        print(f"Class: {class_name}, Confidence: {confidence:.2f}, Box: {box.xyxy}")

confidence_threshold = 0.7

for result in results:
    # Filter boxes by confidence
    high_conf_boxes = [box for box in result.boxes if float(box.conf) > confidence_threshold]
    if high_conf_boxes:
        # Optionally, update result.boxes to only include high confidence boxes
        result.boxes = high_conf_boxes
        result.show()


0: 640x640 1 apple, 1 butter, 1 carrot, 1 chicken, 1 corn, 1 green_beans, 1 ground_beef, 1 ham, 1 spinach, 2 sugars, 1 tomato, 15.5ms
Speed: 5.7ms preprocess, 15.5ms inference, 170.3ms postprocess per image at shape (1, 3, 640, 640)
Class: tomato, Confidence: 0.94, Box: tensor([[317.9568, 268.1198, 425.1532, 314.7433]], device='cuda:0')
Class: apple, Confidence: 0.94, Box: tensor([[173.8641, 120.6684, 282.8015, 157.4133]], device='cuda:0')
Class: chicken, Confidence: 0.93, Box: tensor([[168.5426, 258.2592, 292.6954, 313.8521]], device='cuda:0')
Class: spinach, Confidence: 0.92, Box: tensor([[167.8266, 362.3416, 339.5656, 416.4059]], device='cuda:0')
Class: sugar, Confidence: 0.88, Box: tensor([[420.7213, 491.2953, 489.9366, 570.9257]], device='cuda:0')
Class: ground_beef, Confidence: 0.86, Box: tensor([[252.9537, 538.1412, 357.7353, 592.6963]], device='cuda:0')
Class: corn, Confidence: 0.85, Box: tensor([[472.1060, 121.4534, 525.9878, 157.7587]], device='cuda:0')
Class: green_beans, C

In [11]:
# Get a list of detected classes
detected_classes = [model.names[int(box.cls)] for box in results[0].boxes if float(box.conf) > confidence_threshold]
print("Detected classes:", detected_classes)

Detected classes: ['tomato', 'apple', 'chicken', 'spinach', 'sugar', 'ground_beef', 'corn', 'green_beans', 'butter', 'sugar', 'ham']


In [17]:
# Get recipes that can be made with the detected ingredients
url = "https://api.foodoscope.com/recipe2-api/recipebyingredient/by-ingredients-categories-title"

params = {
    "includeIngredients": ",".join(detected_classes),
    "excludeIngredients": "",
    "limit": 5
}

print("Requesting URL with params:", params)
response = requests.get(url, params=params)

headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer V_3d3U-mDsD1LqRgGpjkOpcd4mqnI2eIefAiZl0Hz1eed-PN",
}

response = requests.get(
    url,
    params=params,
    headers=headers
)

print(response.json())
data = response.json()

Requesting URL with params: {'includeIngredients': 'tomato,apple,chicken,spinach,sugar,ground_beef,corn,green_beans,butter,sugar,ham', 'limit': 5}
{'success': True, 'message': 'Recipes fetched successfully', 'payload': {'data': [{'_id': '6405722ea13d0d2d35892a41', 'Recipe_id': '10000', 'Calories': '101.0', 'cook_time': '180', 'prep_time': '5', 'servings': '12', 'Recipe_title': 'Sweet Honey French Bread', 'total_time': '185', 'Region': 'French', 'Sub_region': 'French', 'Continent': 'European', 'vegan': '0.0', 'pescetarian': '0.0', 'ovo_vegetarian': '0.0', 'lacto_vegetarian': '0.0', 'ovo_lacto_vegetarian': '0.0'}, {'_id': '640572c5a13d0d2d358a1aa2', 'Recipe_id': '100000', 'Calories': '272.8', 'cook_time': '0', 'prep_time': '0', 'servings': '06-Aug', 'Recipe_title': 'Southwestern Beef Brisket', 'total_time': '205', 'Region': 'South American', 'Sub_region': 'Argentine', 'Continent': 'Latin American', 'vegan': '0.0', 'pescetarian': '0.0', 'ovo_vegetarian': '0.0', 'lacto_vegetarian': '0.0', 

In [22]:
for recipe in data.get('payload', []).get('data', []):
    print(f"Title: {recipe['Recipe_title']}")
    print(f"_id: {recipe['Recipe_id']}")
    print()

Title: Sweet Honey French Bread
_id: 10000

Title: Southwestern Beef Brisket
_id: 100000

Title: Heirloom Tomato Salad With Goat Cheese and Arugula
_id: 100002

Title: Heirloom Tomato Sandwich With Basil Mayo
_id: 100003

Title: Heirloom Apple Pie
_id: 100004



In [23]:
url = "https://api.foodoscope.com/recipe2-api/search-recipe/" + '10000'

response = requests.get(url)

headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer V_3d3U-mDsD1LqRgGpjkOpcd4mqnI2eIefAiZl0Hz1eed-PN",
}

response = requests.get(
    url,
    params=params,
    headers=headers
)

print(response.json())
recipe_data = response.json()

{'recipe': {'_id': '6405722ea13d0d2d35892a41', 'Recipe_id': '10000', 'Calories': '101.0', 'cook_time': '180', 'prep_time': '5', 'servings': '12', 'Recipe_title': 'Sweet Honey French Bread', 'total_time': '185', 'Region': 'French', 'Sub_region': 'French', 'Continent': 'European', 'Source': 'AllRecipes', 'Carbohydrate, by difference (g)': '201.5316', 'Energy (kcal)': '1079.536', 'Protein (g)': '32.8252', 'Total lipid (fat) (g)': '13.5484', 'Processes': 'add||drizzle', 'vegan': '0.0', 'pescetarian': '0.0', 'ovo_vegetarian': '0.0', 'lacto_vegetarian': '0.0', 'ovo_lacto_vegetarian': '0.0'}, 'ingredients': [{'_id': '640f11faa80bf2bc741e57e0', 'recipe_no': '10000', 'ingredient_Phrase': '3/4 cup water', 'ingredient': 'water', 'quantity': '3/4', 'unit': 'cup', 'ing_id': '3', 'ndb_id': '14555', 'M_or_A': 'M'}, {'_id': '640f11faa80bf2bc741e57e1', 'recipe_no': '10000', 'ingredient_Phrase': '2 teaspoons honey', 'ingredient': 'honey', 'quantity': '2', 'unit': 'teaspoons', 'ing_id': '55', 'ndb_id': '

In [31]:
ingredients = recipe_data.get('ingredients', [])
for ingredient in ingredients:
    print(f"Ingredient: {ingredient['ingredient']}")

Ingredient: water
Ingredient: honey
Ingredient: olive oil
Ingredient: salt
Ingredient: white sugar
Ingredient: bread flour
Ingredient: active yeast
