In [1]:
!pip install pdf2image
!apt-get install -y poppler-utils
from pdf2image import convert_from_path
import os

Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Installing collected packages: pdf2image
Successfully installed pdf2image-1.17.0
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 30 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.6 [186 kB]
Fetched 186 kB in 0s (735 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 126210 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.6_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.6) ...
Setting up poppler-utils (22.02.0-2ubuntu0.6) ...
Processing tr

In [2]:
def convert_pdf_to_images(pdf_path, output_folder):
  """Convert a PDF into JPEG files and return the paths that were written.

  Args:
    pdf_path: Path of the PDF file to convert.
    output_folder: Directory that receives the images; it is created
      (including missing parents) when it does not exist yet.

  Returns:
    List of the saved image paths in the order the images were produced,
    named ``page_1.jpg``, ``page_2.jpg``, ...
  """
  # exist_ok=True makes this a no-op for an existing folder. The conversion
  # below must run in both cases: previously it was nested under the
  # "folder is missing" check, so a second run returned None.
  os.makedirs(output_folder, exist_ok=True)

  # Convert PDF to images
  images = convert_from_path(pdf_path)

  # Save every image and remember its path; file names are numbered from 1.
  image_paths = []
  for page_number, image in enumerate(images, start=1):
    image_path = os.path.join(output_folder, f"page_{page_number}.jpg")
    image.save(image_path, "JPEG")
    image_paths.append(image_path)
  return image_paths

In [3]:
# Source PDF and the folder that receives one JPEG per converted page.
pdf_path = "/content/Things mother used to make.pdf"
output_folder = "images/"

# Run the conversion and keep the list of written image paths.
image_paths = convert_pdf_to_images(
    pdf_path,
    output_folder,
)


In [4]:
import google.generativeai as genai  # Main Gemini API library
import os

In [5]:
from google.colab import userdata

# Fetch the key stored under the name "Gemini_API" via `userdata` and pass it
# to the Gemini client library; the key itself never appears in the notebook.
GEMINI_API_KEY = userdata.get("Gemini_API")
genai.configure(
    api_key=GEMINI_API_KEY,
)


In [6]:
import base64
# Build the Gemini model object once. Later cells call
# `model.generate_content(...)`, which a bare model-name string does not
# provide, so the name must be wrapped in `genai.GenerativeModel`.
model = genai.GenerativeModel("gemini-1.5-flash")


In [7]:
from IPython.display import Markdown,display

In [9]:
# Single page used to try out the extraction prompt.
# Alternative page kept for reference: /content/images/page_12.jpg
image_path = "/content/images/page_23.jpg"
with open(image_path, "rb") as image_file:
  # Base64-encode the raw bytes to text; both names are bound to the same string.
  image_data = encoded_string = base64.b64encode(
      image_file.read()
  ).decode("utf-8")

In [11]:
# Instruction text sent to the model together with each page image: it asks for
# the recipe on the page to be extracted into named fields (title, ingredients,
# instructions, cuisine type, dish type, tags).
# NOTE(review): the wording contains typos ("extra" for "extract", "structure
# components"); it is left untouched here because it is runtime text.
system_prompt2 = """
Please analyze the content of this image and extract any related recipe information into structure components.
Specifically, extra the recipe title, list of ingredients, step by step instructions, cuisine type, dish type, any relevant tags or metadata.
The output must be formatted in a way suited for embedding in a Retrieval Augmented Generation (RAG) system.
"""

In [12]:
def get_gpt_response2():
  """Send the sample page with the extraction prompt and render the reply.

  Reads the notebook-level `model`, `system_prompt2` and `image_data`.

  Returns:
    The result of `display(...)` for the reply text rendered as Markdown.
  """
  # Prompt, a short caption, and the base64 image as an inline JPEG part.
  request_parts = [
      system_prompt2,
      "This is the image from the recipe page.",
      {"mime_type": "image/jpeg", "data": image_data},
  ]
  reply = model.generate_content(request_parts)
  return display(Markdown(reply.text))

In [13]:
# Try the extraction prompt on the sample page; the reply is rendered as Markdown.
get_gpt_response2()

```json
{
  "recipe_title": "Bannocks",
  "cuisine_type": "Unknown",
  "dish_type": "Bread",
  "ingredients": [
    "1 Cupful of Thick Sour Milk",
    "1½ Cupful of Sugar",
    "1 Egg",
    "2 Cupfuls of Flour",
    "1½ Cupful of Indian Meal",
    "1 Teaspoonful of Soda",
    "A pinch of Salt"
  ],
  "instructions": [
    "Make the mixture stiff enough to drop from a spoon.",
    "Drop mixture, size of a walnut, into boiling fat.",
    "Serve warm, with maple syrup."
  ],
  "tags": ["bread", "bannock"]
}
```

```json
{
  "recipe_title": "Boston Brown Bread",
  "cuisine_type": "Unknown",
  "dish_type": "Bread",
  "ingredients": [
    "1 Cupful of Rye Meal",
    "1 Cupful of Graham Meal",
    "1½ Cupful of Flour",
    "1 Cupful of Indian Meal",
    "1 Cupful of Sweet Milk",
    "1 Cupful of Sour Milk",
    "1 Cupful of Molasses",
    "½ Teaspoonful of Salt",
    "1 Heaping Teaspoonful of Soda"
  ],
  "instructions": [
    "Stir the meals and salt together.",
    "Beat the soda into the molasses until it foams; add sour milk, mix all together",
    "Pour into a tin pail which has been well greased, if you have no brown-bread steamer."
  ],
  "tags": ["bread", "boston brown bread"]
}
```

In [14]:
import time

In [None]:
extracted_recipes = []
skipped_recipes = []  # pages that produced no usable reply

for image_path in image_paths:
  # Base64-encode the page so it can be sent inline with the prompt.
  with open(image_path, "rb") as image_file:
    image_data = encoded_string = base64.b64encode(
        image_file.read()
    ).decode("utf-8")

  try:
    response = model.generate_content([
        system_prompt2,
        "This is the image from the recipe page.",
        {"mime_type": "image/jpeg", "data": image_data},
    ])

    try:
      gpt_response = response.text
      extracted_recipes.append(
          {"image_path": image_path, "recipe info": gpt_response}
      )
      print(f"extracted info {image_path}:\n {gpt_response}\n ")
    except ValueError as ve:
      if "copyrighted material" not in str(ve):
        # Not the copyright case: hand it to the generic handler below.
        raise
      print(f"Skipping {image_path}: Content appears to be copyrighted")
      skipped_recipes.append(image_path)

  except Exception as e:
    # Best effort: report the failure, remember the page, and carry on.
    print(f"Error processing {image_path}: {e!s}")
    skipped_recipes.append(image_path)

  # Fixed pause between requests; runs after every page, failed or not.
  time.sleep(4)

# Summarise the pages that were skipped.
if skipped_recipes:
  print("\nSkipped recipes due to copyright or errors:")
  for path in skipped_recipes:
    print(f"- {path}")

In [None]:
# Inspect the collected replies: one dict per page with "image_path" and "recipe info".
extracted_recipes


In [None]:
filtered_recipes = []

# Keep only replies that mention at least one recipe marker (case-insensitive).
# NOTE(review): the sample replies use the key "recipe_title" (underscore), so
# the "recipe title" marker may never match on its own; confirm whether intended.
for recipe in extracted_recipes:
    recipe_text = recipe["recipe info"].lower()
    looks_like_recipe = any(
        marker in recipe_text
        for marker in ("ingredients", "instructions", "recipe title")
    )
    if not looks_like_recipe:
        print(f"Skipping recipe: {recipe['image_path']}")
        continue
    filtered_recipes.append(recipe)


In [None]:
import json

In [None]:
# Persist the filtered recipes as JSON indented by four spaces.
output_file = "recipe_info.json"
with open(output_file, "w") as json_file:
  json.dump(
      filtered_recipes,
      json_file,
      indent=4,
  )

# EMBEDDINGS

In [None]:
import numpy as np

In [None]:
# Read the recipes back from the JSON file written earlier.
with open("recipe_info.json", "r") as json_file:
  recipes = json.loads(json_file.read())

In [None]:
# Texts to embed: the stored model reply of every recipe, in file order.
recipe_texts = [entry["recipe info"] for entry in recipes]
# Option 1: Using sentence-transformers (completely free, runs locally)
from sentence_transformers import SentenceTransformer

def generate_embeddings_sentence_transformers(recipe_texts, model_name='all-mpnet-base-v2'):
    """Embed texts with a pre-trained sentence-transformers model.

    Args:
        recipe_texts: Texts to embed; passed unchanged to ``model.encode``.
        model_name: Name of the sentence-transformers model to load. The
            default ``'all-mpnet-base-v2'`` is the model this notebook has
            always used (the higher-quality option); ``'all-MiniLM-L6-v2'``
            is the smaller, faster alternative. Queries must later be
            embedded with the same model that produced the indexed vectors.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``recipe_texts``.
    """
    # Load the pre-trained model (this downloads it once).
    model = SentenceTransformer(model_name)

    # Generate embeddings
    return model.encode(recipe_texts)

# Embed all recipe texts; the result feeds the FAISS index built further down.
embeddings = generate_embeddings_sentence_transformers(recipe_texts)

In [None]:
# Convert the embeddings to a NumPy array and display it.
embedding_matrix = np.array(embeddings)
embedding_matrix

In [None]:
!pip install faiss-cpu

In [None]:
import faiss
# Flat L2 index whose dimensionality is the embedding width.
index = faiss.IndexFlatL2(embedding_matrix.shape[1])
index.add(embedding_matrix)
# save the index
faiss.write_index(index, 'filtered_recipe_index.index')

# Build the metadata (kept in memory only) from `recipes`, the list the
# embeddings were generated from, so position i of `metadata` always describes
# vector i of the index. The previous source, `filtered_recipes`, is only
# defined when the extraction cells ran in the same session.
metadata = [
    {
        'recipe_info': recipe.get('recipe info', 'N/A'),  # Use 'recipe info' key
        "image_path": recipe['image_path'],
    }
    for recipe in recipes
]
# Every indexed vector needs exactly one metadata entry.
assert index.ntotal == len(metadata), "FAISS index and metadata are out of sync"

In [None]:
def query_embeddings(query, index, metadata, k=5, model=None, verbose=True):
    """Return the stored recipes closest to a free-text query.

    Args:
        query: Query text to embed.
        index: Index object exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``, e.g. the FAISS index built above.
        metadata: List of dicts with a ``'recipe_info'`` entry; position i is
            looked up for hit index i.
        k: Maximum number of hits; capped at ``len(metadata)``.
        model: Object with an ``encode(list_of_texts)`` method. When None, a
            ``SentenceTransformer('all-mpnet-base-v2')`` is loaded on every
            call, so pass an already loaded model to avoid that cost.
        verbose: When True (the default, matching the previous behaviour)
            print the query embedding, the query vector, and the raw hit
            indices and distances.

    Returns:
        List of ``(recipe_info, distance)`` tuples in the order returned by
        the index. Empty when there is nothing to search or ``k`` is not
        positive.
    """
    # Nothing to retrieve: avoid loading a model and searching with k <= 0.
    n_hits = min(k, len(metadata))
    if n_hits <= 0:
        return []

    # Check if model is provided, otherwise load it
    if model is None:
        model = SentenceTransformer('all-mpnet-base-v2')

    # Generate the embeddings for the query using sentence-transformers
    query_embedding = model.encode([query])[0]
    if verbose:
        print(f"The query embedding is {query_embedding}\n")

    # One row of float32 values, whatever dtype the encoder produced.
    query_vector = np.asarray(query_embedding, dtype=np.float32).reshape(1, -1)
    if verbose:
        print(f"The query vector is {query_vector}\n")

    # Search faiss index
    distances, indices = index.search(query_vector, n_hits)

    # Store the indices and distances
    stored_indices = indices[0].tolist()
    stored_distances = distances[0].tolist()
    if verbose:
        print(f"The stored indices are {stored_indices}\n")
        print(f"The stored distances are {stored_distances}\n")

    # Drop hit indices outside the metadata range (e.g. negative placeholders).
    results = [
        (metadata[i]['recipe_info'], dist)
        for i, dist in zip(stored_indices, stored_distances)
        if 0 <= i < len(metadata)
    ]
    return results

In [None]:
# Retrieve the stored recipes closest to a sample question and show them.
query = "How to make bread?"
results = query_embeddings(query, index, metadata)
results

In [None]:
def combine_retrieved(results):
  """Join the first element of every result into one blank-line separated text.

  Args:
    results: Iterable of indexable items whose element 0 is a string.

  Returns:
    The strings joined with one empty line between them ("" when empty).
  """
  recipe_texts = (hit[0] for hit in results)
  return "\n\n".join(recipe_texts)

In [None]:
# Merge the retrieved recipe texts into one context string for the model.
combined_content=combine_retrieved(results)

In [None]:
# Show the merged context that will be sent to the model.
combined_content

# Answering queries with the retrieved context

In [None]:
# Define the system prompt.
# It deliberately contains no interpolated query or retrieved text: both are
# sent with every request in the user message ("Query: ... Information: ..."),
# so the prompt cannot go stale when a later cell asks a different question.
system_prompt4 = """
You are highly experienced and expert chef specialized in providing cooking advice.
Your main task is to provide information precise and accurate on the combined content.
You answer directly to the query using only the information provided with the query.
If you don't know the answer, just say that you don't know.
Your goal is to help the user and answer the query.
"""

In [None]:
def generate_response(query, combined_content, system_prompt):
    """Ask Gemini to answer a query from the retrieved recipe text.

    Args:
        query: The user's question.
        combined_content: Retrieved recipe text the answer should be based on.
        system_prompt: System instruction given to the model.

    Returns:
        The object returned by ``generate_content``; callers in this notebook
        read its ``.text`` attribute.
    """
    # Use the caller's prompt. Previously the argument was ignored in favour
    # of the notebook-level `system_prompt4`.
    model = genai.GenerativeModel(
        model_name='models/gemini-1.5-flash',
        system_instruction=system_prompt,
    )

    # Single user turn carrying both the question and the retrieved context;
    # temperature 0 is requested for this call.
    response = model.generate_content(
        contents=[
            {
                "role": "user",
                "parts": [{"text": f"Query: {query}\n\nInformation: {combined_content}"}]
            }
        ],
        generation_config={"temperature": 0}
    )

    return response

In [None]:
# Get the results from the API
query = "How to make bread?"
combined_content = combine_retrieved(results)
# `system_prompt3` is not defined anywhere in this notebook; `system_prompt4`
# is the system prompt that is.
response = generate_response(query, combined_content, system_prompt4)
display(Markdown(response.text))

In [None]:
# Get the results from the API
query = "give me  chocolate cake recipe?"
# Retrieve again for the new question; otherwise the context would still be
# the hits of the previous query.
results = query_embeddings(query, index, metadata)
combined_content = combine_retrieved(results)
# `system_prompt3` is not defined anywhere in this notebook; `system_prompt4`
# is the system prompt that is.
response = generate_response(query, combined_content, system_prompt4)
display(Markdown(response.text))