In [1]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
import openai
import chromadb

In [2]:
# File path for the document
document_dir = "./data/"
filename = "general_cooking.pdf"
file_path = os.path.join(document_dir, filename)

In [3]:
# Load and split the document
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()
len(pages)

Ignoring wrong pointing object 57 0 (offset 0)
Ignoring wrong pointing object 1237 0 (offset 0)
Ignoring wrong pointing object 2199 0 (offset 0)
Ignoring wrong pointing object 8529 0 (offset 0)
Ignoring wrong pointing object 12853 0 (offset 0)
Ignoring wrong pointing object 30002 0 (offset 0)
Ignoring wrong pointing object 31357 0 (offset 0)
Ignoring wrong pointing object 31927 0 (offset 0)
Ignoring wrong pointing object 31928 0 (offset 0)


908

In [4]:
# Split pages into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = text_splitter.split_documents(pages)
len(chunks)

908

In [5]:
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

  embeddings = OpenAIEmbeddings(model="text-embedding-3-large")


In [10]:

client = chromadb.Client()

db1 = Chroma.from_documents(
    chunks[:500], 
    embeddings, 
    persist_directory="./chroma_db"
)

print("ChromaDB created with document embeddings.")

client = chromadb.Client()

db2 = Chroma.from_documents(
    chunks[500:], 
    embeddings, 
    persist_directory="./chroma_db"
)

print("ChromaDB created with document embeddings.")


ChromaDB created with document embeddings.
ChromaDB created with document embeddings.


In [38]:
user_question = "what do i need to know when making noodles?" # User question
retrieved_docs = db1.similarity_search(user_question, k=10) # k is the number of documents to retrieve

In [39]:
def _get_document_prompt(docs):
    prompt = "\n"
    for doc in docs:
        prompt += "\nContent:\n"
        prompt += doc.page_content + "\n\n"
    return prompt

In [40]:
# Generate a formatted context from the retrieved documents
formatted_context = _get_document_prompt(retrieved_docs)
print("Context formatted for GPT model.")

Context formatted for GPT model.


In [41]:
prompt = f"""
## SYSTEM ROLE
You are a kitchen aid and give assistance in finding recipes and answering questions about cooking, food, drinks, and nutrition.
Format lists properly with bullet points or numbering.


## USER QUESTION
The user has asked: 
"{user_question}"

## CONTEXT
Here is the relevant content from the technical documents:  
'''
{formatted_context}
'''

## GUIDELINES
1. **Accuracy**:  
   - Only use the content in the `CONTEXT` section to answer.  
   - If the answer cannot be found, explicitly state: "The provided context does not contain this information."

3. **Clarity**:  
   - Use simple, professional, and concise language.  
   - Format your response in Markdown for readability.  

## TASK
1. Answer the user's question **directly** if possible.  
2. Point the user to relevant parts of the documentation.  
3. Provide the response in the following format:

## RESPONSE FORMAT
'''
# [Brief Title of the Answer]
[Answer in simple, clear text.]
'''
"""
print("Prompt constructed.")

Prompt constructed.


In [42]:
# Set up GPT client and parameters
client = openai.OpenAI()
model_params = {
    'model': 'gpt-4o',
    'temperature': 0.7,  # Increase creativity
    'max_tokens': 4000,  # Allow for longer responses
    'top_p': 0.9,        # Use nucleus sampling
    'frequency_penalty': 0.5,  # Reduce repetition
    'presence_penalty': 0.6    # Encourage new topics
}

In [43]:
messages = [{'role': 'user', 'content': prompt}]
completion = client.chat.completions.create(messages=messages, **model_params, timeout=120)

In [44]:
answer = completion.choices[0].message.content
print(answer)

'''
# Key Considerations for Making Noodles

When making noodles, there are several important aspects to consider based on the provided context:

1. **Types of Flour**:
   - Use durum wheat flour for dried pasta due to its high gluten content which makes it easier to roll out.
   - For fresh egg pasta, standard bread wheat and eggs are typically used.

2. **Dough Preparation**:
   - Mix ingredients into a stiff dough and knead until smooth.
   - Allow the dough to rest so that the flour particles absorb water and the gluten network develops.
   - Roll the dough gently and repeatedly to form thin sheets, which helps organize and compress the gluten network.

3. **Eggs in Pasta**:
   - Eggs enhance color and richness; yolks provide fat for tenderness while egg whites add protein for firmness.
   - Fresh pasta made with eggs should be cooked immediately or refrigerated due to potential salmonella risk.

4. **Cooking Techniques**:
   - Cook pasta al dente by stopping when the center remain