In [17]:
from openai import OpenAI

from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_chroma import Chroma

import os
from dotenv import load_dotenv
import gradio as gr

In [18]:
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

In [19]:
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [20]:
if openai_api_key:
    print(openai_api_key[:10])

sk-proj-ed


In [21]:
DATA_FILE_PATH = "../../datasets/eleven_madison_park_data.txt"

In [22]:
loader = TextLoader(DATA_FILE_PATH, encoding="utf-8")

In [23]:
raw_documents = loader.load()

In [24]:
print(f"Document Loaded: {len(raw_documents)}")

Document Loaded: 1


In [25]:
print(raw_documents[0].page_content[:500] + "...")

Source: https://www.elevenmadisonpark.com/
Title: Eleven Madison Park
Content:
Book on Resy
---END OF SOURCE---

Source: https://www.elevenmadisonpark.com/careers
Title: Careers — Eleven Madison Park
Content:
Join Our Team Eleven Madison Park ▾ All Businesses Eleven Madison Park Clemente Bar Daniel Humm Hospitality Filter Categories Culinary Pastry Wine & Beverage Dining Room Office & Admin Other Job Types Full Time Part Time Compensation Salary Hourly Apply filters OPEN OPPORTUNITIES Staff Acco...


In [26]:
print(raw_documents[0].page_content[-500:] + "...")

ase contact careers@elevenmadisonpark.com or visit Culinary Agents . To stay up to date about future Eleven Madison Park news and events, please sign up for our Newsletter . Please contact events@elevenmadisonpark.com Please contact press@elevenmadisonpark.com Thank you for thinking of Eleven Madison Park for your inquiry. At this time, all of our non-profit efforts are focused on our partnership with Rethink Food . Email: info@elevenmadisonpark.com Phone: 212.889.0905 Ext. 3
---END OF SOURCE---...


In [27]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap = 150,)

In [28]:
try:
    documents = text_splitter.split_documents(raw_documents)
    print(f"Document split into: {len(documents)} chunks")
except Exception as e:
    raise ValueError("Error splitting document: {e}")

Document split into: 38 chunks


In [29]:
print(documents[8].page_content)

Source: https://www.elevenmadisonpark.com/faq
Title: FAQs — Eleven Madison Park
Content:


In [30]:
print(documents[0].metadata)

{'source': '../../datasets/eleven_madison_park_data.txt'}


In [31]:
documents

[Document(metadata={'source': '../../datasets/eleven_madison_park_data.txt'}, page_content='Source: https://www.elevenmadisonpark.com/\nTitle: Eleven Madison Park\nContent:\nBook on Resy\n---END OF SOURCE---'),
 Document(metadata={'source': '../../datasets/eleven_madison_park_data.txt'}, page_content='Source: https://www.elevenmadisonpark.com/careers\nTitle: Careers — Eleven Madison Park\nContent:'),
 Document(metadata={'source': '../../datasets/eleven_madison_park_data.txt'}, page_content="Join Our Team Eleven Madison Park ▾ All Businesses Eleven Madison Park Clemente Bar Daniel Humm Hospitality Filter Categories Culinary Pastry Wine & Beverage Dining Room Office & Admin Other Job Types Full Time Part Time Compensation Salary Hourly Apply filters OPEN OPPORTUNITIES Staff Accountant - Part Time Eleven Madison Park Part Time • Hourly ($20 - $25) Host/Reservationist Eleven Madison Park Full Time • Hourly ($24) Sous Chef Eleven Madison Park Full Time • Salary ($72K - $75K) Pastry Cook Ele

In [32]:
# Created vector store and input documents
vector_store = Chroma.from_documents(documents=documents, embedding=embeddings)

In [33]:
vector_count = vector_store._collection.count()
print(f"Chroma vectorstore created with {vector_count} items.")

Chroma vectorstore created with 38 items.


In [34]:
# Retrieve the first chunk of stored data from the vector store
stored_data = vector_store.get(include=["embeddings", "documents"], limit=1)

In [35]:
print("First chunk:\n", stored_data['documents'][0])
print("\nEmbedding vector\n", stored_data['embeddings'][0])
print(f"\nFull embedding dimensions: {len(stored_data['embeddings'][0])}")

First chunk:
 Source: https://www.elevenmadisonpark.com/
Title: Eleven Madison Park
Content:
Book on Resy
---END OF SOURCE---

Embedding vector
 [ 0.02317428 -0.01570047 -0.00702894 ... -0.02469368 -0.01039626
 -0.06181634]

Full embedding dimensions: 1536


In [36]:
test_query = "Is there a dress code?"

In [37]:
try:
    similar_docs = vector_store.similarity_search(test_query, k=1)
    print(f"Found {len(similar_docs)}")

    for i, doc in enumerate(similar_docs):
        print(f"\n--- Document {i+1} ---")
        # Get the first 700 characters
        content_snippet = doc.page_content[:700].strip()
        source = doc.metadata.get("source", "Unknown Source")
        print(f"Content snippet: {content_snippet}")
        print(f"Source: {source}")
except Exception as e:
    print(f"An error occurred: {e}")

Found 1

--- Document 1 ---
Content snippet: starting at $125 per guest, as well as a full wine list that our wine team can help you select from and may also be viewed on our website. You are also welcome to bring your own special bottle of wine for your meal for a $75 per 750ml bottle corkage (4 bottle maximum) fee. We do not have a dress code. Many of our guests dress up for the occasion but wear whatever will make you most comfortable. You can reach our reservations team at any time at info@elevenmadisonpark.com . We can accommodate up to seven guests at a table in the main dining room. If you are interested in booking a table for a group larger than this, please contact our Private Dining and Special Events team at events@elevenmad
Source: ../../datasets/eleven_madison_park_data.txt


In [38]:
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

In [39]:
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
    verbose=True
)

In [40]:
# Test the full chain
chain_test_query = "What kind of food does Eleven Madison Park serve?"
print(f"Query: {chain_test_query}")

try:
    result = qa_chain.invoke({"question": chain_test_query})

    # print the answer and the sources from the result directory
    print("\n--- Answer ---\n")
    print(result.get("answer", "No answer generated."))

    print("\n--- Sources ---\n")
    print(result.get("sources", "No sources identified"))

    if "source_documents" in result:
        for i, doc in enumerate(result["source_documents"]):
            content_snippet = doc.page_content[:250].strip()
            print(f"Doc {i+1}: {content_snippet}")

except Exception as e:
    print(f"\nAn error occurred while running the chain: {e}")

Query: What kind of food does Eleven Madison Park serve?


[1m> Entering new RetrievalQAWithSourcesChain chain...[0m

[1m> Finished chain.[0m

--- Answer ---

 Eleven Madison Park serves a fully plant-based menu, using no animal products.


--- Sources ---

https://www.elevenmadisonpark.com/ourrestaurant, https://www.elevenmadisonpark.com/faq


In [41]:
result

{'question': 'What kind of food does Eleven Madison Park serve?',
 'answer': ' Eleven Madison Park serves a fully plant-based menu, using no animal products.\n',
 'sources': 'https://www.elevenmadisonpark.com/ourrestaurant, https://www.elevenmadisonpark.com/faq'}

In [None]:
# This function takes the user input, runs the chain, and then formats the output in a gradio interface
# The 'qa_chain' variables needs to be accessible in this scope for this function to work.

def ask_elevenmadison_assistant(user_query):
    """
    Process the user query using the RAG chain and return formatted results
    """
    print(f"\nProcessing Gradio query: '{user_query}")
    if not user_query or user_query.strip() == "":
        print("--> Empty query recieved.")
        return "Please enter a question.", "" # Handle empty input gracefully
    
    try:
        result = qa_chain.invoke({"question": user_query})

        # Extract the answer and sources
        answer = result.get("answer", "Sorry, I couldn't find an answer in the provided documents.")
        sources = result.get("sources", "No specific sources identified.")

        if sources == DATA_FILE_PATH:
            sources = f"Retrieved from: {DATA_FILE_PATH}"
        elif isinstance(sources, list): # Handle potential list of sources
            sources = ", ".join(list(set(sources))) # Unique, comma-separated

        print(f"--> Answer generated: {answer[:100].strip()}...")
        print(f"--> Sources identified: {sources}")

        # Return the answer and sources to be displayed with Gradio output compontents
        return answer.strip(), sources

    except Exception as e:
        error_message = f"An error occurred: {e}"
        print("--> Error during chain execution: {error_message}")
        return error_message, "Error occurred"
    
# Create the gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="Eleven Madison Park Q&A Assistant") as demo:
    gr.Markdown(
    """
    # Eleven Madison Park - AI Q&A Assistant
    Ask questions about the restaurant based on its website data.
    The AI provides answers and cites the source document.
    *(Examples: What are the menu prices?  Who is the chef? Is it plant-based?)*
    """
    )

    # Input  component for the user's question
    question_input = gr.Textbox(
        label = "Your Question:",
        placeholder = "e.g., What are the operating hours on Saturday?",
        lines = 2, # Allow space for longer questions
    )

    # Row layout for the output
    with gr.Row():
        # Output component for the generated answer (read only)
        answer_output = gr.Textbox(label="Answer:", interactive=False, lines=6) # User cannot edit this
        # Output component for the souce (read only)
        sources_output = gr.Textbox(label="Sources:", interactive=False, lines=2)

    # Row for buttons
    with gr.Row():
        # Button for submitting question
        submit_button = gr.Button("Ask RAG Q&A chat app", variant="primary")
        # Clear button to reset input and output values
        clear_button = gr.ClearButton(components=[question_input, answer_output, sources_output], value="Clear All")

    # Add some example questions for users to try
    gr.Examples(
        examples=[
            "What are the different menu options and prices?",
            "Who is the head chef?",
            "What is Magic Farms?"
        ],
        inputs = question_input, # Clicking example will load this input
        cache_examples = False, # Don't pre-compute results for examples for simplicity
    )

    # Connect the submit button to the Function
    submit_button.click(fn=ask_elevenmadison_assistant, inputs=question_input, outputs=[answer_output, sources_output])

print("Gradio interface defined")

# Launch the Gradio app
demo.launch()


Gradio interface defined
* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.





Processing Gradio query: 'What hours is the restaurance open on saturdays?


[1m> Entering new RetrievalQAWithSourcesChain chain...[0m

[1m> Finished chain.[0m
--> Answer generated: The restaurant is open on Saturdays from 12 pm to 2 pm and 5 pm to 11 pm....
--> Sources identified: Retrieved from: ../../datasets/eleven_madison_park_data.txt
