In [1]:
import os
import tqdm
import numpy as np
import pandas as pd

# API Setup

In [2]:
# Load environment variables (the LLM API keys live there) from the
# project-level .env file, one directory above this notebook.
from dotenv import load_dotenv

ENV_FILE = "../.env"
load_dotenv(dotenv_path=ENV_FILE)

True

# Load Data

In [3]:
# Embedding model: a small BGE model loaded through the HuggingFace integration.
# NOTE(review): presumably the same model that was used to build the stored
# index -- confirm before swapping it out.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embedding_model = HuggingFaceEmbedding(
    embed_batch_size=32,
    model_name="BAAI/bge-small-en-v1.5",
)



In [4]:
# Open the existing LanceDB table on disk and wrap it in a vector index
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.lancedb import LanceDBVectorStore

vector_store = LanceDBVectorStore(table_name="pipeline_test", uri="./lancedb")

# The index embeds incoming queries with `embedding_model`.
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embedding_model)

In [5]:
def retrieve(index, user_input, similarity_top_k=3):
    """Retrieve the best-matching chunks for a query and format them as one string.

    Args:
        index: Any object exposing ``as_retriever(similarity_top_k=...)`` whose
            retriever has a ``retrieve(query)`` method.
        user_input: The query text to search for.
        similarity_top_k: How many results to request from the retriever.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        A single string with one ``Title: <title>`` header line plus the chunk
        text per result, results separated by a dashed divider line. Empty
        string when nothing is retrieved.

    Raises:
        KeyError: If a retrieved result has no ``'title'`` entry in its metadata.
    """
    retriever = index.as_retriever(similarity_top_k=similarity_top_k)
    results = retriever.retrieve(user_input)
    return "\n----------------\n".join(
        f"Title: {result.metadata['title']}\n{result.text}" for result in results
    )

# Set up LLM

In [6]:
# OpenAI
from llama_index.llms.openai import OpenAI

# OpenAI client -- the API key is handled in your .env file
openai_llm = OpenAI(
    model="gpt-4o",
    max_tokens=2048,
    temperature=0.1,
)

In [7]:
# AnyScale
from llama_index.llms.anyscale import Anyscale

# AnyScale client -- the API key is handled in your .env file
anyscale_llm = Anyscale(
    model="meta-llama/Meta-Llama-3-70B-Instruct",
    max_tokens=2048,
    temperature=0.1,
)

# HyDE

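HyDE (Hypothetical Document Embeddings) asks an LLM to write a hypothetical passage that answers the question, and then retrieves with the embedding of that passage instead of (or alongside) the raw query. We examine two scenarios: one where we don't use HyDE and one where we do.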

Note: To focus on HyDE, we've simplified and abstracted away most of the helper code we used previously.

## Without HyDE



In [8]:
# Question sent to every query engine in the comparison below
user_input = (
    "How many points did Michael Jordan actually score in his final NBA game?"
)

In [9]:
from llama_index.core.prompts import PromptTemplate

# Raw question-answering prompt text. {context_str} and {query_str} are the
# template placeholders for the context and the question. The text is kept
# under its own name so that `prompt` only ever refers to the PromptTemplate
# object rather than being a str first and a template afterwards.
qa_prompt_text = """You are a helpful AI assistant that answers questions after carefully reading all the provided context. You always cite sources (document titles) and also quote the relevant snippets. Information may be spread across multiple documents. If the information is not present in any of the contexts, you will say 'I don't know'.
-----
Context: 
{context_str} 
-----
Question: {query_str} 
Answer: `answer`
Source: `source document title`
Relevant Snippet: `snippet`
"""

# Convert prompt into a prompt template that llamaindex can use
prompt = PromptTemplate(template=qa_prompt_text)

In [10]:
# Both engines use the same QA prompt and retrieve the same number of chunks;
# only the LLM differs.
_shared_engine_kwargs = {"text_qa_template": prompt, "similarity_top_k": 3}

# OpenAI
openai_query_engine = index.as_query_engine(llm=openai_llm, **_shared_engine_kwargs)

# Anyscale
anyscale_query_engine = index.as_query_engine(llm=anyscale_llm, **_shared_engine_kwargs)

In [11]:
# Baseline (no HyDE): answer the question with the OpenAI-backed engine
openai_answer = openai_query_engine.query(user_input)
print(openai_answer)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Michael Jordan scored 13 points in his final NBA game.

Source: Michael Jordan
Relevant Snippet: "Jordan's final NBA game was on April 16, 2003 in Philadelphia. After scoring only 13 points in the game, Jordan went to the bench with 4 minutes and 13 seconds remaining in the third quarter and with his team trailing the Philadelphia 76ers, 75–56."


In [12]:
# Baseline (no HyDE): answer the question with the Anyscale-backed engine
anyscale_answer = anyscale_query_engine.query(user_input)
print(anyscale_answer)

Answer: 13
Source: Michael Jordan
Relevant Snippet: "After scoring only 13 points in the game, Jordan went to the bench with 4 minutes and 13 seconds remaining in the third quarter and with his team trailing the Philadelphia 76ers, 75–56."


## With HyDE

In [13]:
# Query transforms rewrite the query before retrieval (expansion, HyDE, etc.).
from llama_index.core.indices.query.query_transform import HyDEQueryTransform

# One HyDE transform per LLM; include_original=True keeps the original query
# as one of the embedding strings next to the hypothetical document.
openai_hyde = HyDEQueryTransform(include_original=True, llm=openai_llm)
anyscale_hyde = HyDEQueryTransform(include_original=True, llm=anyscale_llm)

In [14]:
# Query engine applies a query transformation before retrieval/answer generation
from llama_index.core.query_engine import TransformQueryEngine
anyscale_hyde_query_engine = TransformQueryEngine(anyscale_query_engine, anyscale_hyde)
openai_hyde_query_engine = TransformQueryEngine(openai_query_engine, anyscale_hyde)

In [15]:
# Show the HyDE prompt
hyde_prompts = anyscale_hyde.get_prompts()
print(hyde_prompts["hyde_prompt"].template)

Please write a passage to answer the question
Try to include as many key details as possible.


{context_str}


Passage:"""



In [16]:
# Anyscale: print the generated hypothetical document, then the final answer
print("Hypothetical document:")
anyscale_hyde_bundle = anyscale_hyde.run(user_input)
print(anyscale_hyde_bundle.custom_embedding_strs[0])
print("---" * 30)
anyscale_hyde_answer = anyscale_hyde_query_engine.query(user_input)
print(anyscale_hyde_answer)

Hypothetical document:
Here is a passage answering the question:

Michael Jordan's final NBA game took place on April 18, 2003, when his Washington Wizards faced off against the Philadelphia 76ers in the first round of the playoffs. In a game that would ultimately be a 107-87 loss for the Wizards, Jordan, then 40 years old, played 37 minutes and scored 15 points. This marked the last time Jordan would step onto an NBA court as a player, bringing an end to an illustrious career that spanned nearly two decades and included six NBA championships, five MVP awards, and countless other accolades. Despite the disappointing outcome of the game, Jordan's 15-point performance was a testament to his enduring talent and competitive spirit, even in the twilight of his playing days.
------------------------------------------------------------------------------------------
Answer: 13 points
Source: Michael Jordan
Relevant Snippet: "After scoring only 13 points in the game, Jordan went to the bench wi

In [17]:
# OpenAI: print the generated hypothetical document, then the final answer
print("Hypothetical document:")
openai_hyde_bundle = openai_hyde.run(user_input)
print("OpenAI:", openai_hyde_bundle.custom_embedding_strs[0])
print("---" * 30)
openai_hyde_answer = openai_hyde_query_engine.query(user_input)
print(openai_hyde_answer)

Hypothetical document:
OpenAI: Michael Jordan, widely regarded as one of the greatest basketball players of all time, played his final NBA game on April 16, 2003. At the age of 40, Jordan was playing for the Washington Wizards, having come out of retirement for a second time to join the team in 2001. His last game took place against the Philadelphia 76ers at the First Union Center in Philadelphia. Despite the Wizards losing the game 107-87, Jordan's performance was a memorable moment in sports history. In his final NBA appearance, Michael Jordan scored 15 points. He played 28 minutes, shooting 6-of-15 from the field and 3-of-4 from the free-throw line. The game was marked by a standing ovation from the crowd and heartfelt tributes from both teammates and opponents, celebrating the end of an illustrious career that included six NBA championships and five MVP awards.
------------------------------------------------------------------------------------------
Michael Jordan scored 13 points