# Hypothetical Document Embedding (HyDE)

In [None]:
import os
import sys
from dotenv import load_dotenv

# Load environment variables from a .env file
load_dotenv()

# Make sure an OpenAI API key is available (comment out if not using OpenAI).
# If the key is already set in the environment there is nothing to do;
# otherwise prompt for it without echoing the secret into the cell output.
if not os.getenv('OPENAI_API_KEY'):
    from getpass import getpass  # local import keeps this cell self-contained

    os.environ["OPENAI_API_KEY"] = getpass("Please enter your OpenAI API key: ")

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks
from helper_functions import *
from evaluation.evalute_rag import *

In [2]:
# Relative path of the PDF that is handed to HyDERetriever below
path = "../data/Understanding_Climate_Change.pdf"

In [3]:
class HyDERetriever:
    """Retrieve chunks by searching with an LLM-written hypothetical answer.

    Instead of querying the vector store with the raw question, the question
    is first expanded by the LLM into a hypothetical document that answers it,
    and that document is used as the similarity-search query.
    """

    def __init__(self, files_path, chunk_size=1000, chunk_overlap=200):
        """Build the vector store and the hypothetical-document chain.

        Args:
            files_path: Passed to ``encode_pdf`` as the document to index.
            chunk_size: Chunk size forwarded to ``encode_pdf``; also the
                length (in characters) requested from the LLM in the prompt,
                so the hypothetical document is comparable to stored chunks.
            chunk_overlap: Chunk overlap forwarded to ``encode_pdf``.
        """
        self.llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini", max_tokens=4000)

        # Not referenced by the methods of this class; kept because it is a
        # public attribute that existing callers may read.
        self.embeddings = OpenAIEmbeddings()
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.vectorstore = encode_pdf(
            files_path,
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )

        # The template is assembled from adjacent string literals so that no
        # source-code indentation leaks into the text sent to the model.
        self.hyde_prompt = PromptTemplate(
            input_variables=["query", "chunk_size"],
            template=(
                "Given the question '{query}', generate a hypothetical document "
                "that directly answers this question. The document should be "
                "detailed and in-depth.\n"
                "The document size has to be exactly {chunk_size} characters."
            ),
        )
        self.hyde_chain = self.hyde_prompt | self.llm

    def generate_hypothetical_document(self, query: str) -> str:
        """Return the LLM-generated hypothetical document for ``query``.

        The chain is invoked with the query and the configured ``chunk_size``;
        the ``content`` attribute of the response is returned.
        """
        input_variables = {"query": query, "chunk_size": self.chunk_size}
        return self.hyde_chain.invoke(input_variables).content

    def retrieve(self, query: str, k: int = 3):
        """Search the vector store using a hypothetical answer to ``query``.

        Args:
            query: The user question.
            k: Number of results requested from ``similarity_search``.

        Returns:
            A ``(similar_docs, hypothetical_doc)`` tuple: the result of the
            vector store's ``similarity_search`` and the generated document
            that was used as the search text.
        """
        hypothetical_doc = self.generate_hypothetical_document(query)
        similar_docs = self.vectorstore.similarity_search(hypothetical_doc, k=k)
        return similar_docs, hypothetical_doc


In [4]:
# Build the retriever for the PDF at `path`, using the constructor's default
# chunk_size and chunk_overlap
retriever = HyDERetriever(path)

In [5]:
# First example query. retrieve() returns the matching documents together with
# the hypothetical document that was used as the search text.
test_query = "What is the main cause of climate change?"
results, hypothetical_doc = retriever.retrieve(test_query)

In [6]:
# Extract the text of each retrieved document for display
docs_content = [doc.page_content for doc in results]

# Show the generated hypothetical document first, then the retrieved context.
# text_wrap and show_context are not defined in this notebook; presumably they
# come from the star-imported helper_functions module.
print("hypothetical_doc:\n")
print(text_wrap(hypothetical_doc)+"\n")
show_context(docs_content)

hypothetical_doc:

**Title: Understanding the Main Cause of Climate Change**  Climate change primarily results from the increase of
greenhouse gases (GHGs) in the Earth's atmosphere, predominantly due to human activities. The burning of fossil
fuels—such as coal, oil, and natural gas—for energy and transportation is the leading contributor. This process releases
carbon dioxide (CO2), the most significant GHG, into the atmosphere.  Deforestation also plays a critical role, as trees
absorb CO2. When forests are cleared for agriculture or urban development, this carbon storage capacity diminishes,
exacerbating the problem. Additionally, industrial processes and agricultural practices release methane (CH4) and
nitrous oxide (N2O), potent GHGs that further intensify global warming.  The cumulative effect of these activities leads
to an enhanced greenhouse effect, trapping heat and causing global temperatures to rise. This warming disrupts weather
patterns, leading to extreme weather events,

In [9]:
# Second example query (typo "reducein" fixed so the LLM receives clean text).
# retrieve() returns the matching documents together with the hypothetical
# document that was used as the search text.
test_query = "Can technology play a big role in causing climate change to reduce in today's world?"
results, hypothetical_doc = retriever.retrieve(test_query)

In [10]:
# Extract the text of each retrieved document for display
docs_content = [doc.page_content for doc in results]

# Show the generated hypothetical document first, then the retrieved context.
# text_wrap and show_context are not defined in this notebook; presumably they
# come from the star-imported helper_functions module.
print("hypothetical_doc:\n")
print(text_wrap(hypothetical_doc)+"\n")
show_context(docs_content)

hypothetical_doc:

**Title: The Role of Technology in Mitigating Climate Change**  Technology can significantly reduce climate change
impacts in today's world. Renewable energy sources, such as solar and wind, have advanced, making them more efficient
and cost-effective. These technologies decrease reliance on fossil fuels, which are major contributors to greenhouse gas
emissions.  Smart grids enhance energy distribution, optimizing consumption and reducing waste. Electric vehicles (EVs)
are becoming mainstream, lowering emissions from transportation. Innovations in carbon capture and storage (CCS)
technology allow industries to mitigate their carbon footprints.  Moreover, precision agriculture utilizes data
analytics to optimize resource use, minimizing environmental impact. Sustainable building technologies improve energy
efficiency in construction.  However, technology alone cannot solve climate change; it must be paired with policy
changes, public awareness, and global cooperation.