In [7]:
# Import necessary libraries
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import Settings
from llama_index.core.callbacks import CallbackManager
from langfuse.llama_index import LlamaIndexCallbackHandler
 
# Load environment variables
load_dotenv()

langfuse_callback_handler = LlamaIndexCallbackHandler()
Settings.callback_manager = CallbackManager([langfuse_callback_handler])


# Load documents from a directory (you can change this path as needed)
documents = SimpleDirectoryReader("data").load_data()

# Create an index from the documents
index = VectorStoreIndex.from_documents(documents)

# Create a query engine
query_engine = index.as_query_engine()

# Example query
response = query_engine.query("What years does the strategic plan cover?")

print(response)

The strategic plan covers the years 2024 to 2028.


In [8]:
response = query_engine.query("What ideas does the capstone project in the llm bootcamp focus on?")

print(response)

The capstone project in the LLM Bootcamp focuses on two main ideas: a Health Assistant idea that involves building a personal assistant focused on health to assist users with health-related queries, monitor health trends, send reminders, and provide information or advice; and an Extended Functionality idea that aims to extend meeting summarization to longer events like conferences, including multiple speakers and presentations.


In [17]:
 # Create an index from the documents done above
#index = VectorStoreIndex.from_documents(documents)

# Create a retriever to fetch relevant documents
retriever = index.as_retriever(retrieval_mode='similarity', k=5)

# Define your query
query = "What years does the strategic plan cover?"

# Retrieve relevant documents
relevant_docs = retriever.retrieve(query)

print(f"Number of relevant documents: {len(relevant_docs)}")
print("\n" + "="*50 + "\n")

for i, doc in enumerate(relevant_docs):
    print(f"Document {i+1}:")
    print(f"Text sample: {doc.node.get_content()[:200]}...")  # Print first 200 characters
    print(f"Metadata: {doc.node.metadata}")
    print(f"Score: {doc.score}")
    print("\n" + "="*50 + "\n")



Number of relevant documents: 2


Document 1:
Text sample: CodePath
2024-28
Strategic
Plan
+
Appendices
(V1
|
04.23.24)
Executive
Summary
Problem
Solution
Impact
and
Evidence
To
Date
Introduction
to
the
2024-28
Plan
Pillar
1:
Scale
Nationally
in
Breadth,
and
...
Metadata: {'page_label': '1', 'file_name': 'CodePath strategic plan.pdf', 'file_path': '/Users/usarfraz/extra_stuff/llm_bootcamp/lab2/data/CodePath strategic plan.pdf', 'file_type': 'application/pdf', 'file_size': 1452743, 'creation_date': '2024-09-16', 'last_modified_date': '2024-09-16'}
Score: 0.8268825437512243


Document 2:
Text sample: corporate
and
college
customers.
Given
that
it
takes
time
for
sales
personnel
to
ramp
up,
coupled
with
the
six-
to
nine-month
lead
time
required
to
close
six-figure
enterprise
deals,
this
investment
w...
Metadata: {'page_label': '24', 'file_name': 'CodePath strategic plan.pdf', 'file_path': '/Users/usarfraz/extra_stuff/llm_bootcamp/lab2/data/CodePath strategic plan.pdf', 'file_type': 'applic

In [18]:

import openai

client = openai.OpenAI()
    
prompt = f"""
    Given the following context, answer the question:
    {relevant_docs}
    Question: What years does the strategic plan cover?
    """
    
response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2
    )
    
response_text = response.choices[0].message.content
print(response_text)

The strategic plan covers the years 2024 to 2028.


In [19]:
langfuse_callback_handler.flush()
