# RAG Demo

In [1]:
import os
from dotenv import load_dotenv
from llama_index.core import (
    Document, 
    VectorStoreIndex,
    Settings,
    SimpleDirectoryReader
)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SimpleNodeParser

# Load environment variables from a local .env file (if one exists).
load_dotenv()

# Fail fast with a readable message when the API key is missing.
# The previous form, os.environ[...] = os.getenv(...), was a no-op when the
# key was set and raised an opaque TypeError (None is not a str) when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

# Choose the chat model and the embedding model on llama_index's Settings object.
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

## Sample files

In [3]:
# Load the contents of the data/ directory and report how many documents came back.
reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
doc_count = len(documents)
print(f"\nLoaded {doc_count} documents from data/")


Loaded 6 documents from data/


In [4]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into overlapping chunks ("nodes").
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=128)
nodes = splitter.get_nodes_from_documents(documents)

# Dump each chunk with its 1-based position and the ID of its source document.
print("=== Document Chunks ===")
separator = "-" * 50
for chunk_number, chunk in enumerate(nodes, start=1):
    print(f"\nChunk {chunk_number}:")
    print(f"Text: {chunk.text}")
    print(f"Doc ID: {chunk.ref_doc_id}")
    print(separator)


=== Document Chunks ===

Chunk 1:
Text: BTS (Korean: 방탄소년단; RR: Bangtan Sonyeondan; lit. Bulletproof Boy Scouts), also known as the Bangtan Boys, is a South Korean boy band formed in 2010. The band consists of Jin, Suga, J-Hope, RM, Jimin, V, and Jungkook, who co-write or co-produce much of their material. Originally a hip hop group, they expanded their musical style to incorporate a wide range of genres, while their lyrics have focused on subjects including mental health, the troubles of school-age youth and coming of age, loss, the journey towards self-love, individualism, and the consequences of fame and recognition. Their discography and adjacent work has also referenced literature, philosophy and psychology, and includes an alternate universe storyline.
BTS debuted in 2013 under Big Hit Entertainment with the single album 2 Cool 4 Skool. BTS released their first Korean and Japanese-language studio albums, Dark & Wild and Wake Up respectively, in 2014. The group's second Korean stu

In [5]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Build two indexes over the same chunk list: one for summarization,
# one for vector-based retrieval.
summary_index = SummaryIndex(nodes=nodes)
vector_index = VectorStoreIndex(nodes=nodes)

In [6]:
# Persist each index's storage context to its own directory under assets/.
for index, persist_dir in (
    (summary_index, "assets/stored_summary_embeddings"),
    (vector_index, "assets/stored_vector_embeddings"),
):
    index.storage_context.persist(persist_dir=persist_dir)

In [7]:
# The summary engine is configured with the "tree_summarize" response mode;
# the vector engine uses the library defaults.
summary_engine_options = {"response_mode": "tree_summarize"}
summary_query_engine = summary_index.as_query_engine(**summary_engine_options)
vector_query_engine = vector_index.as_query_engine()

In [None]:
from llama_index.core.tools import QueryEngineTool


# Wrap each query engine in a tool. The description is the text attached to
# the tool for the router's selector to choose between them.
summary_tool = QueryEngineTool.from_defaults(
    description="Useful for summarization questions",
    query_engine=summary_query_engine,
)
vector_tool = QueryEngineTool.from_defaults(
    description="Useful for retrieving specific context.",
    query_engine=vector_query_engine,
)

In [9]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Router that picks one tool per query. List order fixes the engine numbering
# shown in the verbose log: 0 = summary, 1 = vector.
router_selector = LLMSingleSelector.from_defaults()
routable_tools = [summary_tool, vector_tool]
query_engine = RouterQueryEngine(
    selector=router_selector,
    query_engine_tools=routable_tools,
    verbose=True,
)

In [10]:
# Ask a summarization-style question through the router.
summary_question = "What is the summary of the document?"
response = query_engine.query(summary_question)
print(response)

[1;3;38;5;200mSelecting query engine 0: This choice indicates that the document is useful for summarization questions related to MetaGPT..
[0mThe document provides detailed information about the history, culture, and impact of various subjects including BTS, San Francisco, the Tesla Model S, Vincent van Gogh, and Batman. It covers the formation and achievements of BTS, the history and development of San Francisco, the features and impact of the Tesla Model S, the life and works of Vincent van Gogh, and the evolution and presence of Batman in popular culture across different eras and media.


In [11]:
# Number of source nodes attached to the previous response.
source_node_count = len(response.source_nodes)
print(source_node_count)

99


In [12]:
# Ask a specific factual question through the router.
iphone_question = "What is the latest version of Iphone?"
response = query_engine.query(iphone_question)
print(response)

[1;3;38;5;200mSelecting query engine 1: Useful for retrieving specific context from the MetaGPT paper..
[0mThe latest version of iPhone is the iPhone 16, 16 Plus, 16 Pro, and 16 Pro Max, which were announced on September 9, 2024.
