In [58]:
import logging,qdrant_client
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader,Settings,StorageContext
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.query_engine import TransformQueryEngine
from IPython.display import Markdown, display
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.node_parser import SentenceSplitter,SemanticSplitterNodeParser
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.response.notebook_utils import display_source_node
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import IndexNode



In [51]:
# load documents
documents = SimpleDirectoryReader("./uploaded_files").load_data()

In [52]:
Settings.llm=Ollama(model="llama3",request_timeout=120.0)

In [53]:
Settings.embed_model=OllamaEmbedding(model_name="snowflake-arctic-embed")

In [54]:
# Settings.text_splitter=SentenceSplitter(chunk_size=1024,chunk_overlap=20)
node_parser = SemanticSplitterNodeParser(embed_model=Settings.embed_model)


In [55]:
client=qdrant_client.QdrantClient(location=":memory:")
vector_store=QdrantVectorStore(client=client,collection_name="sampledata")

In [68]:
storage_context=StorageContext.from_defaults(vector_store=vector_store)

In [60]:

base_nodes = node_parser.get_nodes_from_documents(documents)

In [61]:
for idx, node in enumerate(base_nodes):
    node.id_ = f"node-{idx}"

Chunk References

In [62]:
sub_chunk_sizes = [128, 256, 512]
sub_node_parsers = [
    SentenceSplitter(chunk_size=c, chunk_overlap=20) for c in sub_chunk_sizes
]

all_nodes = []
for base_node in base_nodes:
    for n in sub_node_parsers:
        sub_nodes = n.get_nodes_from_documents([base_node])
        sub_inodes = [
            IndexNode.from_text_node(sn, base_node.node_id) for sn in sub_nodes
        ]
        all_nodes.extend(sub_inodes)

    # also add original node to node
    original_node = IndexNode.from_text_node(base_node, base_node.node_id)
    all_nodes.append(original_node)

In [63]:
all_nodes_dict = {n.node_id: n for n in all_nodes}

In [69]:
vector_index_chunk = VectorStoreIndex(all_nodes, embed_model=Settings.embed_model, storage_context=storage_context)

ValueError: Point id node-0 is not a valid UUID

In [67]:
vector_index_chunk.storage_context.persist(persist_dir="db")

In [None]:
vector_retriever_chunk = vector_index_chunk.as_retriever(similarity_top_k=2)

In [None]:
retriever_chunk = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever_chunk},
    node_dict=all_nodes_dict,
    verbose=True,
)

In [None]:
nodes = retriever_chunk.retrieve(
    "Applications of GAN"
)
for node in nodes:
    display_source_node(node, source_length=2000)

[1;3;34mRetrieving with query id None: Applications of GAN
[0m[1;3;38;5;200mRetrieved node with id, entering: node-41
[0m[1;3;34mRetrieving with query id node-41: Applications of GAN
[0m

**Node ID:** node-41<br>**Similarity:** 0.725268165083198<br>**Text:** 67, pp. 23 −31, Jul.<br>

In [None]:
query_engine_chunk = RetrieverQueryEngine.from_args(retriever_chunk, llm=Settings.llm)

In [None]:
response = query_engine_chunk.query(
    "list all applications of GAN"
)
print(str(response))

[1;3;34mRetrieving with query id None: list all applications of GAN
[0m[1;3;38;5;200mRetrieved node with id, entering: node-19
[0m[1;3;34mRetrieving with query id node-19: list all applications of GAN
[0m[1;3;38;5;200mRetrieved node with id, entering: node-3
[0m[1;3;34mRetrieving with query id node-3: list all applications of GAN
[0mINFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
Based on the provided context, here are the listed applications of GAN:

1. Generating photorealistic images.
2. Tackling the problem of insufficient training sa

In [40]:
index = VectorStoreIndex.from_documents(documents=documents,storage_context=storage_context)

In [41]:
index.storage_context.persist(persist_dir="db")

In [47]:
query_engine=index.as_query_engine(response_mode="refine",verbose=True,similarity_top_k=10)

In [87]:
query_str = "code to add two numbers"

In [88]:
system_prompt = (
   "You are an AI assistant specialized in providing information from the uploaded document. "
   "Please ensure that your responses are strictly derived from the content of the document. "
   "If the information is not found in the document, please indicate that explicitly."
)  

In [89]:
query_with_prompt=f"{system_prompt}\nUser query:{query_str}"

In [90]:
query_engine = index.as_query_engine()
response = query_engine.query(query_with_prompt)
display(Markdown(f"{response}"))

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


I'm not finding any information related to adding two numbers in the provided context. The uploaded document appears to be about Customer Identification and KYC norms, and it does not contain any mathematical operations or coding examples. Therefore, I must indicate that the requested code is not found in the document. If you're looking for help with a specific math problem or coding task, feel free to ask, and I'll do my best to assist you within the context of this uploaded document!

In [91]:
hyde = HyDEQueryTransform(include_original=True)
hyde_query_engine = TransformQueryEngine(query_engine, hyde)
response = hyde_query_engine.query(query_with_prompt)
display(Markdown(f"{response}"))

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


I'm happy to help! However, I must clarify that the provided context information does not contain any relevant information about adding two numbers or performing mathematical operations. The context appears to be focused on customer identification and Know Your Customer (KYC) norms in banking and financial services.

Since the query "code to add two numbers" is unrelated to the provided context, I will explicitly indicate that the answer cannot be found within the document.

In [92]:
query_bundle = hyde(query_str)
hyde_doc = query_bundle.embedding_strs[0]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


In [81]:
hyde_doc

"The humble printed cheque form has been a stalwart in the world of financial transactions for centuries. Despite the rise of digital payment methods, printed cheques continue to offer several key merits that make them an attractive option for many individuals and businesses.\n\nFirstly, printed cheques provide a tangible record of a transaction, allowing both the payer and payee to verify the details of the payment. This physical evidence can be invaluable in situations where disputes arise or when auditing financial records is necessary. Furthermore, printed cheques are resistant to hacking and cyber attacks, providing an added layer of security for sensitive transactions.\n\nAnother significant merit of printed cheque forms is their flexibility. Unlike digital payments, which may have limited transaction amounts or require specific bank accounts, printed cheques can be customized to suit individual needs. For instance, businesses may use printed cheques to make bulk payments or to p