In [1]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from config import *
from pprint import pprint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [3]:
# Connect to the Qdrant server. URL is not defined in this notebook
# (presumably it comes from `from config import *`).
client = QdrantClient(URL, port=6333)

In [4]:
# Vector store backed by the Qdrant client created earlier, bound to one collection.
vector_store = QdrantVectorStore(
    collection_name=COLLECTION_NAME,
    client=client,
)

In [5]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter

# Build the sentence-window node parser. The metadata keys chosen here
# ("window" and "original_text") are spelled out explicitly; the "window" key
# is the one the query-time post-processor is configured with further down.
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

In [6]:
# Base node parser: a SentenceSplitter constructed with no arguments.
# It is registered as Settings.text_splitter in a later cell.
text_splitter = SentenceSplitter()

In [7]:
from llama_index.llms.azure_openai import AzureOpenAI
# Azure OpenAI LLM client. The credential/endpoint names used here are not
# defined in this notebook (presumably they come from `from config import *`).
# NOTE(review): `model` is "gpt-35-turbo" while the deployment variable is
# named `deployment_id_gpt4` -- confirm both refer to the same underlying model.
llm = AzureOpenAI(
    azure_endpoint=endpoint,
    api_version=api_version,
    api_key=key,
    deployment_name=deployment_id_gpt4,
    model="gpt-35-turbo",
)

In [8]:
from llama_index.embeddings.fastembed import FastEmbedEmbedding

# FastEmbed embedding model with library defaults (no model name is passed).
# Used both for manual node embedding and as Settings.embed_model.
embed_model = FastEmbedEmbedding()

Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 57985.77it/s]


In [9]:
from llama_index.core import Settings

# Register the objects built in the previous cells as the global defaults.
Settings.text_splitter = text_splitter
Settings.embed_model = embed_model
Settings.llm = llm

In [10]:
from llama_index.core  import VectorStoreIndex

In [11]:
from llama_index.core import SimpleDirectoryReader

# Read the single source PDF located under DIR_PATH.
documents = SimpleDirectoryReader(input_files=[DIR_PATH + "/HIGHWAYS.pdf"]).load_data()

In [12]:
# Split the loaded documents into nodes with the sentence-window parser.
nodes = node_parser.get_nodes_from_documents(documents)

In [13]:
# Compute and attach an embedding to every node before it is written to the
# vector store.
#
# The text is rendered with the "embed" metadata mode rather than "all":
# "all" ignores each node's excluded_embed_metadata_keys, so the sentence
# window, the duplicated original text and the file metadata would all be
# embedded together with the sentence. "embed" respects those exclusions,
# which keeps the vector focused on the sentence itself.
for node in nodes:
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="embed")
    )
    node.embedding = node_embedding

In [14]:
# Write the embedded nodes to the vector store; the returned list of ids is
# echoed as the cell output.
# NOTE(review): re-running this cell presumably inserts the nodes again
# (possible duplicates in the collection) -- confirm before a full re-run.
vector_store.add(nodes)

['6b8daccb-deba-414e-b6ce-42d72aeebf2e',
 '3d5e38af-631f-41b4-b90b-5db0a4717b48',
 '9ade507a-5f09-496e-8491-ed722d1ba47d',
 '3434bf5a-ba3b-4ec0-a462-03c6f1082b03',
 '24e8977d-156b-4eba-ac79-35b6feaaead4',
 'edc546ba-96f6-4091-b671-2b15231837e2',
 '54fc033f-35aa-4319-b556-8e5f152f68cb',
 'e4c84b98-ee09-414c-9643-f684a5826ffb',
 '84cb68f0-7696-49ce-b9fd-11f69e0e0ea3',
 'ea3cc83f-ec05-4d05-abcc-10cae4d9a76a',
 '95058362-7590-4a1b-b59e-4e5d77bbbbc9',
 'dab8e986-81c7-4ae2-b4cc-38b85c8fdf34',
 'ebc377f1-ca49-46fb-8bf1-3b6643bb1e3d',
 '525a7897-b14a-4667-87d5-0fdf4ff0e396',
 '444447ea-e57a-4258-8a19-2f8ad80351be',
 'e9c8dc7a-fa62-4979-9c4e-269b972d0f16',
 '87533e60-ea88-43b0-980a-c5c909b12b90',
 'e058c64e-85a8-4204-936c-ec1c5a6e5a59',
 '287eb93f-03ae-48eb-9f70-3c4745af4238',
 '85977926-34c9-4fe2-9b19-407fdf263bad',
 'b6f3bfbd-a1cf-47b3-b673-ab492d5b46df',
 '7096cab6-4c33-4f90-9c5c-b220980e8018',
 'cb814de1-611d-48ce-9812-2ea2bc702b8f',
 'd487baec-9126-4fb5-a732-db0ff7cdff2d',
 '2cc5cbc6-ac24-

In [15]:
from llama_index.core  import VectorStoreIndex

In [16]:
# Build the index on top of the already-populated vector store.
sentence_index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
)

In [17]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# Query engine over the sentence index: top-2 similarity retrieval, with a
# MetadataReplacementPostProcessor applied to the retrieved nodes.
query_engine = sentence_index.as_query_engine(
    node_postprocessors=[
        # "window" is the same key the node parser was configured with
        # (window_metadata_key="window").
        MetadataReplacementPostProcessor(target_metadata_key="window"),
    ],
    similarity_top_k=2,
)

In [21]:
# Candidate questions for the query engine. Keep exactly one assignment
# active; the alternatives stay commented out so no value is assigned only to
# be overwritten on the next line.
#query = "Describe about Tata Steel Kalinganagar?"
#query = "What should the government do about road safety?"
query = "What is the Golden Quadrilateral ?"

In [22]:
# Run the active question through the query engine.
response = query_engine.query(query)

In [23]:
# Pretty-print only the `response` attribute of the query result.
pprint(response.response)

('There is no information provided in the given context about the Golden '
 'Quadrilateral.')
