In [1]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from config import *
from pprint import pprint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [3]:
# Connect to the Qdrant server on port 6333.
# URL is not defined in this notebook — presumably it comes from `from config import *`.
client = QdrantClient(
    URL,
    port=6333,
)

In [4]:
# Wrap the Qdrant client as a LlamaIndex vector store bound to COLLECTION_NAME.
# COLLECTION_NAME is not defined in this notebook — presumably it comes from config.
vector_store = QdrantVectorStore(
    collection_name=COLLECTION_NAME,
    client=client,
)

In [5]:
import fitz

In [6]:
# Open the source PDF with PyMuPDF (fitz).
# NOTE(review): `doc` is not referenced by any cell visible in this notebook and is
# never closed; the PDF is loaded again via SimpleDirectoryReader further down.
# Confirm this handle is still needed.
doc = fitz.open(DIR_PATH + "/ROAD-SAFETY.pdf")

In [7]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter

# Sentence-window node parser with explicit settings: a window size of 3, the
# window text stored under the "window" metadata key, and the original sentence
# stored under the "original_text" metadata key.
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

In [8]:
# Base node parser is a sentence splitter, built with its default arguments.
# It is assigned to Settings.text_splitter in a later cell; the documents themselves
# are parsed with `node_parser`, not with this splitter.
text_splitter = SentenceSplitter()

In [9]:
from llama_index.llms.azure_openai import AzureOpenAI
# Azure OpenAI LLM; it is assigned to Settings.llm in a later cell.
# deployment_id_gpt4, key, endpoint and api_version are not defined in this
# notebook — presumably they come from `from config import *`.
# NOTE(review): `model` is "gpt-35-turbo" while the deployment variable is named
# deployment_id_gpt4 — confirm the model name matches the actual Azure deployment.
llm = AzureOpenAI(
    model="gpt-35-turbo",
    deployment_name=deployment_id_gpt4,
    api_key=key,
    azure_endpoint=endpoint,
    api_version=api_version,
)

In [10]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [11]:
# HuggingFace embedding model used to embed the parsed nodes; it is also
# assigned to Settings.embed_model in a later cell.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2",
    max_length=512,
)

In [12]:
from llama_index.core import Settings

# Register the LLM, embedding model and text splitter created in earlier cells
# on the global LlamaIndex Settings object.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.text_splitter = text_splitter

In [13]:
from llama_index.core  import VectorStoreIndex

In [14]:
from llama_index.core import SimpleDirectoryReader

# Load the road-safety PDF into LlamaIndex documents.
pdf_reader = SimpleDirectoryReader(input_files=[DIR_PATH + "/ROAD-SAFETY.pdf"])
documents = pdf_reader.load_data()

In [15]:
# Parse the loaded documents into nodes with the sentence-window parser.
nodes = node_parser.get_nodes_from_documents(documents)

In [16]:
# Embed all nodes with one batched call instead of one model invocation per node.
# The text embedded for each node includes its metadata (metadata_mode="all"),
# exactly as in the per-node version.
node_texts = [node.get_content(metadata_mode="all") for node in nodes]
node_embeddings = embed_model.get_text_embedding_batch(node_texts, show_progress=True)

# Fail loudly rather than silently leaving trailing nodes without an embedding.
assert len(node_embeddings) == len(nodes), "embedding count does not match node count"

# Attach each embedding to its node so the vector store can persist it.
for node, node_embedding in zip(nodes, node_embeddings):
    node.embedding = node_embedding

In [17]:
# Add the embedded nodes to the Qdrant-backed vector store. The value returned by
# add() is shown as the cell output (a list of ID strings).
vector_store.add(nodes)

['d2280ad2-0b0c-4be7-8d92-a18a9de597ee',
 '189b56ee-a578-485d-be10-6fd7baa38220',
 '23419c3f-a29c-4009-8cc0-5473c1295546',
 '4977b262-c3f6-42e2-aa0e-c152712958c0',
 '9e4bea63-4450-481c-bb16-f26d2bbe528f',
 '5140f28e-2aff-4ab6-a938-9a7ac6fef2e5',
 '1058554a-3b76-401b-b97d-72e48ad1746f',
 'df6281de-6959-4892-adb2-79c9c8714235',
 '628aecd8-88dc-4af9-94a2-b70a015615e4',
 '4790cff8-2fa0-42d6-9632-a773ec072738',
 'cefb2de9-350e-4354-8541-ada97f282e20',
 '302f6c98-27e5-4391-8fa5-f2f42411105d',
 '00cf8ee0-9b87-4ab7-ae60-f0ad086d72f8',
 '64b71d46-1d91-42df-89ac-ea06ebbd6fe5',
 '6e04166d-b7a5-4432-a894-0645ad341e22',
 '982470bd-4000-4ed4-9fb9-df066bacb841',
 '92ec565b-a3a1-46b3-8ef1-e19acc44bb24',
 '5aeb55c7-ad04-496b-bba4-9b86d352493c',
 'ce9b42a1-5202-49fa-b082-1014ea2389b5',
 'b273f19c-e177-4aeb-b17d-7e0b766302ce',
 'fb0079b8-7b9a-481a-95da-6909da31b628',
 '5b2e8dce-f3b3-42d3-ad51-b2ff7b76db3f',
 'd047a4a4-3a5a-41bf-a249-e02dc0d0e9ce',
 'af09d30b-1797-40e0-8c4f-242c966d6837',
 'a3899ecf-9e16-

In [18]:
from llama_index.core  import VectorStoreIndex

In [19]:
# Build the index on top of the Qdrant collection that the earlier
# vector_store.add(nodes) call populated.
#
# VectorStoreIndex(...) has no `vector_store` parameter: the keyword is absorbed by
# **kwargs and ignored, so the index falls back to an in-memory store and queries
# never reach Qdrant. from_vector_store() wraps the existing store directly.
#
# NOTE(review): retrieval now covers everything stored in the collection, not only
# the nodes created in this session — confirm the collection holds only this corpus.
sentence_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [20]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# Post-processor keyed on the "window" metadata entry.
# The target key defaults to `window` to match the node_parser's default.
window_postprocessor = MetadataReplacementPostProcessor(target_metadata_key="window")

# Query engine over the sentence index, retrieving the top 2 most similar nodes.
query_engine = sentence_index.as_query_engine(
    node_postprocessors=[window_postprocessor],
    similarity_top_k=2,
)

In [21]:
# Question to ask about the indexed road-safety document.
# Disabled alternative question kept from an earlier run (presumably for a
# different document set): "Describe about Tata Steel Kalinganagar?"
query = "What should the government do about road safety?"

In [22]:
# Run the question through the query engine built above.
response = query_engine.query(query)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [25]:
# Pretty-print the answer text held in the response's `response` attribute.
# NOTE(review): the execution count jumps from 22 to 25 here, so intermediate cells
# were run and removed — re-check with Restart Kernel -> Run All.
pprint(response.response)

('The government should make road safety a political priority, develop a '
 'multidisciplinary approach to road safety, appoint a lead agency for road '
 'safety, set appropriate road safety targets and establish national road '
 'safety plans to achieve them, support the creation of safety advocacy '
 'groups, create budgets for road safety, and increase investment in '
 'demonstrably effective road safety activities. Road safety is a shared '
 'responsibility of the government and a range of civil society stakeholders, '
 'and the success of road safety strategies in all countries depends upon a '
 'broad base of support and common action from all stakeholders.')


In [None]:
# Second question; this cell and the two after it have not been executed (In [None]).
# NOTE(review): "TSJ" does not obviously relate to the ROAD-SAFETY.pdf indexed in this
# notebook — confirm the intended corpus before relying on the answer.
query = "What is  ENERGY INTENSITY AT TSJ?"

In [None]:
# Run the current `query` through the same query engine, overwriting the earlier `response`.
response = query_engine.query(query)

In [None]:
# Print the response object via its string form (the earlier cell pretty-printed
# response.response instead).
print(response)