In [1]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from config import *
from pprint import pprint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [3]:
# Client handle for the Qdrant instance addressed by URL on port 6333.
# URL is not defined in this notebook; presumably it comes from `config`.
client = QdrantClient(URL, port=6333)

In [4]:
# Vector store bound to the COLLECTION_NAME collection, reached through `client`.
vector_store = QdrantVectorStore(
    collection_name=COLLECTION_NAME,
    client=client,
)

In [5]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter

# Sentence-window parser: the original sentence is kept under the
# "original_text" metadata key and its surrounding window under "window"
# (the same key the MetadataReplacementPostProcessor is pointed at later).
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

In [6]:
# Base node parser: a sentence splitter built with the library's default
# arguments. It is registered as Settings.text_splitter further down.
text_splitter = SentenceSplitter()

In [7]:
from llama_index.llms.azure_openai import AzureOpenAI
# Azure OpenAI LLM handle. The endpoint, key, API version and deployment id
# are not defined in this notebook; presumably they come from `config`.
# NOTE(review): `model` says "gpt-35-turbo" while the deployment variable is
# named `deployment_id_gpt4` — confirm the two actually refer to the same model.
llm = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=key,
    api_version=api_version,
    deployment_name=deployment_id_gpt4,
    model="gpt-35-turbo",
)

In [8]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [9]:
import os

# The query cell's captured output shows the huggingface/tokenizers warning
# "The current process just got forked, after parallelism has already been
# used", which asks for TOKENIZERS_PARALLELISM to be set explicitly.
# setdefault keeps any value the user already exported in their environment.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# Local sentence-transformers embedding model used for both ingestion and
# querying; inputs are capped at 512 tokens via max_length.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2",
    max_length=512,
)

In [10]:
from llama_index.core import Settings

# Register the objects built above on llama_index's shared `Settings` object
# so later index / query-engine calls do not need them passed explicitly.
Settings.embed_model = embed_model
Settings.text_splitter = text_splitter
Settings.llm = llm

In [11]:
from llama_index.core  import VectorStoreIndex

In [None]:
from llama_index.core import SimpleDirectoryReader

# Load the single source PDF (HIGHWAYS.pdf under DIR_PATH) into documents.
documents = SimpleDirectoryReader(input_files=[DIR_PATH + "/HIGHWAYS.pdf"]).load_data()

In [None]:
# Parse the loaded documents with the sentence-window parser. The resulting
# nodes are embedded and then added to the vector store in the cells below.
nodes = node_parser.get_nodes_from_documents(documents)

In [None]:
# Embed every node in one batched call instead of one model call per node:
# the embedding model's batch API groups texts internally, which avoids a
# separate forward pass for each node.
node_texts = [node.get_content(metadata_mode="all") for node in nodes]
node_embeddings = embed_model.get_text_embedding_batch(node_texts)

# Attach each embedding to its node so the vector store can persist it as-is.
for node, node_embedding in zip(nodes, node_embeddings):
    node.embedding = node_embedding

In [None]:
# Write the nodes (which now carry their embeddings) into the vector store.
# NOTE(review): presumably re-running ingestion with freshly parsed nodes adds
# new points rather than replacing existing ones — confirm before re-executing.
vector_store.add(nodes)

In [14]:
from llama_index.core  import VectorStoreIndex

In [16]:
# Create the index directly from the vector store rather than from the
# in-memory `documents` / `nodes`.
sentence_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [17]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# Query engine over the sentence index: fetch the top-2 similar nodes and run
# them through a post-processor targeting the "window" metadata key, i.e. the
# same key the node parser was configured with via window_metadata_key.
query_engine = sentence_index.as_query_engine(
    node_postprocessors=[MetadataReplacementPostProcessor(target_metadata_key="window")],
    similarity_top_k=2,
)

In [18]:
# Question that will be sent to the query engine.
query = "What should the government do about road safety?"

In [19]:
# Run the question through the query engine; the answer text is read from
# `response.response` in the next cell.
response = query_engine.query(query)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [20]:
# Show the answer string; pprint renders the long string as wrapped, quoted
# fragments instead of a single very long line.
pprint(response.response)

('The government should make road safety a political priority, develop a '
 'multidisciplinary approach, appoint a lead agency for road safety, set '
 'appropriate road safety targets, establish national road safety plans to '
 'achieve them, support the creation of safety advocacy groups, create budgets '
 'for road safety, and increase investment in demonstrably effective road '
 'safety activities.')
