In [1]:
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment so the API clients created in later cells can authenticate.
load_dotenv()

# Fail fast with a readable message when a credential is absent.
# Re-assigning os.environ[k] = os.getenv(k) does nothing for keys that are
# present and raises "TypeError: str expected, not NoneType" for keys that are
# missing, so an explicit check is used instead.
REQUIRED_ENV_VARS = ("HUGGINGFACEHUB_API_TOKEN", "JINA_API_KEY", "QDRANT_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the shell or in a .env file that load_dotenv() can find."
    )

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
from langchain_community.llms import HuggingFaceEndpoint

# Model repository served through the Hugging Face endpoint.
# Alternative checkpoint kept for reference: "imvbhuvan/aspireai-7b-V0.6-4bit"
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

# Generation settings: sampling temperature 0.5 and up to 4096 newly generated
# tokens; `max_length` is forwarded to the model through `model_kwargs`.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    temperature=0.5,
    max_new_tokens=4096,
    model_kwargs={"max_length": 8192},
)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\imvbh\.cache\huggingface\token
Login successful


In [4]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Folder that holds the source .txt documents. Set the RAG_DOCS_DIR environment
# variable to point the notebook at another location; when it is unset the
# original author's local folder is used, so existing runs behave as before.
docs_dir = os.getenv("RAG_DOCS_DIR", r'C:\Users\imvbh\Desktop\LLMs\hface\Docs')

# Recursively pick up every .txt file under docs_dir and read each with TextLoader.
loader = DirectoryLoader(docs_dir, glob="**/*.txt", loader_cls=TextLoader)


In [5]:
# Read every file matched by the DirectoryLoader configured in the previous
# cell; the result is what the text splitter consumes in the next cell.
documents = loader.load()

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into small character-based pieces: chunks are
# capped at 200 characters and neighbouring chunks share 20 characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=200,
)
text_chunks = text_splitter.split_documents(documents)

# Cell output: how many chunks the split produced.
len(text_chunks)

45

In [7]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed queries for the vector store.
#
# multi_process is disabled on purpose: with it enabled, the
# langchain_community wrapper starts and tears down a pool of worker
# processes on every embedding call (including each single query), and that
# code path does not forward `encode_kwargs`, so `normalize_embeddings=True`
# would be silently ignored. Single-process encoding is both faster for
# one-off queries and actually applies the normalization requested below.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2",
    multi_process=False,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
)

In [8]:
from langchain.prompts import PromptTemplate

# Prompt for the answer-generation step. `{context}` receives the retrieved
# passages and `{question}` the user query. The fallback sentence is phrased
# as an explicit condition so the model only refuses when the context lacks
# the answer (it was previously an unconditional "Reply with ..." instruction).
template = """You are a direct and concise assistant. Answer the question using only the information provided in the context. Give only the specific answer requested, with no additional explanation or information.
If the context does not contain the answer, reply with exactly: I cannot answer that question with my limitations.
Context: {context}

Question: {question}

Answer:"""

# Wrap the raw template string so the chain can fill in its two placeholders.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)


In [10]:
from langchain_qdrant import QdrantVectorStore

# Endpoint of the Qdrant cluster and the API key taken from the environment.
url = "https://3bb301e0-87bb-460b-9ef0-c79b4c1b53e4.us-east4-0.gcp.cloud.qdrant.io:6333"
apikey = os.environ.get("QDRANT_API_KEY")

# Attach to the already-populated "assignrag" collection; nothing is indexed
# here, the embedding model is only supplied for encoding queries.
qdrant = QdrantVectorStore.from_existing_collection(
    url=url,
    api_key=apikey,
    collection_name="assignrag",
    embedding=embedding_model,
)

In [11]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors import JinaRerank

# Reranker that serves as the `base_compressor` of the
# ContextualCompressionRetriever built in a later cell. It is constructed with
# library defaults only.
# NOTE(review): presumably authenticates with the JINA_API_KEY loaded in the
# first cell and uses the default model / number of returned documents —
# confirm against the JinaRerank documentation.
compressor = JinaRerank()

In [12]:
# Base retriever over the Qdrant collection, asking for k=3 results per query.
# NOTE(review): the reranker downstream therefore only ever sees 3 candidates;
# consider a larger k if reranking is meant to filter a wider pool.
retriever = qdrant.as_retriever(search_kwargs={"k": 3})

In [13]:
# Chain the two stages: fetch candidates from Qdrant first, then pass them
# through the Jina reranker before they reach the LLM.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [14]:
from langchain.chains import RetrievalQA

# Question-answering chain: the reranking retriever supplies the context and
# the custom prompt controls how the LLM is asked to answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever,
    chain_type_kwargs={"prompt": prompt},
)

# Example question run end to end through retrieval, reranking and generation.
query = "How many units of 3BHK are available at Cedar Heights"
response = qa.invoke(query)

In [15]:
# Show only the generated answer text from the chain's response mapping.
answer = response["result"]
print(answer)

 7 units
