In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA



In [3]:
## Read the PDFs from the folder
loader = PyPDFDirectoryLoader("./togaf_docs")
documents = loader.load()

# Split the loaded pages into overlapping chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
final_documents = text_splitter.split_documents(documents)

# Display the first chunk as a quick sanity check.
final_documents[0]

Document(metadata={'source': 'togaf_docs\\TOGAF9.2 Personal Member edition.pdf', 'page': 0}, page_content='The Open Group Standard\nThe TOGAF®Standard, Version 9.2\nThe Open Group\n© 2005-2018 The Open Group, All Rights Reserved\nPersonal PDF Edition. Not for redistribution')

In [4]:
# Number of chunks produced by the text splitter.
len(final_documents)

1669

In [None]:
## Embedding using Hugging Face
# BGE small English model, run on CPU, with normalized output vectors.
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

In [8]:
!pip install huggingface_hub

ERROR: Could not find a version that satisfies the requirement huggingface_embeddings (from versions: none)
ERROR: No matching distribution found for huggingface_embeddings


In [7]:
import numpy as np

# Embed the first chunk once and reuse the vector for both prints; the original
# ran the embedding model twice on the same text.
sample_embedding = np.array(
    huggingface_embeddings.embed_query(final_documents[0].page_content)
)
print(sample_embedding)
print(sample_embedding.shape)

NameError: name 'huggingface_embeddings' is not defined

In [None]:
## VectorStore creation
# NOTE: only the first 120 chunks are embedded and indexed here, not the full set.
vectorstore = FAISS.from_documents(
    documents=final_documents[:120],
    embedding=huggingface_embeddings,
)

In [1]:
## Query using similarity search
query = "WHAT IS TOGAF?"
relevant_docments = vectorstore.similarity_search(query)

# Show the text of the first returned chunk.
print(relevant_docments[0].page_content)

NameError: name 'vectorstore' is not defined

In [None]:
# Wrap the vector store as a retriever that performs similarity search with k=3.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
print(retriever)

In [None]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Copy the key stored under HUGGINGFACE_API_KEY to HUGGINGFACEHUB_API_TOKEN
# (presumably the name the LangChain Hugging Face wrappers look up -- confirm).
# os.getenv returns None when the variable is missing, and assigning None into
# os.environ raises an opaque TypeError, so fail early with a clear message.
hf_api_key = os.getenv("HUGGINGFACE_API_KEY")
if not hf_api_key:
    raise RuntimeError(
        "HUGGINGFACE_API_KEY is not set; define it in the environment or in a .env file."
    )
os.environ['HUGGINGFACEHUB_API_TOKEN'] = hf_api_key

The Hugging Face Hub is an online platform with over 350k models, 75k datasets, and 150k demo apps (Spaces), all open source and publicly available, where people can easily collaborate and build ML together.

In [None]:
from langchain_community.llms import HuggingFaceHub

# LLM wrapper pointing at the mistralai/Mistral-7B-v0.1 repository.
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature": 0.1, "max_length": 500},
)

# Ask the model directly, without any retrieved context.
query = "What is the togaf version?"
hf.invoke(query)

In [None]:
# Hugging Face models can be run locally through the HuggingFacePipeline class.
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# NOTE: this rebinds `hf`, replacing whatever it referred to before this cell ran.
hf = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-v0.1",
    task="text-generation",
    pipeline_kwargs={"temperature": 0, "max_new_tokens": 300},
)

# `llm` is an alias for the same pipeline object.
llm = hf
llm.invoke(query)

In [None]:
# Prompt text with two placeholders: {context} and {question}.
# Written as adjacent string literals; the resulting value (including the leading
# newline and the trailing newline + space) is the same as the triple-quoted form.
prompt_template = (
    "\n"
    "Use the following piece of context to answer the question asked.\n"
    "Please try to provide the answer only based on the context\n"
    "\n"
    "{context}\n"
    "Question:{question}\n"
    "\n"
    "Helpful Answers:\n"
    " "
)

In [None]:
# Build the prompt object from the template text and its two input variables.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

In [None]:
# Assemble the retrieval QA chain: the retriever supplies documents, the "stuff"
# chain type is used with the custom prompt, and the source documents are
# returned alongside the answer.
retrievalQA = RetrievalQA.from_chain_type(
    llm=hf,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [36]:
# Question passed to the retrieval QA chain.
query = "WHat is TOGAF framework"

In [37]:
# Run the QA chain on the query and print only the generated answer text.
result = retrievalQA.invoke({"query": query})
print(result["result"])


Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

Preface
The TOGAF®standar di sa no pen, industry consensus framework for Enterprise Architecture.
It is a foundational framework, which means that it is applicable to the development of any kind of
architectur ei na ny context. This foundational framework is supplemented by The Open Gr oup T OGAF
Library,1an extensive and growing portfolio of guidance material, providing practical guidance in the
application of the TOGAF framework in speciﬁc contexts.
The TOGAF Standard, V ersion 9.2 is an update to the TOGAF 9.1 standar dt op rovide additional
guidance, correct errors, address some structural challenges, and r emove obsolete content. All of these
changes will make the TOGAF framework easier to use and maintain.2
The TOGAF Documentation
The TOGAF documentation consists of a set of documents:
■The TOGAF standar d(this document) which describes the generally appli