In [6]:
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
import faiss
from dotenv import load_dotenv
import os
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


In [7]:
load_dotenv()
os.environ["GOOGLE_API_KEY"]=os.getenv("GOOGLE_API_KEY")
os.environ["HF_TOKEN"]=os.getenv("HF_TOKEN")
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")
os.environ["LANGCHAIN_TRACING_V2"]="true"

In [8]:
file_path=r"C:\Users\hardi\OneDrive\Desktop\begproject_pdfchatbot\Research Analyst\llama2.pdf"
loader=PyPDFLoader(file_path)
docs = []
async for doc in loader.alazy_load():
    docs.append(doc)

text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)
split_docs=text_splitter.split_documents(docs)

embeddings = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

index=faiss.IndexFlatL2(384)
vector_store=FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)


In [9]:
vector_store.add_documents(split_docs)

['6ea64036-c7e9-4c85-b170-a5a3e90c6982',
 'badf5dce-a6bd-4556-a20e-df7cfa3fbe6a',
 'd2c8b0dd-5c3e-4987-b4b6-c40bd4142f0e',
 'a1982e95-46b2-42c8-98be-95d531ba31a7',
 '87fc7317-a967-4dd6-8d86-8b98f3b701c5',
 '00535eeb-235b-455a-8be5-e01eba1e5ae4',
 '1d9fa21c-714e-4e14-98df-a3cd379fbb66',
 'ef9d6a38-3d87-48f7-bafb-21ed5e8cd0a2',
 '68d771c0-073a-4b3e-b85a-6c486a5f7f40',
 'ed9c9b6f-f05d-4ca2-8ecb-653954f797d1',
 '58785073-eb67-42e3-9798-05c9a909111e',
 '24076648-c8e6-4411-88d6-2c939d1b2a0e',
 '57942b52-fb82-4810-93ec-8bb43914506c',
 'd370310b-38a6-49c4-b84e-1ce3f892c3c5',
 'a1513e6e-f328-48e5-975d-a02c8ff8f864',
 'cf890a33-e1bb-4cac-a3ed-33dd555b1ad0',
 'f91f14d6-ba2b-4574-bb8e-ade170821ea5',
 '9429fd6c-3fd0-4438-a8f7-9234e7a4b95f',
 'f4c3a360-85b7-436b-ba62-91a88cbf0b5c',
 '531d54c1-08f5-4591-afc9-692dbec40d96',
 '2fdeafaa-1512-463e-a4f0-d00238c1d590',
 '4de7582f-b9fb-4692-b9f6-b4b8cf7a5ae3',
 '99f4dfbc-94d8-4fa7-826b-4857ce54193b',
 '5f681dae-11a7-4939-a74a-ecc36fb69a5b',
 '18db4a69-1e6c-

In [10]:
retriever = vector_store.as_retriever(search_kwargs={"k": 10})


vector_store.save_local("Research Analyst")



In [11]:
model=ChatGroq(model="gemma2-9b-it")

output_parser=StrOutputParser()

prompt=ChatPromptTemplate.from_messages(
    [
        ("system","You are an expert research analyst. Provide me answer based on the query."),
        ("human","Context:\n{context}\n\nQuestion:\n{question}")
    ]
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

chain=(
    {"context":retriever | format_docs, "question":RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)


    

In [12]:
chain.invoke("What are functions of ollama")

"Based on the provided text, Llama 2 and Llama 2-Chat are large language models (LLMs) with several functions:\n\n* **Text Understanding and Generation:** They can understand and generate human-like text, engaging in conversations and responding to prompts.\n* **Tool Usage:** Llama 2-Chat demonstrates the ability to understand and use tools through their APIs, even without explicit training on those tools.\n* **Knowledge Organization:** Llama 2-Chat exhibits temporal organization of knowledge, suggesting it can process and understand information in a sequential manner.\n* **Democratization of AI:**\n\nThe open-source nature of Llama 2 aims to make AI technology more accessible to a wider range of developers and researchers.\n* **Transparency and Safety:** Meta emphasizes responsible use and provides a Responsible Use Guide and code examples to help developers implement safety measures.\n\nThe text highlights Llama 2's capabilities in text generation, tool interaction, and knowledge man