In [1]:
import os
os.chdir("../")
%pwd

'/home/yaswanth/Yaswanth/Projects/MediBot'

In [2]:
from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [16]:
#Extract Data From the PDF File
def load_pdf_file(data):
    loader= DirectoryLoader(data,
                            glob="*.pdf",
                            loader_cls=PyPDFLoader)

    documents=loader.load()

    return documents

In [17]:
extracted_data=load_pdf_file("Data/")

In [5]:
# extracted_data

In [18]:
#Split the Data into Text Chunks
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [19]:
text_chunks=text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 1849


In [8]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_huggingface import HuggingFaceEmbeddings

In [20]:
#Download the Embeddings from Hugging Face
def download_hugging_face_embeddings():
    embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [21]:
embeddings = download_hugging_face_embeddings()

In [11]:
from dotenv import load_dotenv
load_dotenv()

True

In [12]:
GROQ_API_KEY=os.environ.get('GROQ_API_KEY')

In [13]:
import os
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [22]:
from langchain_community.vectorstores import Chroma

vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

vectorstore.add_documents(text_chunks)

['0f30ad0e-7601-4e2c-afbc-bdda4ac529b2',
 '1115353b-8996-4715-89ba-55beb04a0f54',
 '6c7de24a-0495-4eef-982f-1db41f877c98',
 '61df7047-4bc6-4f76-bdbf-35d8fb62bca5',
 '49997bb3-caac-458d-a742-8e5142c78244',
 '28b4a3fc-a62c-4cc2-9f68-b8a8dacebb1e',
 'd9e7a544-4ae2-4996-8528-80e302da176a',
 '87869eea-abc8-4fd6-9889-0cf3c21dd671',
 'f7efbf99-fbfb-4bec-afac-622bef776d74',
 '2c50c353-16f9-4c0d-a375-0e5bd446f692',
 'a83500a7-5f48-42d5-8028-006a65e441c9',
 '3459b9da-fd35-44f3-bf8e-74678f1ed743',
 '667b5085-73e6-4c46-a784-dca06f3adb9a',
 '3f09cad3-6848-4d3a-923b-c4e7830f9ca8',
 'f4cd903e-1051-4b9d-a7bc-9c738bddc52a',
 '9466d989-635f-4796-bac8-c0beba7aa68c',
 'adbdd3ab-53b9-4502-8d23-3cd2ead7f2ad',
 '47873d23-1dad-42f7-8bb3-a98983409fcb',
 '9350b99b-0900-4127-a1f3-896fb1211178',
 'd1542d8e-92d6-4527-bfc4-95134eebb728',
 '16c41bc9-bc48-429d-aab5-d4ee4dfcc54b',
 'b1b9645b-d20c-4a7c-b0b5-f39ea154119e',
 '13167021-e132-4f4a-9d33-ac3fa1c71365',
 '9d28dd83-fdd8-4fa8-a83f-e0957ee0e4bd',
 'eda51aa3-00d4-

In [23]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":3})
retriever_docs = retriever.invoke("What is the deep learning?")
retriever_docs

[Document(metadata={'creationdate': '2023-10-27T21:38:57+05:30', 'creator': 'Adobe InDesign 18.5 (Windows)', 'moddate': '2023-10-28T13:08:29+05:30', 'page': 1, 'page_label': 'i', 'producer': 'Adobe PDF Library 17.0', 'source': 'Data/dokumen.pub_deep-learning-foundations-and-concepts-9783031454677-9783031454684.pdf', 'total_pages': 656}, page_content='Deep Learning'),
 Document(metadata={'creationdate': '2023-10-27T21:38:57+05:30', 'creator': 'Adobe InDesign 18.5 (Windows)', 'moddate': '2023-10-28T13:08:29+05:30', 'page': 1, 'page_label': 'i', 'producer': 'Adobe PDF Library 17.0', 'source': 'Data/dokumen.pub_deep-learning-foundations-and-concepts-9783031454677-9783031454684.pdf', 'total_pages': 656}, page_content='Deep Learning'),
 Document(metadata={'creationdate': '2023-10-27T21:38:57+05:30', 'creator': 'Adobe InDesign 18.5 (Windows)', 'moddate': '2023-10-28T13:08:29+05:30', 'page': 20, 'page_label': '1', 'producer': 'Adobe PDF Library 17.0', 'source': 'Data/dokumen.pub_deep-learning-

In [None]:
from langchain_groq import ChatGroq

# Initialize Groq LLM
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0.7
)

In [25]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [26]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
response = rag_chain.invoke({"input": "what is Machine Learning?"})
print(response["answer"])


Machine learning is one of the most important and fastest-growing fields of technology, where solutions learned from data are increasingly displacing traditional hand-crafted algorithms. It is a field that seeks to recreate the powerful capabilities of the brain in machines, and is closely related to artificial intelligence (AI). Machine learning is based on computational models, such as neural networks, which learn from data to enable new capabilities and improve performance.
