In [None]:
from langchain_groq import ChatGroq
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from google.colab import userdata
from ragas import evaluate
from ragas.metrics import answer_relevancy, faithfulness
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers.multi_query import MultiQueryRetriever
from datasets import Dataset
import warnings
warnings.filterwarnings('ignore')

In [None]:
sec_key = userdata.get('GROQ_SECRET_KEY')

In [None]:
llm = ChatGroq(
    model_name='llama-3.1-8b-instant',
    groq_api_key=sec_key,
    temperature=0
)

In [None]:
#testing llm
responce = llm.invoke('what are the two main ingridients of samosa')
print(responce.content)

In [None]:
path_to_file = '/content/Medical_book.pdf'

In [None]:
def embed_function():
  embeding = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device':'cpu'})
  return embeding

In [None]:
def load_data(path):
  loader = PyPDFLoader(path)
  data = loader.load()

  split = RecursiveCharacterTextSplitter(
      chunk_size = 500,
      chunk_overlap=100
  )
  docs = split.split_documents(data)

  embeding = embed_function()
  vectordb = FAISS.from_documents(docs,embeding)

  retriever = vectordb.as_retriever(
      search_kwargs={'k':4},
      search_type='mmr'
  )
  return retriever

In [None]:
retriever  = load_data(path_to_file)

In [None]:
prompt = """
You are a helpful medical assistant specialized in extracting information from a medical textbook.
Answer the user's question **only using the information provided in the context** below.
If the answer is not contained within the context, respond with exactly: "I don't know."
Do not add any introductory or explanatory text.
Do not guess or fabricate answers—only use the context.

User's question: {question}

Context:
{context}

Answer:
"""
promp = PromptTemplate(template=prompt,input_variables=['context','question'])

In [None]:
multi_query = MultiQueryRetriever.from_llm(
    retriever=retriever,
    llm=llm
)

memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
    output_key='answer'
)

In [None]:
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=multi_query,
    memory=memory,
    return_source_documents=True,
    combine_docs_chain_kwargs={'prompt':promp}
)

In [19]:
query = 'Treatment  of diabetes'
respon = qa_chain({'question':query})
respon['answer']

'Drug therapy may be directed towards increasing insulin secretion, increasing insulin sensitivity, or increasing insulin penetration of the cells.'

In [None]:
questions = ['what is acne',
             'treatment of acne']

answer = []
context = []
memory.clear()

for query in questions:
  respon = qa_chain({'question': query})
  answer.append(respon['answer'])
  context.append([docs.page_content for docs in multi_query.get_relevant_documents(query)])


# to dict
data = {
    'question':questions,
    'answer': answer,
    'retrieved_contexts':context

}

dataset = Dataset.from_dict(data)

In [None]:
result =  evaluate(
    dataset=dataset,
    metrics=[faithfulness,
             answer_relevancy],
    llm=llm,
    embeddings=embed_function()
)

In [20]:
result_df = result.to_pandas()
result_df

Unnamed: 0,user_input,retrieved_contexts,response,faithfulness,answer_relevancy
0,what is acne,[Volume 4: N-S...................................,Acne is a common skin disease characterized by...,1.0,0.786996
1,treatment of acne,[Volume 4: N-S...................................,• avoid abrasive cleansers\n• use noncomedogen...,1.0,0.753255
