In [1]:
#print('all ok')
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain_groq import ChatGroq

In [3]:
model = ChatGroq(model='qwen/qwen3-32b')
model.invoke("who are you?").content

"<think>\nOkay, the user asked who I am. First, I need to introduce my identity, which is Qwen, a large-scale language model developed by Alibaba. Then, I should mention my main functions and application scenarios, such as answering questions, creating text, logical reasoning, coding, etc. The user may also be interested in my multilingual support and other features, so I should highlight that. I need to keep the tone friendly and natural, avoiding overly technical terms. Finally, I can ask if the user needs help with anything, maintaining a conversational style. I should also check if there's any additional information the user might need beyond the basics.\n</think>\n\nHello! I'm Qwen, a large-scale language model developed by Alibaba. I can help with a variety of tasks, including answering questions, creating text (like stories, documents, emails, and scripts), logical reasoning, coding, and more. I support multiple languages, including Chinese, English, German, French, Spanish, Por

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
hf_embeddings = HuggingFaceEmbeddings(model_name=model_name)
#embeddings = hf_embeddings.embed_query("Hello world")
#print(embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
#len(embeddings)

# Data Ingestion

In [6]:
from langchain.document_loaders import PyPDFLoader

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [8]:
import os
os.getcwd()

'e:\\LLMOpsPractice\\Documentportal\\notebook'

In [9]:
file_path = os.path.join(os.getcwd(),'data','sample.pdf')
file_path

'e:\\LLMOpsPractice\\Documentportal\\notebook\\data\\sample.pdf'

In [10]:
loader = PyPDFLoader(file_path)
documents = loader.load()
len(documents)  # 77pages have pdf, which is 77 documents

77

In [11]:
# this is the experiment value , there is no specfic value for chunk_size and chunk_overlap
# it depends on the use case, for example if you want to chunk the text into smaller
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150,length_function=len)

In [12]:
docs =text_splitter.split_documents(documents)
len(docs) # now we will be having 765 documents

765

In [13]:
docs[764].metadata
#docs[764].page_content

{'producer': 'pdfTeX-1.40.25',
 'creator': 'LaTeX with hyperref',
 'creationdate': '2023-07-20T00:30:36+00:00',
 'author': '',
 'keywords': '',
 'moddate': '2023-07-20T00:30:36+00:00',
 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5',
 'subject': '',
 'title': '',
 'trapped': '/False',
 'source': 'e:\\LLMOpsPractice\\Documentportal\\notebook\\data\\sample.pdf',
 'total_pages': 77,
 'page': 76,
 'page_label': '77'}

In [14]:
from langchain.vectorstores import FAISS

In [15]:
#in memory store - RAM store
vectorstore = FAISS.from_documents(docs, hf_embeddings)

# Retrieval Process

In [22]:
# means from the vectordatabase,we are going to fetch or retrieve or rank the most appropriate documents/Results

In [25]:
relevant_documents = vectorstore.similarity_search("llama2 fintuning benchmark expirements?", k=5)

In [26]:
relevant_documents

[Document(id='348ddbe2-3990-4134-9e1a-5d83476ad9ec', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'e:\\LLMOpsPractice\\Documentportal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 48, 'page_label': '49'}, page_content='Llama 2\n7B 77.4 78.8 48.3 77.2 69.2 75.2 45.9 58.6 57.8 45.3\n13B 81.7 80.5 50.3 80.7 72.8 77.3 49.4 57.0 67.3 54.8\n34B 83.7 81.9 50.9 83.3 76.7 79.4 54.5 58.2 74.3 62.6\n70B 85.0 82.8 50.7 85.3 80.2 80.2 57.4 60.2 78.5 68.9\nTable 20: Performance on standard benchmarks.\nHuman-Eval MBPP\npass@1 pass@100 pass@1 pass@80\nMPT 7B 18.3 - 22.6 -\n30B 25.0 - 32.8 -\nFalcon 7B 0.0 - 11.2 -\n40B 0.6 - 29.8 -\nLlama 1\n7B 10.5 36.5 17.7 56.2\n13B 15.8 52.5 22.0

In [43]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

In [44]:
retriever.invoke("llama2 fintuning benchmark expirements?")

[Document(id='348ddbe2-3990-4134-9e1a-5d83476ad9ec', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'e:\\LLMOpsPractice\\Documentportal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 48, 'page_label': '49'}, page_content='Llama 2\n7B 77.4 78.8 48.3 77.2 69.2 75.2 45.9 58.6 57.8 45.3\n13B 81.7 80.5 50.3 80.7 72.8 77.3 49.4 57.0 67.3 54.8\n34B 83.7 81.9 50.9 83.3 76.7 79.4 54.5 58.2 74.3 62.6\n70B 85.0 82.8 50.7 85.3 80.2 80.2 57.4 60.2 78.5 68.9\nTable 20: Performance on standard benchmarks.\nHuman-Eval MBPP\npass@1 pass@100 pass@1 pass@80\nMPT 7B 18.3 - 22.6 -\n30B 25.0 - 32.8 -\nFalcon 7B 0.0 - 11.2 -\n40B 0.6 - 29.8 -\nLlama 1\n7B 10.5 36.5 17.7 56.2\n13B 15.8 52.5 22.0

In [29]:
## Question -> user question
## COntext is -> retriving from the vector database ->based on the user question , 
# we are going to retriving the information from the vector database

In [45]:
# Creating chain with retriever
prompt_template = """You are a helpful assistant.Answer the question based on the context provided below
if the context does not contain sufficient information, say "I don't know".
context: {context}
Question: {question}
Answer:"""

In [46]:
from langchain.prompts import PromptTemplate

In [47]:
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [48]:
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='You are a helpful assistant.Answer the question based on the context provided below\nif the context does not contain sufficient information, say "I don\'t know".\ncontext: {context}\nQuestion: {question}\nAnswer:')

In [36]:
# Creating the Chain using LCEL.

In [50]:
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()

In [51]:
rag_chain  = prompt | model |parser

In [52]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

In [53]:
#rag_chain.invoke("tell me about the llama2 fine tuning benchmark experiments?")
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context":retriever | format_docs , "question":RunnablePassthrough()}
    | prompt
    | model
    | parser
)

In [55]:
rag_chain.invoke("can you tell me Scaling trends for the reward model?")

'<think>\nOkay, so the user is asking about the scaling trends for the reward model. Let me look through the context provided to find the relevant information.\n\nFirst, I notice there\'s a mention of "Figure 6: Scaling trends for the reward model. More data and a larger-size model generally improve." That seems directly related. Then there are two sets of data points with numbers like 0.52, 0.54 up to 0.64 and another set from 0.50 to 0.80. These might represent accuracy metrics at different stages. The models listed are 7b, 13b, 70b, GPT4, OpenAssistant. \n\nLooking at the text around Figure 6, it says that more data and larger models improve accuracy. The first figure shows accuracy on all examples, starting at 0.52 and increasing to 0.64, while the second shows accuracy on "Significantly Better" examples, starting at 0.50 and going up to 0.80. The models are ordered by size, from 7b to 70b, then GPT4 and OpenAssistant. \n\nThe context also mentions that gathering new preference dat