In [15]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_groq.chat_models import ChatGroq
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

In [17]:
groq_api_key = os.getenv('GROQ_API_KEY')
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

In [5]:
llm = ChatGroq(groq_api_key=groq_api_key, model="deepseek-r1-distill-llama-70b")

In [8]:
print(llm.invoke("Hi").content)

<think>

</think>

Hello! How can I assist you today?


In [18]:
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [19]:
len(embedding_model.embed_query("Hi"))

768

In [26]:
file_path = os.path.join(os.getcwd(), "data", "sample.pdf")
file_path

'e:\\2025\\Generative_AI\\Course\\LLMOps\\document_portal\\notebook\\data\\sample.pdf'

In [29]:
loader = PyPDFLoader(file_path)
docs = loader.load()

In [30]:
len(docs)

77

In [31]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
                                               chunk_overlap=150,
                                               length_function=len)

In [32]:
documents = text_splitter.split_documents(docs)

In [33]:
len(documents)

765

In [34]:
documents[0]

Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'e:\\2025\\Generative_AI\\Course\\LLMOps\\document_portal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 0, 'page_label': '1'}, page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\nHakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabe

In [35]:
documents[0].metadata

{'producer': 'pdfTeX-1.40.25',
 'creator': 'LaTeX with hyperref',
 'creationdate': '2023-07-20T00:30:36+00:00',
 'author': '',
 'keywords': '',
 'moddate': '2023-07-20T00:30:36+00:00',
 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5',
 'subject': '',
 'title': '',
 'trapped': '/False',
 'source': 'e:\\2025\\Generative_AI\\Course\\LLMOps\\document_portal\\notebook\\data\\sample.pdf',
 'total_pages': 77,
 'page': 0,
 'page_label': '1'}

In [36]:
documents[9].page_content

'5.2 Limitations and Ethical Considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34\n5.3 Responsible Release Strategy . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 35\n6 Related Work 35\n7 Conclusion 36\nA Appendix 46\nA.1 Contributions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 46\nA.2 Additional Details for Pretraining . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47'

In [37]:
vectorstore = FAISS.from_documents(documents=documents, embedding=embedding_model)

In [38]:
vectorstore.similarity_search("llama2 finetuning benchmark experiments.")

[Document(id='fcb4d63f-0377-4d1f-b793-1d5860d4667d', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'e:\\2025\\Generative_AI\\Course\\LLMOps\\document_portal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 7, 'page_label': '8'}, page_content='13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\nLlama 2\n7B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\n70B 37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\nTable 3: Overall performance on grouped academic benchmarks compared to open-source base models.'),
 Document(id='7

In [41]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [42]:
retriever.invoke("llama2 finetuning benchmark experiments.")

[Document(id='fcb4d63f-0377-4d1f-b793-1d5860d4667d', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'e:\\2025\\Generative_AI\\Course\\LLMOps\\document_portal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 7, 'page_label': '8'}, page_content='13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\nLlama 2\n7B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\n70B 37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\nTable 3: Overall performance on grouped academic benchmarks compared to open-source base models.'),
 Document(id='7

In [43]:
from langchain.prompts import PromptTemplate

In [44]:
prompt_template = """Answer the question based on the context provided below.
    If the context does not contain sufficient information, respond with:
    "I do not have enough information about this."
    
    Context: {context}
    Question: {question}

    Answer:"""

In [45]:
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=['context', 'question']
)

In [47]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [48]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

In [49]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

In [50]:
rag_chain.invoke("tell  me about the llama2 finetuning benchmark experiments?")

'<think>\nOkay, so I need to answer the question about the Llama2 fine-tuning benchmark experiments based on the provided context. Let me go through the context step by step.\n\nFirst, I see a table labeled "Table 3" which compares overall performance on grouped academic benchmarks. It lists different models with their respective numbers. The models include 13B, 33B, 65B, and Llama 2 in various sizes like 7B, 13B, 34B, and 70B. The numbers under each model are probably some performance metrics, but without headers, it\'s a bit unclear what each number represents. Maybe they\'re accuracy scores or some benchmark results across different tasks.\n\nNext, there\'s another section under "Fine-tuned" with "ChatGPT" having a set of numbers, all very low, like 0.23, 0.22, etc. Then, under Llama 2, there are several models with more numbers. Each model size (7B, 13B, 34B, 70B) has a long list of numbers. These could be performance metrics after fine-tuning, maybe in specific tasks or datasets.\

### One specific session for all these topic

1. Pydantic object and output parser 
2. history and memory
3. cache 
4. token counter (cost analysis)
5. pytest 
6. evaluation
7. guadrails

In [51]:
from langchain.callbacks import get_openai_callback

In [58]:
llm =ChatGroq(model="qwen/qwen3-32b")

In [59]:
with get_openai_callback() as cb:
    result = llm.invoke("tell me a joke")
    print(cb)

Tokens Used: 344
	Prompt Tokens: 12
		Prompt Tokens Cached: 0
	Completion Tokens: 332
		Reasoning Tokens: 0
Successful Requests: 1
Total Cost (USD): $0.0
