In [19]:
# Import necessary modules for URL handling, warnings, file path manipulation, and pretty printing
import urllib
import warnings
from pathlib import Path as p  # Import Path class for file and directory manipulation
from pprint import pprint  # For printing data structures in a readable format

# Importing libraries for data handling and machine learning
import pandas as pd  # For data manipulation and analysis
from langchain import PromptTemplate  # For creating prompt templates in LangChain
from langchain.chains.question_answering import load_qa_chain  # To load a Question Answering chain
from langchain.document_loaders import PyPDFLoader  # To load and process PDF documents
from langchain.text_splitter import RecursiveCharacterTextSplitter  # To split text into manageable chunks
from langchain.vectorstores import Chroma  # For managing and querying vector-based document embeddings
from langchain.chains import RetrievalQA  # To create a question-answering chain with a retriever

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# Note: Restart the Python kernel if issues arise with LangChain imports.

In [20]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv, then read the Gemini key.
# The lookup yields None when GEMINI_API_KEY is not set.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GEMINI_API_KEY")

# Chat model shared by every QA chain built further down.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.2,
    convert_system_message_to_human=True,
)

In [21]:

# Load the source PDF (path is relative to the notebook's working directory)
# and split it; `pages` is a sequence of objects exposing `.page_content`.
pdf_loader = PyPDFLoader(file_path="./pdfs/micro.pdf")
pages = pdf_loader.load_and_split()

In [22]:
# Sanity check: how many entries pdf_loader.load_and_split() produced.
len(pages)

34

RAG Pipeline: Embedding + Gemini (LLM)

In [23]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [24]:

# Splitter configured for large chunks (chunk_size=10000) that share
# 1000 units of overlap with their neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=1000,
)

# Merge the text of every page into one string, with a blank line between
# pages, then re-split that string with the splitter above.
# The loop variable is `page` so it cannot be confused with the `Path as p` alias.
context = "\n\n".join(str(page.page_content) for page in pages)
texts = text_splitter.split_text(context)

In [25]:
# Embedding client handed to the vector store below; it is given the same
# API key as the chat model.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

In [26]:

# Index the text chunks in Chroma (no persist directory is passed) and expose
# the store as a retriever configured with k=5.
# Despite its name, `vector_index` holds the retriever, not the store itself.
vector_index = (
    Chroma.from_texts(texts=texts, embedding=embeddings)
    .as_retriever(search_kwargs={"k": 5})
)

In [27]:

# Baseline retrieval-QA chain: no custom prompt is supplied here.
# NOTE(review): `qa_chain` is rebound in the next cell (with a custom prompt)
# before it is ever called, so this instance is never queried.
qa_chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=vector_index,
    return_source_documents=True,
)

In [28]:

# Custom answer prompt. {context} and {question} are the template's two
# placeholders; the QA chain supplies their values when it runs.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. Always use human face reaction emoji at the end of conversation
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Build (not run) the chain that uses the custom prompt. This rebinds
# `qa_chain`, replacing the default-prompt chain created earlier.
qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [29]:
# Ask one question through the retrieval-QA chain.
# `invoke` replaces the deprecated direct-call form `qa_chain({...})`; the
# returned mapping is read the same way.
question = "How to add two numbers in 8085"
result = qa_chain.invoke({"query": question})

# The generated text lives under "result".
answer = result["result"]

# A single argument needs no `sep`; the previous sep="\n" had no effect.
print(answer)

Use the `ADD` instruction for 8-bit numbers and `DAD` for 16-bit numbers.  For BCD numbers, use `ADD` followed by `DAA`. 😊


In [30]:
# Same question as the previous cell, issued again (a second model call),
# with the answer echoed one line at a time.
# `invoke` replaces the deprecated direct-call form `qa_chain({...})`.
question = "How to add two numbers in 8085"
result = qa_chain.invoke({"query": question})
answer = result["result"]

# splitlines() drops the line terminators; print() re-adds a newline per line.
for line in answer.splitlines():
    print(line)

Use the `ADD` instruction for 8-bit numbers and `DAD` for 16-bit numbers.  For BCD numbers, use `ADD` followed by `DAA`. 😊
