In [2]:
# =================== Install Required Libraries ===================
!pip install langchain langchain-community langchain-groq sentence-transformers faiss-cpu pypdf

Collecting langchain-community
  Using cached langchain_community-0.3.25-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-groq
  Using cached langchain_groq-0.3.2-py3-none-any.whl.metadata (2.6 kB)
Collecting faiss-cpu
  Using cached faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting pypdf
  Downloading pypdf-5.6.0-py3-none-any.whl.metadata (7.2 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading langchain_core-0.3.65-py3-none-any.whl.metadata (5.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting groq<1,>=0.4.1 (from langchain-groq)
  Downloading groq-0.28

In [3]:
# =================== Step 1: Load and Chunk PDF ===================
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

loader = PyPDFLoader("/content/solid-python.pdf")
documents = loader.load_and_split()

splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(documents)
print(f"✅ Total Chunks: {len(docs)}")

✅ Total Chunks: 22


In [4]:
# =================== Step 2: Embedding and FAISS ===================
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embedding_model)

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
# =================== Step 3: Setup Retriever with MMR ===================
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 10, "lambda_mult": 0.5})

In [8]:
# =================== Step 4: Define LLM and Prompt Template ===================
from langchain_groq import ChatGroq
from langchain.prompts.pipeline import PipelinePromptTemplate
from langchain.prompts import PromptTemplate
from google.colab import userdata
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    api_key=userdata.get('GROQ_API_KEY'),  # Replace with your actual API key
)

context_template = PromptTemplate(input_variables=["context"], template="{context}")
question_template = PromptTemplate(input_variables=["question"], template="Answer this question based on the above context: {question}")
full_prompt = PipelinePromptTemplate(pipeline_prompts=[("context", context_template), ("question", question_template)], final_prompt=question_template)
full_prompt

PipelinePromptTemplate(input_variables=[], input_types={}, partial_variables={}, final_prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='Answer this question based on the above context: {question}'), pipeline_prompts=[('context', PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='{context}')), ('question', PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='Answer this question based on the above context: {question}'))])

In [10]:
# =================== Step 5: Use Reranker ===================
from sentence_transformers.cross_encoder import CrossEncoder
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.66k [00:00<?, ?B/s]

In [11]:
# =================== Step 6: Ask a Question ===================
question = "What is the primary focus of this document?"

# Step A: Retrieve with MMR
retrieved_docs = retriever.get_relevant_documents(question)

# Step B: Score with reranker
pairs = [[question, doc.page_content] for doc in retrieved_docs]
scores = reranker.predict(pairs)

scored_docs = list(zip(retrieved_docs, scores))
sorted_docs = sorted(scored_docs, key=lambda x: x[1], reverse=True)

  retrieved_docs = retriever.get_relevant_documents(question)


In [13]:
# =================== Step 7: Prepare and Run LLM Prompt ===================
def run_llm_with_docs(docs, label="Answer"):
    top_chunks = docs[:2]
    context = "\n\n".join([doc.page_content for doc in top_chunks])
    prompt = full_prompt.format(context=context, question=question)
    response = llm.invoke(prompt)

    print(f"\n=== {label} ===")
    for i, doc in enumerate(top_chunks):
        print(f"\n--- Chunk {i+1} ---")
        print(f"Page: {doc.metadata.get('page', 'N/A')}")
        print(f"Content:\n{doc.page_content[:300]}...")
        if isinstance(doc, tuple):  # (doc, score)
            print(f"Score: {doc[1]:.4f}")
    print(f"\n📌 Response:\n{response.content}")
    return response

In [14]:
# Run before reranking
run_llm_with_docs(retrieved_docs, label="LLM Answer (Before Reranker)")


=== LLM Answer (Before Reranker) ===

--- Chunk 1 ---
Page: 18
Content:
Aspects of a Class
Thursday, Feb 22nd 2024 19/22
The 5 aspects of the class are:
a
responsibility towards parent
interface towards callers
interface towards callees
responsibility towards inheritors
class'
purpose
a
Mike Lindner: The Five Principles For SOLID Software Design...

--- Chunk 2 ---
Page: 7
Content:
Single Responsibility - After
Thursday, Feb 22nd 2024 8/22
def select_adults(people):
return [person for person in people if person.age >= 18]
def people_to_html(people):
result = "<ul>\n"
for person in people:
result += " <li>" + person.name + "</li>\n"
result += "</ul>"
return result
# ...
page = ...

📌 Response:
There is no context provided to answer this question. The conversation has just started, and no document or topic has been mentioned. If you provide more context or information, I'll be happy to help.


AIMessage(content="There is no context provided to answer this question. The conversation has just started, and no document or topic has been mentioned. If you provide more context or information, I'll be happy to help.", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 62, 'total_tokens': 103, 'completion_time': 0.180988986, 'prompt_time': 0.005468331, 'queue_time': 0.217354199, 'total_time': 0.186457317}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_2ddfbb0da0', 'finish_reason': 'stop', 'logprobs': None}, id='run--9cc656d0-5284-4627-b925-42d7fd1810f6-0', usage_metadata={'input_tokens': 62, 'output_tokens': 41, 'total_tokens': 103})

In [15]:
# Run after reranking
top_reranked_docs = [doc for doc, _ in sorted_docs]
run_llm_with_docs(top_reranked_docs, label="LLM Answer (After Reranker)")


=== LLM Answer (After Reranker) ===

--- Chunk 1 ---
Page: 18
Content:
Aspects of a Class
Thursday, Feb 22nd 2024 19/22
The 5 aspects of the class are:
a
responsibility towards parent
interface towards callers
interface towards callees
responsibility towards inheritors
class'
purpose
a
Mike Lindner: The Five Principles For SOLID Software Design...

--- Chunk 2 ---
Page: 1
Content:
Motivation
Thursday, Feb 22nd 2024 2/22
Find guiding design principles to
maintain software quality over
time....

📌 Response:
There is no context provided, so I don't have any information to base my answer on. If you could provide the context or document, I would be happy to help answer your question.


AIMessage(content="There is no context provided, so I don't have any information to base my answer on. If you could provide the context or document, I would be happy to help answer your question.", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 62, 'total_tokens': 101, 'completion_time': 0.141818182, 'prompt_time': 0.051244046, 'queue_time': 0.218173375, 'total_time': 0.193062228}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_6507bcfb6f', 'finish_reason': 'stop', 'logprobs': None}, id='run--ecae48de-99c7-46bd-9b01-dc2d338b4a12-0', usage_metadata={'input_tokens': 62, 'output_tokens': 39, 'total_tokens': 101})