In [4]:
from langchain.document_loaders import TextLoader
from langchain.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
import os

# Load the PDF
loader = PDFPlumberLoader("../data/prepguide.pdf")
docs = loader.load()

# Split into chunks
text_splitter = SemanticChunker(HuggingFaceEmbeddings())
documents = text_splitter.split_documents(docs)

# Instantiate the embedding model
embedder = HuggingFaceEmbeddings()

# Create the vector store and fill it with embeddings
vector = FAISS.from_documents(documents, embedder)
retriever = vector.as_retriever(search_type="similarity", search_kwargs={"k": 3})


  text_splitter = SemanticChunker(HuggingFaceEmbeddings())
  embedder = HuggingFaceEmbeddings()


In [5]:
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA

# Define llm
llm = Ollama(model="deepseek-r1")

# Define the prompt
prompt = """
1. Use the following pieces of context to answer the question at the end.
2. If you don't know the answer, just say that "I don't know" but don't make up an answer on your own.\n
3. Keep the answer crisp and limited to 3,4 sentences.
Context: {context}
Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt)

llm_chain = LLMChain(
    llm=llm,
    prompt=QA_CHAIN_PROMPT,
    callbacks=None,
    verbose=True)

document_prompt = PromptTemplate(
    input_variables=["page_content", "source"],
    template="Context:\ncontent:{page_content}\nsource:{source}",
)

combine_documents_chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_variable_name="context",
    document_prompt=document_prompt,
    callbacks=None)

qa = RetrievalQA(
    combine_documents_chain=combine_documents_chain,
    verbose=True,
    retriever=retriever,
    return_source_documents=True)

user_input = "What is the process of a Google Cloud's interview process?"
response = qa(user_input)["result"]
print(response)

  llm = Ollama(model="deepseek-r1")
  llm_chain = LLMChain(
  combine_documents_chain = StuffDocumentsChain(
  qa = RetrievalQA(
  response = qa(user_input)["result"]




[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
1. Use the following pieces of context to answer the question at the end.
2. If you don't know the answer, just say that "I don't know" but don't make up an answer on your own.

3. Keep the answer crisp and limited to 3,4 sentences.
Context: Context:
content:Google Interview Preparation Guide
Customer Engineer Specialist
Our hiring process
There’s no one kind of Googler, so we’re always looking for people who can bring new
perspectives and experiences to our teams. If you’re looking for a place that values your
curiosity, passion, and desire to learn, if you’re seeking colleagues who are big thinkers eager to
take on fresh challenges as a team, you may be a future Googler. The Interviews
Google Cloud’s interview process consists of two parts during which we evaluate four
attributes: Role Related Knowledge, General Cognitive Ability, Googleyness, and Leadership