In [37]:
#Loading the PDF file using PyPDFLoader
from langchain_community.document_loaders import PyPDFLoader

def load_pdf(file_path: str):
    """Load a PDF with PyPDFLoader and print a short preview of its first page.

    Args:
        file_path: Path of the PDF file handed to ``PyPDFLoader``.

    Returns:
        The list returned by ``PyPDFLoader.load()``. It is returned as-is
        (possibly empty) so callers can decide how to handle a PDF that
        yielded no pages.
    """
    loader = PyPDFLoader(file_path)
    pages = loader.load()

    print(f"Total Pages Found: {len(pages)}")

    # Guard the preview: indexing pages[0] would raise IndexError when the
    # loader returns no pages.
    if not pages:
        return pages

    first_page = pages[0]
    print("-" * 30)
    print(f"Content of Page 1 (First 500 chars):\n{first_page.page_content[:500]}")
    print("-" * 30)
    print(f"Metadata of Page 1: {first_page.metadata}")

    return pages



In [38]:
# Load the source PDF once; later cells reuse `my_pages`.
my_pages = load_pdf("Rag_docs.pdf")

Total Pages Found: 8
------------------------------
Content of Page 1 (First 500 chars):
Candidate:  Gurudevi  Lavanya  Gopisetty  
 
1.  Personal  &  Academic  Overview  
‚óè  Full  Name:  Gurudevi  Lavanya  Gopisetty  ‚óè  Date  of  Birth  (DOB):  june  10  ,  1999  ‚óè  Location:  Long  Beach,  California,  USA  ‚óè  Email:  gglavanya06@gmail.com  ‚óè  Phone:  +1  (669)  306-3851  
Education  
‚óè  Master‚Äôs  Degree:  M.S.  in  Computer  Science  ‚óè  University:  California  State  University  ‚óè  Expected  Graduation:  December  2025  ‚óè  Cumulative  GPA:  3.909  /  4.0  
 
2.  Professional  Ro
------------------------------
Metadata of Page 1: {'producer': 'Skia/PDF m145 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Rag_docs', 'source': 'Rag_docs.pdf', 'total_pages': 8, 'page': 0, 'page_label': '1'}


In [39]:
# creating the chunks to give to the vector database
from langchain_text_splitters import RecursiveCharacterTextSplitter

def create_chunks(text, chunk_size=500, chunk_overlap=50):
    """Split loaded documents into overlapping chunks.

    Args:
        text: The documents to split. Despite the name, this value is passed
            to ``split_documents``, so it is a collection of document objects
            rather than a plain string.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 500, the value that was previously hard-coded.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 50, the value that was previously hard-coded.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # Separators are tried in this order: paragraph, line, word, character.
        separators=["\n\n", "\n", " ", ""],
    )
    return splitter.split_documents(text)

In [40]:
my_chunks = create_chunks(my_pages)

# Report how many chunks were produced, then preview the first one.
chunk_total = len(my_chunks)
print(f"‚úÖ Created {chunk_total} chunks.")
first_chunk = my_chunks[0]
print(f"First chunk preview: {first_chunk.page_content[:100]}")

‚úÖ Created 20 chunks.
First chunk preview: Candidate:  Gurudevi  Lavanya  Gopisetty  
 
1.  Personal  &  Academic  Overview  
‚óè  Full  Name:  G


In [41]:
# Converting the chunks into embeddings, i.e. numerical vectors that can be compared mathematically
from langchain_huggingface import HuggingFaceEmbeddings

def get_embeddings_model():
    """Build the HuggingFace embedding model used throughout the notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2``, configured with
        ``device="cpu"`` and ``normalize_embeddings=False``.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": False},
    )

# Single embedding model instance shared by the cells below.
embed_model = get_embeddings_model()



In [42]:
# Smoke test: embed one sentence and inspect the resulting vector.
test_text = "This is a test to see what a vector looks like."
vector = embed_model.embed_query(test_text)

print(f"‚úÖ Embedding Successful!")
vector_length = len(vector)
print(f"Vector Length: {vector_length}")  # Should be 384 for MiniLM
leading_values = vector[:5]
print(f"First 5 numbers of the vector: {leading_values}")

‚úÖ Embedding Successful!
Vector Length: 384
First 5 numbers of the vector: [-0.021258514374494553, 0.03264220803976059, -0.05218735337257385, -0.006472242064774036, 0.06284356117248535]


In [43]:
# Store the embeddings in a vector database, so that chunks similar to a query can be retrieved by distance calculation
from langchain_chroma import Chroma 
def create_vector_db(chunks, embed_model, persist_directory="./vector_db"):
    """Embed the chunks and store them in a Chroma vector database.

    Args:
        chunks: Documents forwarded to ``Chroma.from_documents``.
        embed_model: Embedding object forwarded as ``embedding``.
        persist_directory: Directory forwarded as ``persist_directory``.
            Defaults to "./vector_db", the value that was previously
            hard-coded.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    # NOTE(review): the directory is persistent, so re-running this against
    # the same path presumably adds the same chunks again — confirm, and
    # clear the directory between runs if duplicates show up in retrieval.
    return Chroma.from_documents(
        documents=chunks,
        embedding=embed_model,
        persist_directory=persist_directory,
    )

In [44]:
vector_db = create_vector_db(my_chunks, embed_model)

# --- INSPECTION: retrieval-only sanity check (no LLM involved) ---
query = "What is the name of the person?"  # swap in any topic covered by the PDF
search_results = vector_db.similarity_search(query, k=1)  # k=1: only the single best match

print("\n--- TOP SEARCH RESULT ---")
top_hit = search_results[0]
print(f"Content: {top_hit.page_content[:200]}...")
print(f"Metadata: {top_hit.metadata}")


--- TOP SEARCH RESULT ---
Content: Candidate:  Gurudevi  Lavanya  Gopisetty  
 
1.  Personal  &  Academic  Overview  
‚óè  Full  Name:  Gurudevi  Lavanya  Gopisetty  ‚óè  Date  of  Birth  (DOB):  june  10  ,  1999  ‚óè  Location:  Long  Beac...
Metadata: {'producer': 'Skia/PDF m145 Google Docs Renderer', 'page_label': '1', 'creator': 'PyPDF', 'source': 'Rag_docs.pdf', 'creationdate': '', 'page': 0, 'title': 'Rag_docs', 'total_pages': 8}


In [45]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def get_answer(question, vector_db):
    """Answer a question about the candidate with a retrieval-augmented chain.

    The chain retrieves documents from ``vector_db``, inserts their text into
    the prompt as ``{context}``, sends the prompt to the local Ollama model
    and parses the reply into a string.

    Args:
        question: The user question; it is passed unchanged into the prompt
            and is also the input given to the retriever.
        vector_db: Vector store exposing ``as_retriever()``.

    Returns:
        The result of invoking the chain, whose last step is
        ``StrOutputParser``.
    """
    llm = ChatOllama(model="llama3.2:1b")

    # 1. Define the prompt.
    # The prompt text is kept exactly as written (rule 9 repeats rule 7).
    template = """
    You are a professional and friendly career assistant. 
    Your goal is to answer questions about a candidate's skills based on their resume/profile data.
    your goal is to answer the question as if the candidate answers to the requiterer.

    RULES:
    1. Do NOT say "Based on the context" or "According to the document."
    2. Speak as if you already know the candidate well.
    3. Use a friendly, conversational, and professional tone.
    4. If the information isn't there, politely say you aren't sure about that specific detail.
    5. Keep it concise but enthusiastic.
    6. If you don't have enough information to answer, ask for clarification.
    7. if the question is ask about a job/role she fits in, i want you to answer accordingly , and also put the skills and relevant projects worked on.
    8. start the sentence like  "Lavanya did this, Lavanya did that, Lavanya is a professional in this field, Lavanya has worked on this project, Lavanya is skilled in this area."
    9. if the question is ask about a job/role she fits in, i want you to answer accordingly , and also put the skills and relevant projects worked on.

    Candidate Information:
    {context}
    
    User Question: {question}
    Friendly Response:
   
    """
    prompt = ChatPromptTemplate.from_template(template)

    def _format_docs(docs):
        """Join the text of the retrieved documents into one context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    # 2. Build the chain using the "Pipe" | operator
    # Retrieve -> join chunk text -> prompt -> LLM -> parse as string.
    # Without the join step the prompt would receive the Python repr of the
    # retrieved document list (metadata included) instead of clean text.
    chain = (
        {
            "context": vector_db.as_retriever() | _format_docs,
            "question": RunnablePassthrough(),
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    # 3. Run it
    return chain.invoke(question)



In [46]:
# Ask a sample question through the plain RAG chain (no cache involved).
answer = get_answer("is she good in machine learning ", vector_db)
print(f"ü§ñ AI ANSWER:\n{answer}")

ü§ñ AI ANSWER:
Lavanya is very skilled in machine learning, having demonstrated a strong applied understanding of ML fundamentals and mathematical intuition behind models. She has worked on several projects that showcase her expertise, including developing predictive models for real-world data. 

Some relevant skills and projects she's worked on include: building neural networks for image classification, implementing decision trees for regression analysis, and optimizing machine learning algorithms using hyperparameter tuning techniques.

Lavanya is also well-versed in the theoretical aspects of machine learning, having learned from her academic background and practical experience. She has a solid grasp of data structures and algorithms, operating systems, finite automata, and deep learning concepts.


In [62]:
#implementation of cache for storing the results of the query

from langchain_chroma import Chroma
# Settings for the semantic cache.
CACHE_PATH = "./my_semantic_cache"  # persist directory of the "qa_cache" collection
SIMILARITY_THRESHOLD = 0.8  # relevance score a cached question is compared against
TTL_seconds = 7 * 24 * 60 * 60  # 1 week
Max_CACHE_SIZE = 100  # NOTE(review): not referenced by the cache functions in this notebook

def check_cache(question, embed_model, threshold=0.90):
    """Look up a semantically similar question in the persistent cache.

    Args:
        question: The question to look up.
        embed_model: Embedding object handed to Chroma as
            ``embedding_function``.
        threshold: Minimum relevance score for a hit. Defaults to 0.90, the
            gate that was previously hard-coded; the module-level
            ``SIMILARITY_THRESHOLD`` was only checked after that gate and so
            never changed the outcome.

    Returns:
        ``(answer, score)`` on a cache hit, otherwise ``(None, 0)`` (no
        entry, score below ``threshold``, expired entry, or an entry that
        carries no answer).
    """
    # Local import: this function is defined in a cell that runs before the
    # cell importing `time`.
    import time

    cache_db = Chroma(
        embedding_function=embed_model,
        persist_directory=CACHE_PATH,
        collection_name="qa_cache",
    )

    # Only the single closest cached question is considered.
    results = cache_db.similarity_search_with_relevance_scores(question, k=1)
    if not results:
        return None, 0

    doc, score = results[0]
    if score < threshold:
        return None, 0

    metadata = doc.metadata
    # Older library versions may not populate Document.id; persistence steps
    # below are skipped in that case.
    doc_id = getattr(doc, "id", None)

    # Entries without a timestamp are treated as expired, as before.
    age = time.time() - metadata.get("timestamp", 0)
    if age > TTL_seconds:
        print(f"Cache expired after {TTL_seconds} seconds.")
        # Evict the stale entry so it cannot keep winning the k=1 lookup
        # over a fresher copy of the same question.
        if doc_id:
            cache_db.delete(ids=[doc_id])
        return None, 0

    if "answer" not in metadata:
        return None, 0

    # LRU update: tolerate entries written without a hit counter, and write
    # the refreshed metadata back so the update outlives this call.
    metadata["hit_count"] = metadata.get("hit_count", 0) + 1
    metadata["timestamp"] = time.time()
    if doc_id:
        cache_db.update_document(doc_id, doc)

    print(f"‚úÖ Cache Hit! Score: {score}")
    return metadata["answer"], score


In [63]:
import time
import uuid
def save_to_cache(question, answer, embed_model):
    """Store a question/answer pair in the persistent "qa_cache" collection.

    The question text is what gets embedded; the answer travels in the
    entry's metadata together with a creation timestamp and a hit counter
    starting at 0.

    Args:
        question: Question text stored as the entry's text.
        answer: Answer stored under the ``"answer"`` metadata key.
        embed_model: Embedding object handed to Chroma as
            ``embedding_function``.
    """
    store = Chroma(
        embedding_function=embed_model,
        persist_directory=CACHE_PATH,
        collection_name="qa_cache",
    )

    # A random UUID means every call adds a new entry, even for a repeated question.
    entry_id = str(uuid.uuid4())
    entry_metadata = {
        "answer": answer,
        "timestamp": time.time(),
        "hit_count": 0,
    }
    store.add_texts(texts=[question], ids=[entry_id], metadatas=[entry_metadata])
    print(f"üíæ Saved to cache with ID: {entry_id}")

In [64]:
def cleanup_cache_by_id(cache_db, max_size=100):
    """Trim the cache to at most ``max_size`` entries, oldest first.

    Args:
        cache_db: Store exposing ``get(include=[...])``, which returns a
            mapping with ``"ids"`` and ``"metadatas"``, and ``delete(ids=...)``.
        max_size: Maximum number of entries to keep. Defaults to 100.

    Returns:
        None. Entries are deleted through ``cache_db.delete`` and a summary
        line is printed only when something was removed.
    """
    # 1. Get all IDs and metadata from the cache
    data = cache_db.get(include=["metadatas"])
    ids = data["ids"]
    metadatas = data["metadatas"]

    # 2. Nothing to do unless we are over the limit
    overflow = len(ids) - max_size
    if overflow <= 0:
        return

    def _timestamp(meta):
        # Entries saved without a timestamp (or with no metadata at all) are
        # ranked as the oldest instead of raising KeyError/TypeError.
        return (meta or {}).get("timestamp", 0)

    # 3. Sort by timestamp (oldest first) and pick the surplus entries
    oldest_first = sorted(zip(ids, metadatas), key=lambda pair: _timestamp(pair[1]))
    ids_to_remove = [entry_id for entry_id, _ in oldest_first[:overflow]]

    # 4. EXECUTE DELETION
    cache_db.delete(ids=ids_to_remove)
    print(f"üßπ Cleaned up {len(ids_to_remove)} old cache entries.")

In [65]:
def get_answer_with_cache(question, vector_db, embed_model):
    """Answer a question, serving it from the semantic cache when possible.

    Args:
        question: The user question.
        vector_db: Vector store passed on to ``get_answer`` on a cache miss.
        embed_model: Embedding object passed to the cache helpers.

    Returns:
        On a hit, the cached answer prefixed with a cache-hit marker and the
        score; on a miss, the freshly generated answer (which is also saved
        to the cache).
    """
    cached_answer, score = check_cache(question, embed_model)

    if not cached_answer:
        # Miss: generate the answer and remember it for next time.
        fresh_answer = get_answer(question, vector_db)
        save_to_cache(question, fresh_answer, embed_model)
        return fresh_answer

    return f"‚ö° [Cache Hit - {score:.2f}] {cached_answer}"

In [66]:
# Route a sample question through the cache-aware entry point.
question = "is she good in machine learning"
answer = get_answer_with_cache(question, vector_db, embed_model)
print(f"ü§ñ AI ANSWER:\n{answer}")

Cache expired after 604800 seconds.
üíæ Saved to cache with ID: 7b756168-a3ca-47c9-9db5-a8b05705f5e9
ü§ñ AI ANSWER:
Lavanya has a strong applied understanding of machine learning fundamentals, which includes mathematical intuition behind models, best practices for real-world ML systems. She's also shown a solid grasp of key concepts in data structures and algorithms, operating systems, finite automata, machine learning, deep learning, artificial intelligence, probability & statistics.

She has demonstrated practical experience with these topics by working on relevant projects, such as developing predictive models using Python libraries like scikit-learn and TensorFlow. Her understanding is strong, but I'd love to see more hands-on experience in this area.


In [67]:
import os
from langchain_chroma import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import hashlib
import time
# Cache-hit threshold read by run_ai_pipeline. Rebinding the name here replaces
# the 0.8 assigned in the earlier cache-settings cell.
SIMILARITY_THRESHOLD = 0.9

def run_ai_pipeline(user_query, vector_db, cache_db, rag_chain):
    """
    The main workflow: Semantic Cache -> RAG -> Update Cache

    Args:
        user_query: The question typed by the user.
        vector_db: Not used inside this function; kept so existing callers
            keep working.
        cache_db: Cache store used for the similarity lookup and for saving
            new answers.
        rag_chain: Object whose ``invoke(user_query)`` produces the answer,
            either as a string or as a mapping with an ``"answer"`` key.

    Returns:
        The cached answer prefixed with a cache-hit marker and the score, or
        the freshly generated answer on a miss.
    """
    # 1. Check Semantic Cache
    # similarity_search_with_relevance_scores returns (Document, Score)
    cache_results = cache_db.similarity_search_with_relevance_scores(user_query, k=1)

    if cache_results and cache_results[0][1] >= SIMILARITY_THRESHOLD:
        doc, score = cache_results[0]
        return f"‚ö° [CACHE HIT - {score:.2f}]\n{doc.metadata['answer']}"

    # 2. Cache Miss: Run the RAG Chain
    print("üê¢ Cache Miss. Thinking...")
    response = rag_chain.invoke(user_query)

    # Handle both dictionary and string outputs from the chain
    answer = response if isinstance(response, str) else response.get("answer", "I'm sorry, I couldn't find an answer.")

    # Deterministic id: the same question (ignoring case) maps to the same entry.
    q_id = hashlib.md5(user_query.lower().encode()).hexdigest()

    # 3. Update Cache for next time
    # Store the same metadata shape as save_to_cache: cleanup_cache_by_id sorts
    # on "timestamp" and check_cache reads "timestamp" and "hit_count".
    cache_db.add_texts(
        texts=[user_query],
        ids=[q_id],
        metadatas=[{
            "answer": answer,
            "timestamp": time.time(),
            "hit_count": 0,
        }],
    )

    # 4. The cleanup
    cleanup_cache_by_id(cache_db, max_size=100)

    return answer

In [None]:
# --- ONE-TIME SETUP ---
# (Assumes you have already defined embed_model, vector_db, and llm in previous cells)
import time
# 1. Initialize Cache DB
# Opens the persistent "qa_cache" collection used by the pipeline below.
cache_db = Chroma(
    persist_directory="./my_semantic_cache",
    embedding_function=embed_model,
    collection_name="qa_cache"
)
# A module-level LLM is needed here: the one created in get_answer is local to that function.
llm = ChatOllama(model="llama3.2:1b")

# 2. Create the RAG Chain
prompt = ChatPromptTemplate.from_template(template="""
    You are a professional and friendly career assistant. 
    Your goal is to answer questions about a candidate's skills based on their resume/profile data.
    your goal is to answer the question as if the candidate answers to the requiterer.

    RULES:
    1. Do NOT say "Based on the context" or "According to the document."
    2. Speak as if you already know the candidate well.
    3. Use a friendly, conversational, and professional tone.
    4. If the information isn't there, politely say you aren't sure about that specific detail.
    5. Keep it concise but enthusiastic.
    6. If you don't have enough information to answer, ask for clarification.
    7. if the question is ask about a job/role she fits in, i want you to answer accordingly , and also put the skills and relevant projects worked on.
    8. start the sentence like  "Lavanya did this, Lavanya did that, Lavanya is a professional in this field, Lavanya has worked on this project, Lavanya is skilled in this area."
    9. Make sure the answer should be in 3 sentences and should be relevant to the question.
    10. Also Metion the skills when needed.
    11. if the user asks is she fit in this job , mention the relevant projects she did while answering the project.                                   
                                          
                                          

    Candidate Information:
    {context}
    
    User Question: {question}
    Friendly Response:
   
    """)


def _join_page_content(docs):
    """Join the text of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieve the 3 closest chunks and feed their text (not the Python repr of the
# document list, metadata included) into the prompt's {context} slot.
rag_chain = (
    {
        "context": vector_db.as_retriever(search_kwargs={"k": 3}) | _join_page_content,
        "question": RunnablePassthrough(),
    }
    | prompt | llm | StrOutputParser()
)

# --- THE MAIN LOOP ---
# Reads questions until the user types an exit keyword or the input stream ends.
print("Ready! Type 'exit' to stop.")
while True:
    time.sleep(0.5)  # To prevent rapid fire inputs
    try:
        # Strip so that "exit " or a stray newline is still recognised.
        query = input("\nAsk about the candidate: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input or an interrupt ends the session cleanly, not with a traceback.
        print("Stopping session. Goodbye!")
        break

    # Blank input would otherwise be embedded, sent to the LLM and cached.
    if not query:
        continue

    if query.lower() in ['exit', 'quit', 'stop']:
        print("Stopping session. Goodbye!")
        break

    final_response = run_ai_pipeline(query, vector_db, cache_db, rag_chain)
    print(f"\nü§ñ Assistant: {final_response}")

Ready! Type 'exit' to stop.
üê¢ Cache Miss. Thinking...

ü§ñ Assistant: Lavanya has a solid foundation in machine learning fundamentals, with a strong understanding of data structures and algorithms. She's also demonstrated mathematical intuition behind models and has applied this knowledge to real-world ML systems.

Lavanya has worked on several projects that showcase her skills in machine learning, including developing and testing predictive models using scikit-learn and TensorFlow. Her experience with deep learning frameworks like Keras and PyTorch has been particularly valuable in terms of practical impact and application.

I'm confident that Lavanya's expertise in machine learning would be a strong asset for any organization looking to leverage ML capabilities. With her academic knowledge, mathematical intuition, and practical experience working on projects, she would be well-suited for roles in AI research and development, data science, or related fields.

ü§ñ Assistant: ‚ö° [