In [2]:
import os

import faiss
import numpy as np
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_pdf(file_path):
    """Extract the text of every page of a PDF into a single string.

    Args:
        file_path: Location of the PDF; handed straight to ``PdfReader``.

    Returns:
        The text of each page that produced any, in page order, with a
        newline appended after every such page. Pages whose
        ``extract_text()`` result is empty or ``None`` contribute nothing.
    """
    page_texts = (page.extract_text() for page in PdfReader(file_path).pages)
    return "".join(page_text + "\n" for page_text in page_texts if page_text)

In [4]:
def chunk_text(text, chunk_size=300, overlap=50):
    """Split ``text`` into fixed-size character windows that overlap.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    each one repeats the last ``overlap`` characters of its predecessor.
    Chunking stops as soon as a window reaches the end of ``text``; stepping
    further would only emit a tail that is already fully contained in the
    previous chunk.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.
        overlap: Number of characters shared by neighbouring chunks. Must be
            non-negative and smaller than ``chunk_size``.

    Returns:
        A list of chunks in document order. Windows that contain only
        whitespace are skipped. An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive, ``overlap`` is
            negative, or ``overlap`` is not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap < 0:
        # A negative overlap would make the step larger than the window and
        # silently drop text between chunks.
        raise ValueError("overlap must not be negative")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    text_length = len(text)
    chunks = []

    for start in range(0, text_length, step):
        end = min(start + chunk_size, text_length)
        chunk = text[start:end]

        if chunk.strip():
            chunks.append(chunk)

        # This window already covers the rest of the text; any later window
        # would just be a suffix of it.
        if end == text_length:
            break

    return chunks

In [5]:
# Sentence-embedding model shared by create_embeddings() and ask_question().
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

def create_embeddings(chunks):
    """Encode text chunks into a float32 NumPy array.

    Args:
        chunks: The texts to embed; passed unchanged to the module-level
            ``embedding_model.encode``.

    Returns:
        The encoder's output converted to a NumPy array of dtype float32.
    """
    raw_vectors = embedding_model.encode(chunks)
    vector_array = np.array(raw_vectors)
    return vector_array.astype(np.float32)


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 728.36it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [6]:
def create_faiss_index(embeddings):
    """Build a flat L2 FAISS index populated with ``embeddings``.

    Args:
        embeddings: Array of vectors whose second axis length
            (``embeddings.shape[1]``) is used as the index dimension.
            ``create_embeddings()`` supplies these as float32.

    Returns:
        The ``faiss.IndexFlatL2`` instance after the vectors were added.
    """
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    flat_index.add(embeddings)
    return flat_index


In [7]:
# Text-generation pipeline that ask_question() calls to produce answers.
LLM_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
LLM_DEVICE = -1  # -1 selects the CPU in transformers pipelines

llm = pipeline("text-generation", model=LLM_MODEL_NAME, device=LLM_DEVICE)

Loading weights: 100%|██████████| 201/201 [00:00<00:00, 531.24it/s, Materializing param=model.norm.weight]                              


In [8]:
def ask_question(query, index, chunks, k=3):
    """Answer ``query`` from the most similar chunks using the module-level LLM.

    The query is embedded with ``embedding_model``, the nearest chunks are
    looked up in ``index``, and the retrieved text is placed in a chat-style
    prompt that is passed to ``llm``.

    Args:
        query: The question to answer.
        index: Search index offering ``search(vectors, k)`` and returning
            ``(distances, indices)``, e.g. the one built by
            ``create_faiss_index()``.
        chunks: The texts that were indexed, in the same order as their
            vectors were added to ``index``.
        k: Maximum number of chunks to retrieve as context (default 3).

    Returns:
        The text generated after the final ``<|assistant|>`` marker,
        stripped of surrounding whitespace.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, and chunks[-1] would silently pull in the last chunk.
    neighbour_count = min(k, len(chunks))

    if neighbour_count:
        query_embedding = np.array(embedding_model.encode([query])).astype("float32")
        _, indices = index.search(query_embedding, neighbour_count)
        # Still drop any out-of-range id in case the index holds fewer
        # vectors than ``chunks`` has entries.
        retrieved_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    else:
        # Nothing to retrieve; the prompt instructs the model how to respond
        # when the context does not contain the answer.
        retrieved_chunks = []

    context = "\n".join(retrieved_chunks)
    prompt = f"""
<|system|>
You are a helpful assistant. Answer only from the context below.
If answer is not present, say "Not found in resume".

<|user|>
Context:
{context}

Question:
{query}

<|assistant|>
"""

    response = llm(
        prompt,
        max_new_tokens=200,
        temperature=0.3,
        do_sample=True
    )

    # The pipeline output includes the prompt; keep only what follows the
    # last assistant marker.
    answer = response[0]["generated_text"]
    answer = answer.split("<|assistant|>")[-1].strip()

    return answer

In [9]:
# Resume to index. Set the RESUME_PDF_PATH environment variable to point at a
# different file; the default is the path this notebook was authored with.
RESUME_PDF_PATH = os.environ.get(
    "RESUME_PDF_PATH",
    r"C:\Users\vedan\Desktop\GenAIProject\resources\resume.pdf",
)

resume_text = load_pdf(RESUME_PDF_PATH)
chunks = chunk_text(resume_text)
if not chunks:
    # Fail here with a clear message (e.g. a PDF without a text layer)
    # instead of running the embedding and index steps on an empty list.
    raise ValueError(f"No extractable text found in {RESUME_PDF_PATH}")

print("Total chunks:", len(chunks))
embeddings = create_embeddings(chunks)
index = create_faiss_index(embeddings)

Total chunks: 10


In [10]:
query = "What are Vedant's technical skills?"
answer = ask_question(query, index, chunks)

# Print the question and then the generated answer, each under its heading.
for heading, body in (("\nQuestion:\n", query), ("\nAnswer:\n", answer)):
    print(heading, body)

Passing `generation_config` together with generation-related arguments=({'max_new_tokens', 'do_sample', 'temperature'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.
Both `max_new_tokens` (=200) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Question:
 What are Vedant's technical skills?

Answer:
 Vedant Sinha's technical skills include Java, Python, JavaScript, TypeScript, Spring Boot, Angular, Selenium, and TestNG.
