In [8]:
!pip install transformers accelerate langchain sentence-transformers faiss-cpu PyPDF2




In [9]:
import torch
from transformers import pipeline
from PyPDF2 import PdfReader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from google.colab import files


In [10]:
def extract_text_from_pdf(pdf_path):
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        text += page.extract_text()
    return text

def create_faiss_vector_store(text, path="faiss_index"):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local(path)

def load_faiss_vector_store(path="faiss_index"):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
    return vector_store


In [11]:
uploaded = files.upload()
pdf_path = list(uploaded.keys())[0]

text = extract_text_from_pdf(pdf_path)

create_faiss_vector_store(text)
vector_store = load_faiss_vector_store()


Saving Md__Jamil_Khan_CV_ (1).pdf to Md__Jamil_Khan_CV_ (1) (1).pdf


In [12]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

model_name = "google/flan-t5-base"

hf_pipeline = pipeline(
    "text2text-generation",
    model=model_name,
    device_map="auto"
)

llm = HuggingFacePipeline(pipeline=hf_pipeline)
retriever = vector_store.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


Device set to use cpu


In [13]:
while True:
    question = input("Ask a question (or type 'exit'): ")
    if question.lower() == 'exit':
        break
    answer = qa_chain.run(question)
    print(f"\nAnswer: {answer}\n")


Ask a question (or type 'exit'): Who supervised his undergraduate thesis?


Token indices sequence length is longer than the specified maximum sequence length for this model (1078 > 512). Running this sequence through the model will result in indexing errors



Answer: Ruhul Amin

Ask a question (or type 'exit'): What is the title of his undergraduate thesis?

Answer: Harmonization of Heart Disease Dataset for Accurate Diagnosis using ML & Feature Engineering

Ask a question (or type 'exit'): What is Md. Jamil Khan’s career objective?

Answer: Seeking a dynamic and growth-oriented position where I can contribute to impactful projects, collaborate with cross-functional teams, and continuously advance my expertise in cutting-edge technologies.

Ask a question (or type 'exit'): What is his nationality and date of birth?

Answer: Bangladeshi

Ask a question (or type 'exit'): exit


In [19]:
questions_list = [
    "What's the full name of the candidate?",
"Where is Md. Jamil Khan currently located?",
"What is his nationality and date of birth?",
"What is md. jamil khan contact email address?"
]

for q in questions_list:
    print(f"\n🔹 Question: {q}")
    answer = qa_chain.run(q)
    print(f"Answer: {answer}\n")



🔹 Question: What's the full name of the candidate?
Answer: Md. Jamil Khan


🔹 Question: Where is Md. Jamil Khan currently located?
Answer: Dhaka, Bangladesh


🔹 Question: What is his nationality and date of birth?
Answer: Bangladeshi


🔹 Question: What is md. jamil khan contact email address?
Answer: jamil.mu.cse@gmail.com

