In [22]:
from langchain_community.document_loaders import PDFPlumberLoader

# Read the CV from disk; the length of `docs` is reported below as the page count.
loader = PDFPlumberLoader("./pdfs/cv.pdf")
docs = loader.load()

# Report how many pages were loaded.
page_count = len(docs)
print("Number of pages in the PDF:", page_count)

# Show the text of the first page (index 0) as the cell output.
first_page = docs[0]
first_page.page_content

Number of pages in the PDF: 1


"KHALSS YASSINE\nyassinekh007007@gmail.com +212 6 27 68 92 95 Casablanca, MOROCCO\ngithub.com/YASSINEKS007 https://khalss-yassine-portfolio-website.vercel.app/\nPROFILE\nI'm a student with a passion for artificial intelligence and machine learning, currently in my first year of a\nmaster's degree in distributed systems and artificial intelligence at ENSET Mohammedia. Thanks to my\ncourses and academic projects, I have acquired solid skills in programming and data analysis. I'm keen to\napply and further develop these skills in a stimulating professional environment.\nEXPERIENCES\nInternship 04/2023 – 05/2023\nNational Electricity and Drinking Water Board (ONEE)\nAs part of my final year project,I did a two-month internship at ONEE, where I\ndesigned and developed a web application for the company's fuel management\nsystem.\nEDUCATION\nMaster in Distributed Systems and ArtificialIntelligence (SDIA) 09/2023 – 06/2025\nEcole normale supérieure de l'enseignement technique(ENSET) Mohammedia

In [23]:
from langchain_experimental.text_splitter import SemanticChunker
# Import from `langchain_community.embeddings` rather than the deprecated
# `langchain.embeddings` re-export, so the whole notebook uses one import path
# for HuggingFaceEmbeddings.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Split the loaded pages into chunks with SemanticChunker, which is given a
# default-configured HuggingFaceEmbeddings model to work with.
text_splitter = SemanticChunker(HuggingFaceEmbeddings())
documents = text_splitter.split_documents(docs)

In [24]:
# Report how many chunks the splitter produced.
chunk_total = len(documents)
print("Number of chunks created: ", chunk_total)

Number of chunks created:  2


In [25]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model (default configuration) used to vectorise the chunks.
embedder = HuggingFaceEmbeddings()

# Build the FAISS vector store from the chunked documents.
vector = FAISS.from_documents(documents=documents, embedding=embedder)

In [26]:
# Similarity-search retriever over the vector store, asking for up to 3 matches.
# NOTE(review): the recorded run produced only 2 chunks, so k=3 presumably
# returns at most 2 documents — confirm this is intended.
retriever = vector.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=3),
)

# Quick check of the retriever with a short query.
retrieved_docs = retriever.invoke("education?")

In [27]:
# Display the documents returned by the retriever for the test query.
retrieved_docs

[Document(page_content="KHALSS YASSINE\nyassinekh007007@gmail.com +212 6 27 68 92 95 Casablanca, MOROCCO\ngithub.com/YASSINEKS007 https://khalss-yassine-portfolio-website.vercel.app/\nPROFILE\nI'm a student with a passion for artificial intelligence and machine learning, currently in my first year of a\nmaster's degree in distributed systems and artificial intelligence at ENSET Mohammedia. Thanks to my\ncourses and academic projects, I have acquired solid skills in programming and data analysis. I'm keen to\napply and further develop these skills in a stimulating professional environment. EXPERIENCES\nInternship 04/2023 – 05/2023\nNational Electricity and Drinking Water Board (ONEE)\nAs part of my final year project,I did a two-month internship at ONEE, where I\ndesigned and developed a web application for the company's fuel management\nsystem. EDUCATION\nMaster in Distributed Systems and ArtificialIntelligence (SDIA) 09/2023 – 06/2025\nEcole normale supérieure de l'enseignement techni

In [28]:
# Number of chunks held in `documents` (same value as printed earlier in the notebook).
len(documents)

2

In [29]:
from langchain_community.llms import Ollama

# Model name handed to the Ollama wrapper; `llm` is what the QA chain uses.
model_name = "mistral"
llm = Ollama(model=model_name)

In [30]:
from langchain.chains import RetrievalQA
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate

# Question-answering prompt. `{context}` is filled with the formatted retrieved
# documents and `{question}` with the question being asked.
# The stray "\n" escape that used to end rule 2 was removed: in a non-raw
# string it inserted an unintended blank line between rules 2 and 3.
prompt = """
1. Use the following pieces of context to answer the question at the end.
2. If you don't know the answer, just say that "I don't know" but don't make up an answer on your own.
3. Keep the answer crisp and limited to 3,4 sentences.

Context: {context}

Question: {question}

Helpful Answer:"""


QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt)

# Chain that renders QA_CHAIN_PROMPT and sends it to the LLM.
llm_chain = LLMChain(
    llm=llm,
    prompt=QA_CHAIN_PROMPT,
    callbacks=None,
    verbose=False,
)

# Template applied to each retrieved document before it is placed in the prompt.
# It reads the document text and its `source` metadata field.
# NOTE(review): every document is rendered under its own "Context:" heading here
# and the QA prompt already has a "Context:" label — consider dropping one.
document_prompt = PromptTemplate(
    input_variables=["page_content", "source"],
    template="Context:\ncontent:{page_content}\nsource:{source}",
)

# Combines the formatted documents and passes them to `llm_chain` under the
# `context` prompt variable.
combine_documents_chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_variable_name="context",
    document_prompt=document_prompt,
    callbacks=None,
    verbose=False,
)

# End-to-end retrieval QA chain built from the retriever and the combine chain.
# Source documents are not included in the response.
qa = RetrievalQA(
    combine_documents_chain=combine_documents_chain,
    verbose=False,
    retriever=retriever,
    return_source_documents=False,
)

In [31]:
question = "What are the frameworks this candidat knows"

# Run the chain through `invoke` instead of calling the chain object directly:
# direct calls (`qa({...})`) are deprecated in LangChain, and the retriever in
# this notebook is already used via `.invoke`.
response = qa.invoke({"query": question})

# Read the answer from the "result" key, falling back to a placeholder if it is absent.
answer = response.get('result', 'No answer found')
print("Answer:", answer)


Answer:  The candidate is familiar with Backend Frameworks such as Django, Spring Boot, and Express.js, and Frontend Frameworks including React.js and Angular.
