In [9]:
from PyPDF2 import PdfReader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_classic.chains import create_retrieval_chain

import constants
from constants import openai_key

In [11]:
pdf_reader = PdfReader("Md_Abir____Junior_Data_Analyst__Experian.pdf")

In [12]:
from typing_extensions import Concatenate
#reading text from the pdf

raw_text = ""

for i, page in enumerate(pdf_reader.pages):
    content = page.extract_text()
    if content:
        raw_text += content


In [13]:
print(raw_text)

Md Abir Hossain
+44 7507034017 |abirfarabi915@gmail.com |linkedin.com/in/abir-farabi |Birmingham, UK
Summary
MSc graduate in Artificial Intelligence and Machine Learning with hands-on experience in Python, Machine
Learning, LangChain, RAG, SQL and statistical modeling. Built and evaluated traditional machine learning
models in real-world projects. I am seeking Junior Data Analyst / Data Scientist roles. I have the right to work
in the UK.
Projects
SlotFinder |Python, LSTM, Pandas, Matplotlib
•Developed a spatio-temporal car parking prediction system using Python, implementing stacked LSTM models to
forecast slot availability with high accuracy
•Performed data collection ,data cleaning ,feature engineering andtime series preprocessing on
multivariate parking and timestamp data
•Achieved 96% prediction accuracy with low RMSE and MAPE, validating performance using train/validation/test
splits and appropriate evaluation metrics
Sequential LLM Intelligence Pipeline |Python, LangChain (LCEL)

In [14]:
#We need to split the text using Character Text Split such that it should not increase token size
text_splitter = RecursiveCharacterTextSplitter(
    separators = "\n",
    chunk_size = 800,
    chunk_overlap = 200,
    length_function = len
)
texts = text_splitter.split_text(raw_text)

In [15]:
print(len(texts))

5


In [16]:
#Downloading the embeddings from openai
embeddings = OpenAIEmbeddings(openai_api_key = openai_key)

In [17]:
document_search = FAISS.from_texts(texts, embeddings)

In [18]:
document_search

<langchain_community.vectorstores.faiss.FAISS at 0x1be227b7440>

In [19]:
prompts= ChatPromptTemplate.from_template(
    """You are helpful assistant specialized in analyzing PDF documents.
    Answers the user's question using ONLY the provided context
        
    If the answer is not contained within the context, pilitely state that you don't have 
        enough information to answer
        
    <context>
    {context}
    </context>
        
    Question: {input}""")

chain = create_stuff_documents_chain(
    llm = ChatOpenAI(api_key= openai_key),
    prompt= prompts
)

In [20]:
retriever = document_search.as_retriever(search_kwargs={"k": 3})

rag_chain = create_retrieval_chain(retriever, chain)

In [23]:
response = rag_chain.invoke({"input": "Does the specific guy has any work experience?"})

print(response["answer"])

Yes, the specific guy has work experience as a Teaching Assistant from Jun. 2021 – Sep. 2021 at United International University in Dhaka, Bangladesh, and as a Customer Service Assistant from Jan. 2024 to the present at Tesco Birmingham, United Kingdom.
