In [15]:
from langchain_community.docstore.document import Document
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from dotenv import load_dotenv
from PyPDF2 import PdfReader
import os
import warnings
warnings.filterwarnings("ignore")

In [21]:
#Function to retrieve documents
def read_docs(paths: list) -> str:
    """
    Function to retrieve documents in pdf and text formats.
    Input: paths = list[str]
    output: retrieved_document = str
    """

    documents = {}
    for index, path in enumerate(paths):
        if path.endswith(".txt"):
            try:
                with open(path, "r") as f:
                    documents[f"doc_{index}"] = []
                    documents[f"doc_{index}"].append(f.read())
            except FileNotFoundError:
                print(f"File not found for {path}")
            except Exception as e:
                print(f"Error reading file {path}: {e}")
        elif path.endswith(".pdf"):
            try:
                reader = PdfReader(path)
                for i in range(len(reader.pages)):
                    if i == 0:
                        documents[f"doc_{index}"] = []
                    text = reader.pages[i].extract_text().strip()
                    if text:
                        documents[f"doc_{index}"].append(text)
            except FileNotFoundError:
                print(f"File not found for {path}")
            except Exception as e:
                print(f"Error reading file {path}: {e}")
    return ["\n".join(text_list) for text_list in documents.values()]

documents = read_docs(["Ekpo David Friday_resume.pdf", "Quick_info.txt"])

In [22]:
load_dotenv()
api_key = os.getenv("api_key")
documents = [Document(page_content = doc) for doc in documents]
embeddings = OpenAIEmbeddings(api_key=api_key)
vector_store = Chroma.from_documents(
    documents,
    embeddings,
    collection_name = "rag_collection",
    persist_directory= "./chat_history"
)

retriever = vector_store.as_retriever()

#LLM
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    api_key= api_key
)

#Prompt
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert recruiter and psychologist"),
    ("human", "{question}\n\nContext:\n{context}")]
)

#Build RAG Pipeline
rag_chain = (
    RunnableParallel(context = retriever, question = RunnablePassthrough())
    |prompt
    |llm
)

#Query
response = rag_chain.invoke("Who is David Ekpo")
print(response)

content='David Ekpo, also known as Ekpo David Friday, is an individual from Nigeria with a background in mathematics, having earned a Bachelor of Science degree from the University of Ibadan. He has a keen interest in martial arts and warfare, which suggests a strong inclination towards physical activity and possibly strategic thinking. \n\nHis stated goal of wanting to "take over the world" may be interpreted humorously or as an expression of ambition and desire for influence. He identifies as a Christian and hails from Akwa Ibom State. Physically, he is described as having a brown complexion, standing 180 cm tall and weighing 70 kg. \n\nOverall, David appears to be a driven individual with a unique blend of academic and personal interests.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 148, 'prompt_tokens': 383, 'total_tokens': 531, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'r

In [23]:
response.content

'David Ekpo, also known as Ekpo David Friday, is an individual from Nigeria with a background in mathematics, having earned a Bachelor of Science degree from the University of Ibadan. He has a keen interest in martial arts and warfare, which suggests a strong inclination towards physical activity and possibly strategic thinking. \n\nHis stated goal of wanting to "take over the world" may be interpreted humorously or as an expression of ambition and desire for influence. He identifies as a Christian and hails from Akwa Ibom State. Physically, he is described as having a brown complexion, standing 180 cm tall and weighing 70 kg. \n\nOverall, David appears to be a driven individual with a unique blend of academic and personal interests.'