In [22]:
#Libraries
from llama_index.core import Document, VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from PyPDF2 import PdfReader
import os
from dotenv import load_dotenv


#Load environment variables (load_dotenv reads them from a .env file when one is present)
load_dotenv()
# The key is looked up under the lowercase name "api_key"; os.getenv returns None when it is not set.
api_key = os.getenv("api_key")

#Configure LlamaIndex: register the LLM and the embedding model on the global Settings object
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=api_key)
Settings.embed_model = OpenAIEmbedding(api_key=api_key)

#Obtain Document
# NOTE(review): this module-level `reader` is not referenced by any later cell visible here
# (read_docs builds its own PdfReader per path) — confirm it is still needed.
reader = PdfReader("Ekpo David Friday_resume.pdf")

#Function to retrieve documents
def read_docs(paths: list) -> list:
    """
    Retrieve the text of documents stored as plain-text (.txt) or PDF (.pdf) files.

    Input: paths = list[str] of file paths. The extension is matched
           case-insensitively, so "CV.PDF" and "notes.TXT" are accepted.
    Output: list[str] with one string per path that was read successfully,
            in the same order as `paths`.
            - .txt files are returned verbatim (decoded as UTF-8).
            - .pdf files are returned as the stripped text of every non-empty
              page, joined with a newline.
            Paths that are missing, unreadable or of an unsupported type are
            reported with print() and skipped, so the result can be shorter
            than `paths`.
    """

    documents = {}
    for index, path in enumerate(paths):
        key = f"doc_{index}"
        # Lower-case once so the extension checks ignore case.
        lowered = path.lower()
        if lowered.endswith(".txt"):
            try:
                # Explicit encoding: the platform default is not UTF-8 everywhere.
                with open(path, "r", encoding="utf-8") as f:
                    # Register the document only after the read succeeded, so a
                    # failed read does not leave an empty document behind.
                    documents[key] = [f.read()]
            except FileNotFoundError:
                print(f"File not found for {path}")
            except Exception as e:
                print(f"Error reading file {path}: {e}")
        elif lowered.endswith(".pdf"):
            try:
                reader = PdfReader(path)
                for page in reader.pages:
                    # The entry is created on the first page, so a PDF without
                    # pages contributes no document.
                    page_texts = documents.setdefault(key, [])
                    # Guard against extract_text() yielding None for a page.
                    text = (page.extract_text() or "").strip()
                    if text:
                        page_texts.append(text)
            except FileNotFoundError:
                print(f"File not found for {path}")
            except Exception as e:
                print(f"Error reading file {path}: {e}")
        else:
            # Report unsupported inputs instead of dropping them silently.
            print(f"Unsupported file type for {path}")
    return ["\n".join(text_list) for text_list in documents.values()]


# Try the loader on the resume PDF alone; the returned list is shown as the cell output.
read_docs(["Ekpo David Friday_resume.pdf"])

['SUMMARY\nAug 2025 - Jan 2026 AI Engineer fellow, GEN AI fellowship Nigeria\nDeveloped and implemented Churn model using Python to predict if a customer churns or not with\nmore than 90% accuracy.\nDeveloped a KNN model using Python to classify users into different key segments for marketing\npurposes.\nGenerated comprehensive weekly reports using Google Sheets highlighting key performance\nindicators and trends.\nPresented insights to management for strategic decision making.\nProduced detailed monthly reports containing a wide range of performance metrics.\nCompiled and analyzed a weekly list of newly registered customers or users which helped identify\npatterns and trends to guide marketing and outreach efforts.\nPrepared and analyzed a weekly churn list which helped the CS department identify reasons for\ncustomer attrition and develop strategies for reducing churn.I am a passionate data scientist/ data analyst skilled in data analysis, visualization, problem-solving, business\nin

In [25]:
#Create documents and index
# read_docs returns one string per file it managed to read; each string is wrapped in a LlamaIndex Document.
documents = read_docs(["Ekpo David Friday_resume.pdf", "Quick_info.txt"])
llama_docs = [Document(text = doc) for doc in documents]
# Build the vector index from those documents (uses the models registered on Settings).
index = VectorStoreIndex.from_documents(llama_docs)

#Query
# `query_engine` is reused by the follow-up query cells below.
query_engine = index.as_query_engine()
response = query_engine.query("Give me positions that is suitable for David Ekpo and his chances(probabilities) of landing a senior role in that position")

In [26]:
# Show the answer text of the most recent query as the cell output.
response.response

"Data Scientist, Senior Data Analyst, Machine Learning Engineer, AI Engineer, Business Intelligence Analyst. Chances of landing a senior role in these positions are high given David Ekpo's extensive experience in data analysis, machine learning, business intelligence, and programming, as well as his track record of developing advanced machine learning applications and providing strategic insights to management."

In [27]:
# Follow-up question against the same query engine; rebinds `response` and displays its answer text.
response = query_engine.query("Is David Ekpo a threat to world peace")
response.response

'There is no indication in the provided context information to suggest that David Ekpo is a threat to world peace.'

In [29]:
# Follow-up question against the same query engine; rebinds `response` and displays its answer text.
# NOTE(review): the stored answer below contains personal details — review before sharing the notebook.
response = query_engine.query("Give me information on David Ekpo including personal info")
response.response

"David Ekpo's name is Ekpo David Friday. He holds a Bachelor of Science degree in Mathematics from the University of Ibadan. He is from Nigeria, specifically from Akwa Ibom state. His hobbies include Martial Arts and Warfare. He is a Christian. David Ekpo is 180cm tall and weighs 70kg."

In [30]:
# Follow-up question against the same query engine; rebinds `response` and displays its answer text.
response = query_engine.query("How smart is David Ekpo")
response.response

'David Ekpo is skilled in data analysis, visualization, problem-solving, business intelligence analysis, and programming with 4+ years of experience. He has developed high-end machine learning applications and analysis to provide solutions to social and business problems. Additionally, he has led a team in developing applications, analyzed data to identify patterns and trends, designed databases, and developed predictive models using advanced algorithms.'

In [17]:
# Display the raw loader output for both input files.
# NOTE(review): this cell's execution count (17) is lower than the cells above and its stored
# output holds a single document — re-run after Restart Kernel -> Run All to refresh it.
read_docs(["Ekpo David Friday_resume.pdf", "Quick_info.txt"])

['Basic Information about me\n\nName: Ekpo David Friday\nEducational Background: Bsc. Mathematics\nHigher Institution: University of Ibadan\nHobbies: Martial Arts and Warfare\nGoals: To take over the world\nReligion: Christianity\nCountry: Nigeria\nState Of Origin: Akwa Ibom \nComplexion: Brown\nHeight: 180cm\nWeight: 70kg\n\n\n']