# Resume that speaks for itself

### Import libraries

In [1]:
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (VectorStoreIndex, SimpleDirectoryReader, PromptTemplate)
from llama_index.core import StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.node_parser import SentenceSplitter
import chromadb

  from .autonotebook import tqdm as notebook_tqdm


### Data Indexing

#### Import document

In [2]:
# Read the resume PDF. `input_files` takes an explicit list of paths, so only
# this one file is handed to the reader.
resume_reader = SimpleDirectoryReader(input_files=["./Diana_Morales_Resume.pdf"])
documents = resume_reader.load_data()

#### Set up the llm and embedding model

In [3]:
# Generation model: "llama3", accessed through the Ollama integration.
llm = Ollama(
    model="llama3",
)

# Embedding model: a Hugging Face checkpoint referenced by its model name.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

#### Set up the vector database

In [4]:
# Name of the Chroma collection that backs the index.
COLLECTION_NAME = "ollama"

# In-memory Chroma client; nothing is written to disk.
chroma_client = chromadb.EphemeralClient()

# Start from an empty collection on every run. A bare `create_collection`
# raises when the name already exists, which makes this cell fail if it is
# re-executed in a live kernel.
# NOTE(review): this assumes ephemeral clients created in the same process
# share their in-memory state -- confirm for the installed chromadb version.
# Ensuring the collection exists first means the delete can never fail on a
# fresh kernel, regardless of which exception type the installed version uses.
chroma_client.get_or_create_collection(COLLECTION_NAME)
chroma_client.delete_collection(COLLECTION_NAME)
chroma_collection = chroma_client.create_collection(COLLECTION_NAME)

# Wrap the collection for llama-index and expose it through a storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

#### Put it all together into an index

In [5]:
# Splitter applied to the loaded documents before they are embedded.
resume_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=10)

# Everything besides the documents themselves: where vectors are stored,
# which model embeds them, and how the text is chunked.
index_options = dict(
    storage_context=storage_context,
    embed_model=embed_model,
    transformations=[resume_splitter],
)

index = VectorStoreIndex.from_documents(documents, **index_options)

### Data Retrieval and Generation

#### Set up query template

In [24]:
# Custom prompt template.
# `{context_str}` and `{query_str}` are placeholders left in the text for later
# substitution. Adjacent string literals are concatenated exactly as written,
# so each fragment has to carry its own trailing separator; without one, the
# first two sentences run together as "...experience.Here is some context...".
template = (
    "You are Diana's assistant and you answer recruiters' questions about her experience.\n"
    "Here is some context from her resume:\n"
    "-----------------------------------------\n"
    "{context_str}\n"
    "-----------------------------------------\n"
    "Considering the above information, please respond to the following question:\n\n"
    "{query_str}\n"
    "Answer succinctly."
)
# Wrap the raw prompt string in a PromptTemplate object.
qa_template = PromptTemplate(template=template)

#### Put it all together in a query engine

In [25]:
# Options for the query engine built on top of the index: the LLM to use,
# the custom QA prompt, and the `similarity_top_k` setting.
engine_options = {
    "llm": llm,
    "text_qa_template": qa_template,
    "similarity_top_k": 3,
}

query_engine = index.as_query_engine(**engine_options)

### Run the RAG Pipeline

In [26]:
# First question for the pipeline; the result object is kept in `response`
# so it can be inspected afterwards.
python_question = "Does Diana have experience with Python?"
response = query_engine.query(python_question)

In [27]:
# Display the `response` attribute of the query result (the answer text).
response.response

'Yes, according to her resume, Diana has Python as one of her skills listed under the "SKILLS" section.'

In [30]:
# Ask about overall experience and display the answer text.
experience_result = query_engine.query("What is Diana's experience?")
experience_result.response

'Diana has 2+ years of experience as a Data Scientist at Accenture (April 2022 - Present), and previously held internships at Credit Ninja (May 2020 - August 2020) and AI Research Scholar at Illinois Tech (July 2021 - December 2021).'

In [35]:
# Ask about projects and display the answer text.
projects_result = query_engine.query("What projects has Diana worked on?")
projects_result.response

"Based on Diana's resume, she has worked on the following projects:\n\n1. Developed and implemented a multilingual NLP model for audience segmentation using VertexAI in GCP, achieving 90% accuracy rate and 66% reduction in false negatives.\n2. Fine-tuned a large language model to discern customer sentiment and extract pros and cons from customer feedback, enhancing actionable insights.\n3. Crafted and delivered educational presentations on AI topics, educating the broader Data and AI community.\n4. Detected and quantified bias in fake news language using NLP, statistics, and research-backed methods to address disinformation.\n5. Applied AI algorithms to address disinformation, data bias, and algorithmic bias.\n\nThese projects demonstrate her expertise in NLP, machine learning, and big data analytics."