Goal: load Tiara's resumes and CVs and turn them into embeddings. As a first pass, use the embedding model referenced in the LlamaIndex starter tutorial.

In [3]:
import logging
import sys

# Route log records to stdout so they appear in the cell output; only ERROR
# and above are emitted.
# basicConfig() already attaches a stdout StreamHandler to the root logger, so
# no second handler is added by hand: doing so would print every record twice
# and would stack up one more duplicate each time this cell is re-run.
logging.basicConfig(stream=sys.stdout, level=logging.ERROR)

In [5]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [7]:
from llama_index.llms.ollama import Ollama

In [9]:
# Read every file found under the "data_tiara" directory into LlamaIndex
# document objects.
documents = SimpleDirectoryReader(
    input_dir="data_tiara",
).load_data()

In [11]:
# Register the global embedding model used by LlamaIndex.
# LangChain's docs describe the BGE models on HuggingFace as
# "the best open-source embedding models."
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
)

In [13]:
# Register the global LLM: Llama 3.1 8B (instruct, q4_0 quantization) served
# through Ollama, with a 360-second request timeout.
Settings.llm = Ollama(
    model="llama3.1:8b-instruct-q4_0",
    request_timeout=360.0,
)

In [15]:
# Show the current chunking configuration (size, then overlap) before changing it.
print(f"{Settings.chunk_size} {Settings.chunk_overlap}")

1024 200


In [17]:
# Switch to smaller chunks (256) with a 50 overlap; this must run before the
# index is built for the new values to be used.
Settings.chunk_size, Settings.chunk_overlap = 256, 50

In [19]:
# Build the vector index over the loaded documents, using the globally
# configured Settings (embedding model, chunk size/overlap).
index_tiara = VectorStoreIndex.from_documents(documents)

In [21]:
# Wrap the index in a query engine and ask a first question against it.
query_engine = index_tiara.as_query_engine()

response = query_engine.query(
    "What did Tiara study?"
)
print(str(response))

Pathobiology and Molecular Medicine.


In [23]:
# Follow-up question against the same query engine.
response = query_engine.query(
    "Where did Tiara go for undergrad?"
)
print(str(response))

Michigan State University.


In [14]:
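# NOTE(review): leftover scratch cell with an out-of-order execution count; a comment-only cell cannot produce the 'None' output shown below, so that output is presumably stale. Consider deleting this cell.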

'None'