In [72]:
from dotenv import load_dotenv
import os
from langchain_mistralai import ChatMistralAI
import numpy as np
from IPython.display import Markdown

In [53]:
# Load environment variables from a local .env file (python-dotenv) so the
# API key lookup later in the notebook can read them via os.getenv.
load_dotenv()

True

In [48]:
# Read the Mistral API key from the environment (expected to come from the
# .env file loaded earlier in the notebook).
mistral_api_key = os.getenv("MISTRAL_API_KEY")

if mistral_api_key:
    # The model is selected with `model=`. The previous `name=` keyword only
    # labels the runnable and left the client on its default model.
    mistral_model = ChatMistralAI(model="open-mixtral-8x7b")
else:
    # Without a key, `mistral_model` stays undefined, so later cells that use
    # it will fail until the key is provided.
    print("Couldn't load API key.")

## Loading stage

In [12]:
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import HTMLTagReader

In [13]:
# Directory the documents are loaded from; the path is relative to the
# notebook's working directory.
article_dir = "./data"

In [30]:
# Route every .html file through an HTMLTagReader configured for the <html>
# tag, then load everything found in `article_dir` into `docs`.
html_parser = HTMLTagReader(tag="html")
file_extractor = {".html": html_parser}
directory_reader = SimpleDirectoryReader(
    input_dir=article_dir,
    file_extractor=file_extractor,
)
docs = directory_reader.load_data()

## Indexing stage

In [29]:
from llama_index.core import VectorStoreIndex

When you use from_documents, your Documents are split into chunks and parsed into Node objects, lightweight abstractions over text strings that keep track of metadata and relationships.

By default, VectorStoreIndex stores everything in memory. 

https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/  
`Settings` is a bundle of commonly used resources shared by the indexing and querying stages of a LlamaIndex workflow/application.

In [33]:
from llama_index.core import Settings

In [59]:
# Register the Mistral chat model as the LLM on LlamaIndex's global Settings.
# `mistral_model` only exists if the API key was found when it was created.
Settings.llm = mistral_model

https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings/ - the supported embedding models are listed at the bottom of the page

https://huggingface.co/spaces/mteb/leaderboard - embedding leaderboard

In [34]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [35]:
# Instantiate the HuggingFace embedding model by its hub name; the download
# progress shown in this cell's output comes from fetching the model files.
hf_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [43]:
# Sanity check: embed one sample sentence with the embedding model.
# Despite the plural name, this holds a single embedding vector.
embeddings = hf_embed_model.get_text_embedding("Hello, I'm Cristiano Ronaldo - the GOAT")

In [44]:
# View the sample embedding as a NumPy array: print its dimensionality, then
# preview the first 25 components as the cell's displayed value.
np_embeddings = np.asarray(embeddings)
print(np_embeddings.shape)
np_embeddings[0:25]

(384,)


array([-0.05185727,  0.02237587,  0.03083039, -0.0498937 ,  0.04089728,
        0.03665009,  0.10690992,  0.03151881,  0.11420359,  0.00775344,
        0.01789973, -0.07713404, -0.01182764,  0.01477604,  0.02602616,
       -0.04921474, -0.01720965, -0.0254571 , -0.09258013, -0.01208582,
        0.02490713,  0.01544346, -0.08535224, -0.0438053 ,  0.00242467])

In [45]:
# Register the HuggingFace model as the embedding model on the global Settings.
Settings.embed_model = hf_embed_model

In [46]:
# Chunk size used when documents are split into nodes.
# NOTE(review): presumably measured in tokens - confirm against the Settings docs.
Settings.chunk_size = 1024

The tokenizer is used to count tokens. This should be set to something that matches the LLM you are using.

In [54]:
from transformers import AutoTokenizer

# Use the Mixtral tokenizer for token counting so counts line up with the LLM.
# NOTE(review): depending on the LlamaIndex version, Settings.tokenizer may
# expect a callable such as the tokenizer's `.encode` - confirm.
Settings.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-v0.1")

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

https://docs.llamaindex.ai/en/stable/module_guides/storing/vector_stores/ - list of vector stores

In [55]:
# Build the vector index from the loaded documents. show_progress=True
# produces the "Parsing nodes" / "Generating embeddings" progress bars.
index = VectorStoreIndex.from_documents(docs, show_progress=True)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/6 [00:00<?, ?it/s]

## Querying stage

https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/

In [61]:
# Create a query engine over the index with its default options.
query_engine = index.as_query_engine()

In [79]:
# Instruction text prepended to every question. It starts and ends with a
# newline, so the question begins on its own line when concatenated.
system_prompt = (
    "\n"
    "You are a Q&A assistant. Your goal is to answer questions as\n"
    "accurately as possible based on the instructions and context provided.\n"
)
# The question to ask about the indexed documents.
user_prompt = "How much Safe SuperIntelligence raise?"

In [80]:
# Send the instruction text and the question to the query engine as one
# combined query string.
# NOTE(review): the instruction text is presumably also used for retrieval;
# consider configuring it as a system prompt/template instead - confirm.
response = query_engine.query(system_prompt + user_prompt)

In [81]:
# Render the answer text as Markdown in the cell output.
Markdown(response.response)

Safe Superintelligence raised $1 billion.