In [1]:
import os

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


# Read the source files from data/Books (relative to the notebook's working directory).
documents = SimpleDirectoryReader("data/Books").load_data(show_progress=True)

# Custom cache directory for the embedding model.
# The location is machine-specific, so it can be overridden with the
# EMBEDDING_CACHE_DIR environment variable; when that variable is unset or
# empty the original path is used, which keeps the previous behaviour.
cache_folder = (
    os.environ.get("EMBEDDING_CACHE_DIR")
    or "C:/Users/claud/Desktop/embedding_model"
)

# bge-base embedding model, registered on the global llama_index Settings.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
    cache_folder=cache_folder,
)

# Build the vector index over the loaded documents; `index` is reused by the
# query-engine cell further down.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)




  from .autonotebook import tqdm as notebook_tqdm
Loading files: 100%|██████████| 5/5 [00:52<00:00, 10.53s/file]
Parsing nodes: 100%|██████████| 2496/2496 [00:01<00:00, 1660.13it/s]
  attn_output = torch.nn.functional.scaled_dot_product_attention(
Generating embeddings: 100%|██████████| 2048/2048 [00:38<00:00, 52.67it/s]
Generating embeddings: 100%|██████████| 496/496 [00:09<00:00, 53.76it/s]


In [2]:
from llama_index.llms.huggingface import HuggingFaceLLM


def messages_to_prompt(messages):
    """Render chat messages into a single Phi-3 style prompt string.

    Each message becomes ``<|role|>\\n{content}<|end|>\\n``. Messages whose role
    is neither ``"system"`` nor ``"assistant"`` (including ``"user"``) are
    emitted under the ``<|user|>`` tag. A trailing ``<|assistant|>\\n`` is
    always appended so the model continues as the assistant, and a default
    system message is placed first when the conversation contains none.

    Args:
        messages: iterable of objects exposing ``role`` and ``content``
            attributes. ``role`` is compared against plain strings with ``==``.

    Returns:
        The assembled prompt as a ``str``.
    """
    segments = []
    has_system = False

    for message in messages:
        role = message.role
        if role == "system":
            tag = "<|system|>"
            has_system = True
        elif role == "assistant":
            tag = "<|assistant|>"
        else:
            # "user" and any unrecognised role share the user tag.
            tag = "<|user|>"

        # A message without text must not inject the literal word "None"
        # into the prompt; render it as an empty turn instead.
        content = "" if message.content is None else message.content
        segments.append(f"{tag}\n{content}<|end|>\n")

    if not has_system:
        # No system turn supplied: fall back to the default instruction.
        segments.insert(
            0, "<|system|>\nYou are a helpful AI assistant.<|end|>\n"
        )

    # Trailing prompt: open the assistant turn for generation.
    segments.append("<|assistant|>\n")

    return "".join(segments)


# Phi-3-mini (4k context, instruct variant) wrapped for llama_index.
# The same Hugging Face repo id is used for both the weights and the tokenizer.
llm = HuggingFaceLLM(
    model_name="microsoft/Phi-3-mini-4k-instruct",
    tokenizer_name="microsoft/Phi-3-mini-4k-instruct",
    # Chat-style usage: message lists are rendered by messages_to_prompt.
    is_chat_model=True,
    messages_to_prompt=messages_to_prompt,
    # Template for a bare query string, using the same tags and default
    # system text as messages_to_prompt.
    query_wrapper_prompt="<|system|>\nYou are a helpful AI assistant.<|end|>\n<|user|>\n{query_str}<|end|>\n<|assistant|>\n",
    model_kwargs=dict(trust_remote_code=False),
    # Sampling enabled with a low temperature.
    generate_kwargs=dict(do_sample=True, temperature=0.1),
)








Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]
Some parameters are on the meta device because they were offloaded to the cpu.


In [3]:
# Make the Phi-3 model the global LLM, then derive a query engine from the
# vector index with streaming and progress display requested.
Settings.llm = llm
query_engine = index.as_query_engine(
    streaming=True,
    show_progress=True,
)

In [4]:

# Run the question through the query engine and print the response stream.
streaming_response = query_engine.query(
    "What is the best activation function for neural networks?"
)
streaming_response.print_response_stream()

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


The best activation function for neural networks depends on the specific problem and the characteristics of the data. However, ReLU (Rectified Linear Unit) activation function is commonly used due to its simplicity and efficiency. It helps avoid the vanishing gradient problem and allows for faster convergence during training. Other activation functions like ELU (Exponential Linear Unit) and SELU (Scaled Exponential Linear Unit) also have their advantages, such as self-normalizing properties and faster convergence rates. Ultimately, the choice of activation function should be based on the specific requirements of the neural network and the problem being solved.