In [1]:
import nest_asyncio

# Patch asyncio so nested event loops are permitted (the notebook kernel already runs one).
nest_asyncio.apply()

In [2]:
import logging
import sys

# Route INFO-level logs to stdout so they show up in the cell output.
# basicConfig() already attaches a stdout StreamHandler to the root logger;
# registering a second one on the same stream printed every record twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index.core import VectorStoreIndex
from llama_index.core import PromptTemplate
from IPython.display import Markdown, display

In [6]:
import asyncio
from typing import Literal, Union, Generator, Iterator
from pydantic import BaseModel

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.llms.groq import Groq
from llama_index.llms.vllm import Vllm
from llama_index.llms.openai_like import OpenAILike
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import HierarchicalNodeParser, MarkdownNodeParser
from llama_index.core.chat_engine.types import ChatMode
from llama_index.core.base.llms.types import ChatMessage

from llama_parse import LlamaParse
from llama_parse.utils import (
    ResultType,
    Language,
)

from transformers import AutoTokenizer
import os
import pathlib
import httpx
import glob

In [4]:
def flatten(L):
    """Return a flat list of the non-list items found in ``L``.

    A value that is not a ``list`` is wrapped in a one-element list.
    Nested lists are expanded recursively, depth-first, preserving order.
    Only ``list`` instances are expanded; tuples and other iterables are
    kept as single items.
    """
    if not isinstance(L, list):
        return [L]

    flat = []
    for item in L:
        flat.extend(flatten(item))
    return flat

In [None]:
def pipe(
    self,
    user_message: str,
    model_id: str,
    messages: list[dict],
    body: dict,
) -> Union[str, Generator, Iterator]:
    """Stream the chat engine's reply to ``user_message``.

    ``messages`` is converted with ``self.convert_history_to_chat_history``
    and handed to ``self.engine.stream_chat`` as the chat history.
    ``model_id`` and ``body`` are accepted but not used in this body.

    Returns:
        The ``response_gen`` attribute of the object returned by
        ``stream_chat``.
    """
    stream_chat = self.engine.stream_chat
    chat_history = self.convert_history_to_chat_history(messages)
    streaming_response = stream_chat(user_message, chat_history=chat_history)
    return streaming_response.response_gen

In [10]:
# Sample chat history in list-of-dicts form, using 'role' and 'content' keys.
history = [{'role': 'user', 'content': 'que modelo tu é?'}]

# Each dict is unpacked into ChatMessage keyword arguments; the resulting list is the cell's displayed value.
[ChatMessage(**msg) for msg in history]

[ChatMessage(role=<MessageRole.USER: 'user'>, content='que modelo tu é?', additional_kwargs={})]

In [6]:
# Scratch copy of the system prompt, built from adjacent string literals.
# NOTE(review): the text appears to match `sys_prompt_str` defined later in this
# notebook — consider keeping a single definition.
s = (
            "You are an AI assistant designed to answer questions using the provided context.\n"
            "Your goal is to help students and teachers by providing cohesive and correct responses based on educational material, while applying guided learning techniques. Give examples and cite the context whenever possible.\n\n"
            "## Instructions\n"
            "1. External Information: Use external information from the vector database to answer questions. Select the most relevant and reliable information available.\n"
            "2. Guided Learning Techniques: Avoid giving direct answers. Instead, guide the user through the learning process, encouraging critical thinking and discovery.\n"
            "3. Coherent and Correct Responses: Ensure that all responses are coherent and correct, strictly following the educational material provided.\n"
            "4. Inference Capability: Use your skills to accurately deduce and infer information.\n"
            "5. User-Friendly Interface: Be easy to use and access. Provide clear and well-structured responses suitable for a web interface.\n"
            "6. Value Addition: Add value for both students and teachers. Offer useful insights, pedagogical guidance, and support the teaching-learning process.\n"
            "7. Best-effort: The user is a beginner, and may use terms incorrectly or in other languages. Do your best to understand what they mean.\n\n"
            "## User Interaction\n"
            "- Interactive Guidance: Ask the user if they would like more details or additional examples.\n"
            "- Encourage Exploration: Motivate users to explore more about the topic by suggesting additional resources or related questions.\n\n"
            "## Additional Information\n"
            "- Utilize the context provided in the vector database to enrich your responses.\n"
            "- Ensure your answers are always up-to-date and based on the most recent information available.\n\n"
            "Your mission is to provide a rich and interactive learning experience, helping students and teachers achieve their educational goals efficiently and effectively.\n"
        )

In [7]:
# Print the prompt so the "\n" escapes render as real line breaks.
print(s)

You are an AI assistant designed to answer questions using the provided context.
Your goal is to help students and teachers by providing cohesive and correct responses based on educational material, while applying guided learning techniques. Give examples and cite the context whenever possible.

## Instructions
1. External Information: Use external information from the vector database to answer questions. Select the most relevant and reliable information available.
2. Guided Learning Techniques: Avoid giving direct answers. Instead, guide the user through the learning process, encouraging critical thinking and discovery.
3. Coherent and Correct Responses: Ensure that all responses are coherent and correct, strictly following the educational material provided.
4. Inference Capability: Use your skills to accurately deduce and infer information.
5. User-Friendly Interface: Be easy to use and access. Provide clear and well-structured responses suitable for a web interface.
6. Value Additio

In [8]:
# Set the global default embedding model on llama_index's Settings.
# Exactly one assignment is active; the other candidate models are left
# commented out for quick switching.
# Settings.embed_model = HuggingFaceEmbedding('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
Settings.embed_model = HuggingFaceEmbedding('BAAI/bge-m3', trust_remote_code=True)
# Settings.embed_model = HuggingFaceEmbedding('mixedbread-ai/mxbai-embed-large-v1', trust_remote_code=True)
# Settings.embed_model = HuggingFaceEmbedding('intfloat/multilingual-e5-large', trust_remote_code=True)
# Settings.embed_model = HuggingFaceEmbedding('intfloat/e5-mistral-7b-instruct', trust_remote_code=True)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-m3
Load pretrained SentenceTransformer: BAAI/bge-m3




INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']


In [9]:
# Root data directory (relative to the notebook): PDFs are globbed under it and
# the markdown corpus is read from its 'md' subdirectory.
base_path = 'rag-data'

In [10]:
if not os.path.exists(base_path + '/md'):
    parser = LlamaParse(
        api_key="llx-M5lzihReCwlXIsJa95woKmAOzcoxdllFca4sq3UScK0MGARW",
        result_type=ResultType.MD,
        language=Language.PORTUGUESE,
        verbose=True,
    )

    tasks = []
    for file in glob.glob(base_path + '/**/*.pdf', recursive=True):
        tasks.append(parser.aload_data(file, extra_info={'filename': os.path.basename(file)}))

    res = await asyncio.gather(*tasks)
    documents = flatten(res)

    for document in documents:
        fname = pathlib.Path(document.metadata['filename']).stem + '.md'
        with open(os.path.join(base_path, 'raw_from_llamaparse', fname), 'w') as f:
            f.write(document.text)
else:
    from llama_index.core.node_parser import HierarchicalNodeParser, MarkdownNodeParser

    documents = SimpleDirectoryReader(base_path + '/md/').load_data()


In [11]:
# Build an in-memory vector index: documents are first split by
# MarkdownNodeParser, then by HierarchicalNodeParser with chunk sizes
# 2048/512/128 and an overlap of 30.
# NOTE(review): `index` is reassigned by the persist/load cell further down,
# so this build appears to be redundant work — consider removing it.
index = VectorStoreIndex.from_documents(
    documents,
    transformations=[
        MarkdownNodeParser(),
        HierarchicalNodeParser.from_defaults(chunk_sizes=[2048, 512, 128], chunk_overlap=30),
    ],
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [12]:
# System prompt for the tutoring assistant. It is used as the "system" message
# of the QA/refine chat templates and passed as `system_prompt` to the chat
# engines created later in the notebook.
# The trailing backslash after the opening quotes suppresses a leading newline.
sys_prompt_str = """\
You are an AI assistant designed to answer questions using the provided context.
Your goal is to help students and teachers by providing cohesive and correct responses based on educational material, while applying guided learning techniques. Give examples and cite the context whenever possible.

## Instructions
1. External Information: Use external information from the vector database to answer questions. Select the most relevant and reliable information available.
2. Guided Learning Techniques: Avoid giving direct answers. Instead, guide the user through the learning process, encouraging critical thinking and discovery.
3. Coherent and Correct Responses: Ensure that all responses are coherent and correct, strictly following the educational material provided.
4. Inference Capability: Use your skills to accurately deduce and infer information.
5. User-Friendly Interface: Be easy to use and access. Provide clear and well-structured responses suitable for a web interface.
6. Value Addition: Add value for both students and teachers. Offer useful insights, pedagogical guidance, and support the teaching-learning process.
7. Best-effort: The user is a beginner, and may use terms incorrectly or in other languages. Do your best to understand what they mean.

## User Interaction
- Interactive Guidance: Ask the user if they would like more details or additional examples.
- Encourage Exploration: Motivate users to explore more about the topic by suggesting additional resources or related questions.

## Additional Information
- Utilize the context provided in the vector database to enrich your responses.
- Ensure your answers are always up-to-date and based on the most recent information available.

Your mission is to provide a rich and interactive learning experience, helping students and teachers achieve their educational goals efficiently and effectively.
"""

# User-turn template for question answering.
# Placeholders: {context_str} (retrieved context) and {query_str} (the question).
qa_prompt_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the question: {query_str}\n"
)

# User-turn template for refining an existing answer with additional context.
# Placeholders: {context_msg}, {query_str} and {existing_answer}.
refine_prompt_str = (
    "We have the opportunity to refine the original answer "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
    "Given the new context, refine the original answer to better "
    "answer the question: {query_str}. "
    "If the context isn't useful, output the original answer again.\n"
    "Original Answer: {existing_answer}"
)

In [20]:
# Global default LLM. The local Ollama alternative is kept commented out.
# Settings.llm = Ollama(model="llama3", request_timeout=360.0)
# The Groq key is read from the environment instead of being embedded in the
# notebook; a KeyError here means GROQ_API_KEY is not set.
# NOTE(review): the key previously hardcoded on this line should be treated as
# leaked and rotated.
Settings.llm = Groq(model='llama3-70b-8192', api_key=os.environ['GROQ_API_KEY'])

In [14]:
from llama_index.core.storage import StorageContext
from llama_index.core import (
    load_index_from_storage,
)

# Disk cache for the vector index: build and persist it once, then reload it
# on later runs.
persist_dir = "./persist"
if not os.path.exists(persist_dir):
    # Cold start: register the documents in a fresh storage context, build the
    # index through the markdown + hierarchical parsers, then write it to disk.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(documents)
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        transformations=[
            MarkdownNodeParser(),
            HierarchicalNodeParser.from_defaults(chunk_sizes=[2048, 512, 128], chunk_overlap=30),
        ],
    )
    storage_context.persist(persist_dir)
else:
    # Warm start: reuse whatever was persisted earlier.
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)




Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [15]:
from llama_index.core.prompts.system import DEFAULT
from llama_index.core import ChatPromptTemplate

# QA prompt: the system instructions followed by the context/question user turn.
chat_text_qa_msgs = [("system", sys_prompt_str), ("user", qa_prompt_str)]
text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

# Refine prompt: the same system instructions followed by the refine user turn.
chat_refine_msgs = [("system", sys_prompt_str), ("user", refine_prompt_str)]
refine_template = ChatPromptTemplate.from_messages(chat_refine_msgs)

In [21]:
from llama_index.core.chat_engine import CondensePlusContextChatEngine

# Chat engine built on the index's default retriever, with verbose output enabled.
# NOTE(review): with the `context_prompt` override commented out, `sys_prompt_str`
# is not passed to this engine at all — confirm that is intended.
chat_engine = CondensePlusContextChatEngine.from_defaults(
    index.as_retriever(),
    # context_prompt=sys_prompt_str + (
    #     "Here are the relevant documents for the context:\n"
    #     "{context_str}"
    #     "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
    #     "\nPlease answer in the same language as the question."
    # ),
    verbose=True,
)

In [27]:
# First chat turn; the engine was created with verbose=True, so the condensed
# question and retrieved context are echoed in the cell output.
response = chat_engine.chat('O que é um algoritmo? Responda em português.')

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:llama_index.core.chat_engine.condense_plus_context:Condensed question: Qual é a definição de um algoritmo?
Condensed question: Qual é a definição de um algoritmo?
Condensed question: Qual é a definição de um algoritmo?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Context: file_path: /home/debem/personal/pucrs/tcc/cur/monografia/rag-data/md/aula-introucao-p1.md

Definições

Algoritmo é um conjunto finito de regras, bem definidas, para a solução de um problema em um tempo finito.
---
Definições

Algoritmo é um texto (do tipo receita de bolo) onde cada linha contém uma ação primitiva (ação elementar passível de execução por um humano ou uma máquina).

A função do algoritmo, quando executado, é operar sobre os dados, transformando-os em saídas.
---
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [28]:
# Show the answer text of the previous chat turn.
print(response)

De acordo com o documento, um algoritmo é um conjunto finito de regras bem definidas para a solução de um problema em um tempo finito. Além disso, também é definido como um texto (como uma receita de bolo) onde cada linha contém uma ação primitiva (ação elementar passível de execução por um humano ou uma máquina).


In [28]:
# Test question reused by the engine comparisons in the following cells;
# the commented-out line is an alternative question.
# query = 'o que é um laço?'
query = 'O que que vai cair na prova? Eu não entendi algoritmos'

In [29]:
# Send `query` as the next turn of `chat_engine` and print the reply
# (print() applies str() to its argument).
print(chat_engine.chat(query))

INFO:llama_index.core.chat_engine.condense_plus_context:Condensed question: O que que vai cair na prova? Eu não entendi algoritmos
Condensed question: O que que vai cair na prova? Eu não entendi algoritmos
Condensed question: O que que vai cair na prova? Eu não entendi algoritmos


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Context: file_path: /home/debem/personal/pucrs/tcc/cur/monografia/rag-data/md/aula-introucao-p1.md

Definições

Algoritmo é um conjunto finito de regras, bem definidas, para a solução de um problema em um tempo finito.
---
Definições

Algoritmo é um texto (do tipo receita de bolo) onde cada linha contém uma ação primitiva (ação elementar passível de execução por um humano ou uma máquina).

A função do algoritmo, quando executado, é operar sobre os dados, transformando-os em saídas.
---
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
Não se preocupe! Algoritmos podem ser um pouco confusos no início, mas com um pouco de prática e explicação, você vai entender facilmente.

Vamos começar com a definição de algoritmo. Um algoritmo é um conjunto finito de regras, bem definidas, para a solução de um problema em um tempo finito. Ou, como foi definido de outra for

In [22]:
# ReAct-mode chat engine over the index.
# NOTE(review): despite the name, this is a chat engine (created via as_chat_engine), not a query engine.
query_engine = index.as_chat_engine(chat_mode=ChatMode.REACT, verbose=True, system_prompt=sys_prompt_str)

In [23]:
# Dump the engine's prompt templates for inspection.
# NOTE(review): the captured output shows a generic tool-use template under
# 'agent_worker:system_prompt' — confirm whether `sys_prompt_str` is applied in REACT mode.
print(query_engine.get_prompts())

{'agent_worker:system_prompt': PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['tool_desc', 'tool_names'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.\n\n## Tools\n\nYou have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.\nThis may require breaking the task into subtasks and using different tools to complete each subtask.\n\nYou have access to the following tools:\n{tool_desc}\n\n\n## Output Format\n\nPlease answer in the same language as the question and use the following format:\n\n```\nThought: The current language of the user is: (user\'s language). I need to use a tool to help me answer the question.\nAction: tool name (one of {tool_names}) if using a tool.\nAction Input: the i

In [24]:
# Reset the engine before the next experiment (presumably clears its chat history — confirm).
query_engine.reset()

In [25]:
# Builds a fresh REACT chat engine (same arguments as `query_engine`) for a
# single turn instead of reusing `query_engine`.
# NOTE(review): the captured run hit an HTTP 429 and was stopped with KeyboardInterrupt.
print(index.as_chat_engine(chat_mode=ChatMode.REACT, verbose=True, system_prompt=sys_prompt_str).chat(query))

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: Portuguese. I need to use a tool to help me answer the question.
Action: query_engine_tool
Action Input: {'input': "What will be on the test? I didn't understand algorithms"}
[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: Don't worry! Algorithms can be a bit tricky at first, but I'm here to help you understand them better.

From what I gather, an algorithm is like a recipe to solve a problem. It's a step-by-step guide that tells you exactly what to do to get from a problem to a solution.

Think of it like baking a cake. You have a list of ingredients and instructions on how to mix them together to get your desired outcome. In the same way, an algorithm takes some input data, processes it according to a set of rules, and produces an output.

Would you like more details on how algorithms work or some examples to help solidify your understanding?
[0mINFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completi

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: Let's explore this question together!

From the context, we know that an algorithm is a set of well-defined rules to solve a problem in a finite time. It's like a recipe with primitive actions that can be executed by humans or machines.

When it comes to exams, the type of algorithms tested often depend on the course or subject. However, some common algorithms that are frequently tested include:

* Sorting algorithms, such as Bubble Sort, Selection Sort, or Insertion Sort
* Searching algorithms, like Linear Search or Binary Search
* Graph algorithms, such as Breadth-First Search (BFS) or Depth-First Search (DFS)
* Dynamic programming algorithms, like the Fibonacci sequence or the Longest Common Subsequence problem

Would you like more details on these algorithms or examples of how they'

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: To determine which algorithms will be on your specific test, let's break down the problem-solving process. 

First, revisit the problem statement or enunciado do problema (analysis stage). Carefully read and understand the problem requirements. 

Next, think about the possible solutions or algoritmo that can be applied to solve the problem. Consider the different approaches or methods that can be used to tackle the problem.

Now, reflect on the codificação stage, where the chosen algorithm is translated into code. Think about the programming language and the specific implementation details that might be relevant to the problem.

By carefully analyzing the problem statement, considering the possible algorithms, and thinking about the implementation details, you can make an educated guess

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 7.000000 seconds
Retrying request to /chat/completions in 7.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: It seems like you're preparing for a test on programming concepts, specifically in Java. Let's break down the key points from the provided context:

1. **Current implementation**: So far, you've implemented all the functionality within the `main` method. This means that the `main` method is responsible for executing the entire program.

2. **Code example**: The provided Java code calculates combinations using factorials. It takes input value

KeyboardInterrupt: 

In [26]:
# Same question against an engine created without an explicit chat_mode
# (library default), using .query() rather than .chat().
# NOTE(review): the captured run was also interrupted after an HTTP 429 retry.
print(index.as_chat_engine(verbose=True, system_prompt=sys_prompt_str).query(query))

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: Portuguese. I need to use a tool to help me answer the question.
Action: query_engine_tool
Action Input: {'input': "What will be on the test? I didn't understand algorithms"}
[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: Don't worry! Algorithms can be a bit tricky at first, but I'm here to help you understand them better.

From what I gather, an algorithm is like a recipe to solve a problem. It's a set of well-defined rules that can be executed in a finite amount of time. Think of it like a step-by-step guide to achieve a specific goal.

Imagine you're baking a cake. You follow a recipe, which is a sequence of instructions, to mix the ingredients, put them in the oven, and finally, get your delicious cake. Similarly, an algorithm takes some input data, processes it according to the defined rules, and produces an output.

For the test, you might want to focus on understanding the key characteristics of an algorithm, such as:

1. **Finiteness**: An algorithm should have a finite number of steps.
2. **Defi

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 6.000000 seconds
Retrying request to /chat/completions in 6.000000 seconds


KeyboardInterrupt: 

In [None]:
# Try ChatMode.BEST with the custom QA/refine chat templates; the result is the
# cell's displayed value (it is not assigned). This cell has no recorded execution.
# NOTE(review): whether as_chat_engine forwards text_qa_template/refine_template
# in this mode is not visible here — confirm.
index.as_chat_engine(
    chat_mode=ChatMode.BEST,
    verbose=True,
    text_qa_template=text_qa_template,
    refine_template=refine_template,
).query(query)

In [None]:
# Print whichever `response` was assigned last, breaking the line after every '. ' for readability.
print(str(response).replace('. ', '. \n'))

In [None]:
# Ask the REACT engine (`query_engine`) to explain a code sample; overwrites `response`.
response = query_engine.chat('Me explique o código de VolumeDaEsfera')

In [None]:
# Print the latest `response`, breaking the line after every '. ' for readability.
print(str(response).replace('. ', '. \n'))