In [22]:
# Install or upgrade (-U) the packages this notebook imports, with quiet output (-q).
%pip install -Uq llama-index-llms-groq
%pip install -Uq llama-index
%pip install -Uq python-dotenv

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


# llm v0 
Chat em vários passos com revisor (geração inicial seguida de um pedido de revisão).

In [None]:
import os
from llama_index.core.llms import ChatMessage
from llama_index.llms.groq import Groq
from dotenv import load_dotenv

# Load environment variables through python-dotenv before reading the API key.
load_dotenv()

# Groq API key; None when the GROQ_API_KEY variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# System prompt sent with both the initial request and the revision request.
# Read through a context manager so the file handle is closed right away.
with open('prompts/system_00.md', 'r', encoding='utf-8') as _prompt_file:
    SYSTEM_PROMPT = _prompt_file.read()

# Instruction appended after the raw file content in the first user message.
APPEND_PROMPT = "RESPIRA FUNDO, REVÊ CUIDADOSAMENTE O CONTEÚDO ORIGINAL, E AGORA COMPLETA A VERSÃO FINAL DO CONTEÚDO FORMATADO. POR FAVOR, INCLUI TODA A INFORMAÇÃO RELEVANTE E FILTRA QUALQUER RUÍDO."

# Follow-up user message that asks the model to revise its first answer.
with open('prompts/revisor_prompt_00.md', 'r', encoding='utf-8') as _prompt_file:
    REVISOR_PROMPT = _prompt_file.read()

class GroqLLM:
    """Small wrapper around a single Groq chat model.

    Both public methods return the text content of the model's reply.
    """

    def __init__(self):
        """Create the underlying Groq client for the fixed model."""
        self.llm = Groq(
            model="llama-3.1-70b-versatile",
            api_key=GROQ_API_KEY,
            temperature=0.1,
        )

    def complete(self, prompt, context=None, system_prompt=SYSTEM_PROMPT):
        """Send a system message plus a user prompt and return the reply text.

        Args:
            prompt: Text of the user message.
            context: Optional extra text; when not None it is sent as a second
                user message prefixed with "Context: ".
            system_prompt: Text of the system message.

        Returns:
            The content of the model's response message.
        """
        conversation = [
            ChatMessage(role="system", content=system_prompt),
            ChatMessage(role="user", content=prompt),
        ]
        if context is not None:
            conversation += [ChatMessage(role="user", content=f"Context: {context}")]
        return self.chat(conversation)

    def chat(self, messages):
        """Send a ready-made list of chat messages and return the reply text."""
        return self.llm.chat(messages).message.content

def process_file(file_path, llm):
    """Format one file with a two-step conversation: draft, then revision.

    Args:
        file_path: Path of the UTF-8 text file to read.
        llm: Object exposing ``complete(prompt)`` and ``chat(messages)``.

    Returns:
        Whatever ``llm.chat`` returns for the revision request.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        content = source.read()

    # First pass: the file content followed by the formatting instruction.
    initial_prompt = f"{content}\n\n{APPEND_PROMPT}"
    draft = llm.complete(initial_prompt)

    # Second pass: replay the first exchange and ask for a revision of the draft.
    transcript = (
        ("system", SYSTEM_PROMPT),
        ("user", initial_prompt),
        ("assistant", draft),
        ("user", REVISOR_PROMPT),
    )
    revision_messages = [ChatMessage(role=role, content=text) for role, text in transcript]
    return llm.chat(revision_messages)

def main():
    """Run every ``.md`` file in ``output2`` through the LLM and save the result.

    Results are written under the same file name in ``output_clean_formatted``,
    which is created if missing. Existing output files are overwritten.
    """
    llm = GroqLLM()
    input_folder = 'output2'
    output_folder = 'output_clean_formatted'

    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        # Only markdown files are processed; everything else is ignored.
        if not filename.endswith('.md'):
            continue

        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        print(f"Processing {filename}...")
        processed_content = process_file(input_path, llm)

        with open(output_path, 'w', encoding='utf-8') as destination:
            destination.write(processed_content)

        print(f"Saved processed content to {output_path}")

if __name__ == "__main__":
    main()

# llm v3

Apenas um system prompt; passa os ficheiros diretamente para a LLM.

In [5]:
import os
import time
import logging
from llama_index.llms.groq import Groq
from llama_index.core.llms import ChatMessage

# Send INFO-and-above log records to a file instead of the notebook output.
logging.basicConfig(filename='processing_log.txt', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Groq API key; None when the GROQ_API_KEY variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# System prompt sent with every file. Read through a context manager so the
# file handle is closed right away instead of being left to the garbage collector.
with open('prompts/system_01.md', 'r', encoding='utf-8') as _prompt_file:
    SYSTEM_PROMPT = _prompt_file.read()

#"llama-guard-3-8b"

class GroqLLM:
    """Groq chat client that falls back to other models when a request fails.

    The instance keeps one active client (``self.llm``) for the model at
    ``self.current_model_index`` and replaces it whenever ``rotate_model`` is
    called. A simple client-side counter limits how many requests are sent per
    60-second window.
    """

    # Length of the client-side rate-limit window, in seconds.
    WINDOW_SECONDS = 60
    # Requests allowed per window: 14400 requests per day spread evenly over
    # the 1440 minutes of a day.
    REQUESTS_PER_WINDOW = 14400 / 1440
    # Wait used for a 429 error whose message has no parsable retry delay.
    DEFAULT_RETRY_AFTER = 60.0

    def __init__(self):
        """Set up the model list and create a client for the first model."""
        self.models = [
            {"name": "llama-3.1-70b-versatile", "context_window": 8192},
            {"name": "llama-3.1-8b-instant", "context_window": 8192},
            {"name": "mixtral-8x7b-32768", "context_window": 32768},
            {"name": "llama3-70b-8192", "context_window": 8192},
            {"name": "llama3-8b-8192", "context_window": 8192},
            {"name": "gemma2-9b-it", "context_window": 8192},
            {"name": "gemma-7b-it", "context_window": 8192},
            {"name": "llama-3.2-90b-vision-preview", "context_window": 8192},
            {"name": "llama-3.2-11b-vision-preview", "context_window": 8192},
            {"name": "llama3-groq-70b-8192-tool-use-preview", "context_window": 8192},
            {"name": "llama3-groq-8b-8192-tool-use-preview", "context_window": 8192},
            {"name": "llama-3.2-3b-preview", "context_window": 8192},
            {"name": "llama-3.2-1b-preview", "context_window": 8192},
            {"name": "llama-guard-3-8b", "context_window": 8192},
            {"name": "llava-v1.5-7b-4096-preview", "context_window": 4096},
        ]
        self.current_model_index = 0
        self.llm = None
        self.request_count = 0      # requests sent in the current window
        self.last_request_time = 0  # start time of the current window
        self.initialize_llm()

    def initialize_llm(self):
        """(Re)create the Groq client for the currently selected model."""
        self.llm = Groq(model=self.models[self.current_model_index]["name"], api_key=GROQ_API_KEY, temperature=0.1)

    def rotate_model(self):
        """Select the next model in the list (wrapping around) and rebuild the client."""
        self.current_model_index = (self.current_model_index + 1) % len(self.models)
        self.initialize_llm()
        logging.info(f"Switched to model: {self.get_current_model()}")

    def respect_rate_limit(self):
        """Count this request and sleep first if the current window is full.

        A window starts at ``last_request_time``. Once ``REQUESTS_PER_WINDOW``
        requests have been counted inside a window, the call sleeps until the
        window ends and then starts a new one.
        """
        current_time = time.time()
        if current_time - self.last_request_time < self.WINDOW_SECONDS:
            if self.request_count >= self.REQUESTS_PER_WINDOW:
                sleep_time = self.WINDOW_SECONDS - (current_time - self.last_request_time)
                logging.info(f"Rate limit approached. Sleeping for {sleep_time:.2f} seconds")
                time.sleep(sleep_time)
                self.request_count = 0
                self.last_request_time = time.time()
        else:
            # The previous window has expired: start a fresh one now.
            self.request_count = 0
            self.last_request_time = current_time
        self.request_count += 1

    @staticmethod
    def _parse_retry_after(error_message, default=60.0):
        """Extract the "try again in <duration>" delay from an error message.

        Accepts durations made of ``h``, ``m``, ``s`` and ``ms`` components,
        with optional decimals (for example "7.66s", "1m26.4s", "430ms").

        Args:
            error_message: Text of the exception raised by the API call.
            default: Value returned when no duration can be found.

        Returns:
            The delay in seconds as a float, or ``default``.
        """
        import re  # local import: the cell's import block does not provide re

        match = re.search(r"try again in\s+([0-9hms.]+)", error_message)
        if match is None:
            return default
        # "ms" must be tried before "m" and "s" so "430ms" is not read as minutes.
        parts = re.findall(r"(\d+(?:\.\d+)?)(ms|h|m|s)", match.group(1))
        if not parts:
            return default
        seconds_per_unit = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}
        return sum(float(value) * seconds_per_unit[unit] for value, unit in parts)

    def chat(self, messages, max_retries=3, retry_delay=5):
        """Send ``messages`` and return the reply text, retrying on failure.

        Up to ``max_retries * len(self.models)`` attempts are made. How an
        error is handled depends on its message:

        * contains "429": wait for the delay named in the message (or
          ``DEFAULT_RETRY_AFTER``) and retry the same model;
        * "400" with "maximum context length", or "404" with "model does not
          exist": rotate to the next model immediately;
        * anything else: retry after ``retry_delay`` seconds, rotating to the
          next model on every ``max_retries``-th attempt.

        The loop also stops early once the attempt index has reached the
        number of models and the active model is the starting one again.

        Args:
            messages: Chat messages passed unchanged to the Groq client.
            max_retries: Attempts budgeted per model.
            retry_delay: Seconds to wait before a plain retry.

        Returns:
            The content of the response message.

        Raises:
            Exception: When no attempt succeeded; chained to the last error.
        """
        original_model_index = self.current_model_index
        total_attempts = max_retries * len(self.models)
        last_error = None

        for attempt in range(total_attempts):
            try:
                self.respect_rate_limit()
                response = self.llm.chat(messages)
                logging.info(f"Successfully processed using model: {self.get_current_model()}")
                return response.message.content
            except Exception as e:
                last_error = e
                error_message = str(e)
                logging.warning(f"Error with model {self.get_current_model()}: {error_message}. Attempt {attempt + 1} of {total_attempts}")

                if "429" in error_message:
                    # Rate limit error. Parsing must never raise here, otherwise
                    # the retry loop is aborted from inside the handler.
                    retry_after = self._parse_retry_after(error_message, self.DEFAULT_RETRY_AFTER)
                    logging.info(f"Rate limit exceeded. Waiting for {retry_after} seconds before retrying.")
                    time.sleep(retry_after)
                elif "400" in error_message and "maximum context length" in error_message.lower():
                    # Context length error
                    logging.info(f"Context length exceeded for model {self.get_current_model()}. Rotating to next model.")
                    self.rotate_model()
                elif "404" in error_message and "model does not exist" in error_message.lower():
                    # Model not found error
                    logging.info(f"Model {self.get_current_model()} not found. Rotating to next model.")
                    self.rotate_model()
                else:
                    # Other errors
                    if attempt % max_retries == max_retries - 1:
                        logging.info("Rotating to next model after multiple failures.")
                        self.rotate_model()
                    else:
                        logging.info(f"Retrying in {retry_delay} seconds...")
                        time.sleep(retry_delay)

                # If we've tried all models and are back to the original, break the loop
                if self.current_model_index == original_model_index and attempt >= len(self.models):
                    break

        logging.error("Failed to get a response after trying all models and multiple retries.")
        raise Exception("Failed to get a response after trying all models and multiple retries.") from last_error

    def get_current_model(self):
        """Return the name of the currently selected model."""
        return self.models[self.current_model_index]["name"]
    



def process_file(file_path, llm):
    """Send one file to the LLM with the system prompt and return the reply.

    Args:
        file_path: Path of the UTF-8 text file to read.
        llm: Object exposing ``get_current_model()`` and ``chat(messages)``.

    Returns:
        Whatever ``llm.chat`` returns for the file's content.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    messages = [
        ChatMessage(role="system", content=SYSTEM_PROMPT),
        ChatMessage(role="user", content=content)
    ]

    logging.info(f"Processing file {file_path} with model: {llm.get_current_model()}")

    response = llm.chat(messages)

    # llm.chat may rotate to another model while retrying, so ask for the model
    # again instead of reusing the name captured before the call.
    logging.info(f"Finished processing {file_path} with model: {llm.get_current_model()}")
    return response


def main():
    """Process every ``.md`` file in ``out`` that has no output file yet.

    Results are written under the same name in ``output_clean_formatted``.
    A failure on one file is logged and the loop moves on to the next file;
    any other unexpected error is logged as well instead of being raised.
    """
    try:
        llm = GroqLLM()
        input_folder = 'out'
        output_folder = 'output_clean_formatted'

        os.makedirs(output_folder, exist_ok=True)

        files_to_process = []
        for entry in os.listdir(input_folder):
            if entry.endswith('.md'):
                files_to_process.append(entry)
        logging.info(f"Found {len(files_to_process)} .md files in the input folder.")

        for filename in files_to_process:
            input_path = os.path.join(input_folder, filename)
            output_path = os.path.join(output_folder, filename)

            # An existing output file means this input was handled in an earlier run.
            already_done = os.path.exists(output_path)
            if already_done:
                logging.info(f"Skipping {filename} as it already exists in the output folder.")
            else:
                logging.info(f"Starting to process {filename}...")
                try:
                    processed_content = process_file(input_path, llm)

                    with open(output_path, 'w', encoding='utf-8') as destination:
                        destination.write(processed_content)

                    logging.info(f"Saved processed content to {output_path}")
                except Exception as e:
                    logging.error(f"Error processing {filename}: {str(e)}")
                    logging.exception("Exception details:")

        logging.info("Finished processing all files.")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {str(e)}")
        logging.exception("Exception details:")

if __name__ == "__main__":
    main()

## Trying to optimize llm v3


In [38]:
import logging
import os
import time

from llama_index.core.llms import ChatMessage
from llama_index.llms.groq import Groq

class GroqLLM:
    """Groq chat client with model rotation and a token-based rate limiter.

    A new Groq client is created for every attempt, using the model at
    ``self.current_model_index``. ``chat`` returns None instead of raising
    when every attempt fails.
    """

    # Wait used for a 429 error whose message has no parsable retry delay.
    DEFAULT_RETRY_AFTER = 60.0

    def __init__(self):
        """Set up the model list and the rate-limiter state."""
        self.models = [
            {"name": "llama-3.1-70b-versatile", "context_window": 8192},
            {"name": "llama-3.1-8b-instant", "context_window": 8192},
            {"name": "mixtral-8x7b-32768", "context_window": 32768},
            {"name": "llama3-70b-8192", "context_window": 8192},
            {"name": "llama3-8b-8192", "context_window": 8192},
            {"name": "gemma2-9b-it", "context_window": 8192},
            {"name": "gemma-7b-it", "context_window": 8192},
            {"name": "llama-3.2-90b-vision-preview", "context_window": 8192},
            {"name": "llama-3.2-11b-vision-preview", "context_window": 8192},
            {"name": "llama3-groq-70b-8192-tool-use-preview", "context_window": 8192},
            {"name": "llama3-groq-8b-8192-tool-use-preview", "context_window": 8192},
            {"name": "llama-3.2-3b-preview", "context_window": 8192},
            {"name": "llama-3.2-1b-preview", "context_window": 8192},
            {"name": "llama-guard-3-8b", "context_window": 8192},
            {"name": "llava-v1.5-7b-4096-preview", "context_window": 4096},
        ]
        self.current_model_index = 0
        self.last_request_time = 0
        # Despite the name, this accumulates estimated tokens, not request counts.
        self.requests_in_window = 0
        self.rate_limit = 500000  # tokens per minute
        self.window_size = 60  # 1 minute window

    def get_current_model(self):
        """Return the dict (name and context window) of the selected model."""
        return self.models[self.current_model_index]

    def rotate_model(self):
        """Select the next model in the list, wrapping around at the end."""
        self.current_model_index = (self.current_model_index + 1) % len(self.models)
        logging.info(f"Rotating to model: {self.get_current_model()['name']}")

    def wait_for_rate_limit(self, tokens_requested):
        """Account for ``tokens_requested`` and sleep if the budget is exceeded.

        The running total is reset after a full window without requests and
        otherwise reduced in proportion to the time elapsed since the last
        request. If adding the new tokens would exceed ``rate_limit``, the
        call sleeps for the time the excess needs at the allowed rate.
        """
        current_time = time.time()
        time_since_last_request = current_time - self.last_request_time

        if time_since_last_request >= self.window_size:
            self.requests_in_window = 0
        else:
            self.requests_in_window -= max(0, (self.requests_in_window * time_since_last_request / self.window_size))

        if self.requests_in_window + tokens_requested > self.rate_limit:
            wait_time = (self.requests_in_window + tokens_requested - self.rate_limit) / (self.rate_limit / self.window_size)
            logging.info(f"Rate limit approached. Waiting for {wait_time:.2f} seconds")
            time.sleep(wait_time)

        self.requests_in_window += tokens_requested
        self.last_request_time = time.time()

    @staticmethod
    def _parse_retry_after(error_message, default=60.0):
        """Extract the "try again in <duration>" delay from an error message.

        Accepts durations made of ``h``, ``m``, ``s`` and ``ms`` components,
        with optional decimals (for example "7.66s", "1m26.4s", "430ms").

        Args:
            error_message: Text of the exception raised by the API call.
            default: Value returned when no duration can be found.

        Returns:
            The delay in seconds as a float, or ``default``.
        """
        import re  # local import: the cell's import block does not provide re

        match = re.search(r"try again in\s+([0-9hms.]+)", error_message)
        if match is None:
            return default
        # "ms" must be tried before "m" and "s" so "430ms" is not read as minutes.
        parts = re.findall(r"(\d+(?:\.\d+)?)(ms|h|m|s)", match.group(1))
        if not parts:
            return default
        seconds_per_unit = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}
        return sum(float(value) * seconds_per_unit[unit] for value, unit in parts)

    def chat(self, messages, max_retries=3):
        """Send ``messages`` and return the reply text, or None on failure.

        Up to ``max_retries * len(self.models)`` attempts are made. A 429
        error waits for the delay named in the message (60 seconds if none can
        be parsed) and then rotates to the next model. Any other error is
        retried after 5 seconds, rotating on every ``max_retries``-th attempt.

        Args:
            messages: Chat messages; each must expose a ``content`` attribute.
            max_retries: Attempts budgeted per model.

        Returns:
            The content of the response message, or None when all attempts failed.
        """
        total_attempts = max_retries * len(self.models)

        # Rough token estimate (whitespace-separated words * 1.3). It does not
        # change between attempts, so compute it once; None content counts as empty.
        tokens_requested = sum(len((msg.content or "").split()) for msg in messages) * 1.3

        for attempt in range(total_attempts):
            current_model = self.get_current_model()
            llm = Groq(model=current_model["name"])

            try:
                self.wait_for_rate_limit(tokens_requested)

                response = llm.chat(messages)
                return response.message.content

            except Exception as e:
                error_message = str(e)
                logging.warning(f"Error with model {current_model['name']}: {error_message}. Attempt {attempt + 1} of {total_attempts}")

                if "429" in error_message:
                    # Falls back to the default wait when the delay cannot be parsed.
                    retry_after = self._parse_retry_after(error_message, self.DEFAULT_RETRY_AFTER)
                    logging.info(f"Rate limit exceeded. Waiting for {retry_after} seconds before retrying.")
                    time.sleep(retry_after)
                    self.rotate_model()
                elif attempt % max_retries == max_retries - 1:
                    self.rotate_model()
                else:
                    time.sleep(5)  # Wait for 5 seconds before retrying with the same model

        logging.error("Failed to get a response after trying all models and multiple retries.")
        return None

def process_file(file_path, llm):
    """Ask the LLM to clean and format one file.

    Args:
        file_path: Path of the UTF-8 text file to read.
        llm: Object exposing ``chat(messages)``.

    Returns:
        The value returned by ``llm.chat``, or None when reading the file or
        calling the model raised an exception (the error is logged).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            content = source.read()

        system_message = ChatMessage(role="system", content="You are a helpful assistant that cleans and formats text.")
        user_message = ChatMessage(role="user", content=f"Please clean and format the following text, removing any HTML tags, fixing formatting issues, and ensuring it's well-structured:\n\n{content}")

        return llm.chat([system_message, user_message])
    except Exception as e:
        logging.error(f"Failed to process {file_path}: {str(e)}")
        return None
    
def main():
    """Process every ``.md`` file in ``out`` that has no output file yet.

    Successful results are written under the same name in
    ``output_clean_formatted``. When processing yields a falsy result, the
    input file is moved into ``failed_processing`` instead.
    """
    llm = GroqLLM()
    input_folder = 'out'
    output_folder = 'output_clean_formatted'
    failed_folder = 'failed_processing'

    for folder in (output_folder, failed_folder):
        os.makedirs(folder, exist_ok=True)

    files_to_process = []
    for entry in os.listdir(input_folder):
        if entry.endswith('.md'):
            files_to_process.append(entry)
    logging.info(f"Found {len(files_to_process)} .md files in the input folder.")

    for filename in files_to_process:
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        # An existing output file means this input was handled in an earlier run.
        if os.path.exists(output_path):
            logging.info(f"Skipping {filename} as it already exists in the output folder.")
            continue

        logging.info(f"Starting to process {filename}...")
        processed_content = process_file(input_path, llm)

        if not processed_content:
            # Move the input out of the way so failed files are kept separately.
            failed_path = os.path.join(failed_folder, filename)
            os.rename(input_path, failed_path)
            logging.warning(f"Moved {filename} to {failed_folder} due to processing failure.")
            continue

        with open(output_path, 'w', encoding='utf-8') as destination:
            destination.write(processed_content)
        logging.info(f"Saved processed content to {output_path}")

    logging.info("Finished processing all files.")

if __name__ == "__main__":
    main()

