In [53]:
import asyncio
import nest_asyncio
import os
import logging
import argparse
from dotenv import load_dotenv
from together import AsyncTogether, Together, error

nest_asyncio.apply()

In [54]:
# Configuración de registro (logging)
class CustomFormatter(logging.Formatter):
    blue = "\033[94m"
    green = "\033[92m"
    yellow = "\033[93m"
    red = "\033[91m"
    bold_red = "\033[1;91m"
    reset = "\033[0m"
    format = "%(asctime)s - %(levelname)s - %(message)s"
    
    FORMATS = {
        logging.DEBUG: blue + format + reset,
        logging.INFO: green + format + reset,
        logging.WARNING: yellow + format + reset,
        logging.ERROR: red + format + reset,
        logging.CRITICAL: bold_red + format + reset
    }
    
    def format(self, record):
        log_fmt = self.FORMATS.get(record.levelno)
        formatter = logging.Formatter(log_fmt, datefmt='%Y-%m-%d %H:%M:%S')
        return formatter.format(record)

In [55]:
# Configuración inicial del registro
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.handlers = []
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(CustomFormatter())
logger.addHandler(ch)

# Carga variables de entorno desde el archivo .env
_ = load_dotenv()

In [56]:
# Constantes del sistema
API_KEY = os.environ.get("TOGETHER_API_KEY")
TEMPERATURE = 0.7
MAX_TOKENS = 512
RETRY_DELAYS = [1, 2, 4]

In [57]:
# Inicializa los clientes de la API Together
client = Together(api_key=API_KEY)
async_client = AsyncTogether(api_key=API_KEY)

In [58]:
# Listas de modelos
PROPOSAL_MODELS = [
    "Qwen/Qwen2-72B-Instruct",
    "google/gemma-2-27b-it",
    "zero-one-ai/Yi-34B-Chat",
    "deepseek-ai/deepseek-1Lm-67b-chat"
]

In [59]:
AGGREGATOR_MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
AGGREGATOR_SYSTEM_PROMPT = """\
You have been provided with a set of responses from various open-source models to the latest user query. \
Your task is to synthesize these responses into a single, high-quality response. \
It is crucial to critically evaluate the information provided in these responses, recognizing that some of it may be biased or inaccurate. \
Your response should not simply replicate the given answers but should offer a refined, accurate, and comprehensive reply. \
Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability.
Responses from models:"""

In [60]:
async def run_llm(model, user_prompt):
    for sleep_time in RETRY_DELAYS:
        try:
            logger.info(f"Calling model {model} with prompt: {user_prompt}")
            response = await async_client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": user_prompt}],
                temperature=TEMPERATURE,
                max_tokens=MAX_TOKENS,
            )
            logger.debug(f"> Received response from model [{model}]: {response.choices[0].message.content}")
            return response.choices[0].message.content
        except error.RateLimitError as e:
            logger.warning(f"Rate limit exceeded for model {model}: {e}")
            await asyncio.sleep(sleep_time)
        except Exception as e:
            logger.error(f"Error running model {model}: {e}")
            return None
    logger.error(f"Failed to run model {model} after multiple retries")

In [61]:
def __collect_responses(system_prompt, results):
    logger.info("Collecting responses from proposal models")
    return (
        system_prompt
        + "\n\n"
        + "\n\n".join(f"{i + 1}. {element}" for i, element in enumerate(results))
    )

In [62]:
async def __gather_responses(user_prompt, layer=1):
    logger.debug(f"Running layer {layer}")
    logger.info(f"Gathering responses from proposal models")
    results = await asyncio.gather(*(run_llm(model, user_prompt) for model in PROPOSAL_MODELS))
    results = [result for result in results if result]  # Filtra las respuestas nulas
    logger.info(f"Collected {len(results)} responses from proposal models")
    return results

In [63]:
def __stream_final_response(finalStream):
    logger.info("Streaming final response for aggregator models")
    for chunk in finalStream:
        print(chunk.choices[0].delta.content or "", end="", flush=True)

In [64]:
async def _run_aggregator_model(user_prompt, concatenated_results, layer=2):
    logger.debug(f"Running layer {layer} (aggregator model)")
    finalStream = client.chat.completions.create(
        model=AGGREGATOR_MODEL,
        messages=[
            {"role": "system", "content": AGGREGATOR_SYSTEM_PROMPT + "\n" + concatenated_results},
            {"role": "user", "content": user_prompt},
        ],
        stream=True,
    )
    __stream_final_response(finalStream)

In [65]:
async def run(user_prompt):
    logger.info("Starting response aggregation process")
    concatenated_results = __collect_responses(AGGREGATOR_SYSTEM_PROMPT, await __gather_responses(user_prompt))
    await _run_aggregator_model(user_prompt, concatenated_results)
    logger.info("Response aggregation process completed")

In [66]:
async def two_layer_moa(user_prompt):
    results = await __gather_responses(user_prompt)
    concatenated_results = "\n".join([f"{i+1}. {element}" for i, element in enumerate(results)])
    await _run_aggregator_model(user_prompt, concatenated_results)

In [67]:
async def multi_layer_moa(user_prompt, layers):
    results = await __gather_responses(user_prompt)
    
    for layer in range(1, layers - 1):  # Ejecuta las capas intermedias
        logger.debug(f"Running layer {layer}")
        results = await __gather_responses(user_prompt, layer=layer)
    
    concatenated_results = __collect_responses(AGGREGATOR_SYSTEM_PROMPT, results)
    await _run_aggregator_model(user_prompt, concatenated_results, layer=layers)

In [68]:
# Función principal modificada para Jupyter
async def run_moa_async(mode="two_layer", prompt="What is the meaning of life?", layers=3):
    if mode == "two_layer":
        logger.info("Running two-layer MOA")
        await two_layer_moa(prompt)
    elif mode == "multi_layer":
        logger.info("Running multi-layer MOA")
        await multi_layer_moa(prompt, layers)
    else:
        logger.error(f"Invalid mode: {mode}")

In [69]:
# Función wrapper para ejecutar run_moa_async
def run_moa(mode="two_layer", prompt="What is the meaning of life?", layers=3):
    asyncio.get_event_loop().run_until_complete(run_moa_async(mode, prompt, layers))

In [70]:
# Ejemplo de uso en Jupyter:
run_moa(mode="two_layer", prompt="What is the future of AI?")

[92m2024-08-08 18:17:39 - INFO - Running two-layer MOA[0m
[94m2024-08-08 18:17:39 - DEBUG - Running layer 1[0m
[92m2024-08-08 18:17:39 - INFO - Gathering responses from proposal models[0m
[92m2024-08-08 18:17:39 - INFO - Calling model Qwen/Qwen2-72B-Instruct with prompt: What is the future of AI?[0m
[92m2024-08-08 18:17:39 - INFO - Calling model google/gemma-2-27b-it with prompt: What is the future of AI?[0m
[92m2024-08-08 18:17:39 - INFO - Calling model zero-one-ai/Yi-34B-Chat with prompt: What is the future of AI?[0m
[92m2024-08-08 18:17:39 - INFO - Calling model deepseek-ai/deepseek-1Lm-67b-chat with prompt: What is the future of AI?[0m
[91m2024-08-08 18:17:40 - ERROR - Error running model deepseek-ai/deepseek-1Lm-67b-chat: Error code: 404 - {"message": "Unable to access model deepseek-ai/deepseek-1Lm-67b-chat. Please visit https://api.together.xyz to see the list of supported models.", "type_": "invalid_request_error", "code": "model_not_found"}[0m
[92m2024-08-08 1

The future of Artificial Intelligence (AI) is multifaceted and holds great potential to transform various aspects of our lives. While there are many exciting developments on the horizon, it's essential to acknowledge the challenges and risks associated with AI's growth. Here's a comprehensive overview of the future of AI, synthesizing the key points from the provided responses:

**Advancements in AI Capabilities:**

1. **Ubiquitous Integration**: AI will become increasingly integrated into our daily lives, from personal devices to larger applications like smart cities and self-driving cars.
2. **Advanced Automation**: AI will continue to automate tasks, leading to increased efficiency and productivity in industries like manufacturing, healthcare, and retail.
3. **Enhanced Healthcare**: AI will improve diagnostics, personalized medicine, and patient care through machine learning algorithms and data analysis.
4. **AI-Powered Education**: Education systems will be transformed with AI, off

1. Ejecución del script:
   - El script se ejecutó en modo "two-layer MOA" (Mixture of Agents de dos capas).
   - Intentó obtener respuestas de cuatro modelos diferentes.
   - Hubo algunos problemas de límite de tasa (rate limiting) con algunos modelos, pero finalmente logró obtener respuestas de tres de ellos: Yi-34B-Chat, Gemma-2-27b-it, y Qwen2-72B-Instruct.
   - El modelo agregador (probablemente Meta-Llama-3.1-70B-Instruct-Turbo) sintetizó estas respuestas en una respuesta final.

2. Respuesta agregada sobre el futuro de la IA:
   La respuesta final cubrió varios aspectos del futuro de la IA:

   a) Avances en capacidades de IA:
      - Integración ubicua en la vida diaria
      - Automatización avanzada en diversas industrias
      - Mejoras en atención médica
      - Educación personalizada
      - Desarrollo de IA explicable

   b) Impacto en la sociedad:
      - Disrupciones en el mercado laboral
      - Potencial para el crecimiento económico
      - Consideraciones éticas importantes
      - Necesidad de garantizar accesibilidad y equidad

   c) Desafíos y riesgos:
      - Problemas de sesgo y equidad en los sistemas de IA
      - Preocupaciones sobre privacidad y seguridad de datos
      - Posible desplazamiento laboral
      - Debates sobre riesgos existenciales de la IA superinteligente

   d) Un futuro no predeterminado:
      - Énfasis en que el futuro de la IA será moldeado por las decisiones que tomemos hoy
      - Importancia del diálogo abierto entre investigadores, políticos y el público

La respuesta agregada proporcionó una visión equilibrada, destacando tanto el potencial transformador de la IA como los desafíos y riesgos asociados. Enfatizó la importancia de un desarrollo responsable de la IA y la necesidad de abordar las implicaciones éticas, sociales y económicas de esta tecnología.

In [71]:
run_moa(mode="multi_layer", prompt="Explain quantum computing", layers=4)

[92m2024-08-08 18:19:22 - INFO - Running multi-layer MOA[0m
[94m2024-08-08 18:19:22 - DEBUG - Running layer 1[0m
[92m2024-08-08 18:19:22 - INFO - Gathering responses from proposal models[0m
[92m2024-08-08 18:19:22 - INFO - Calling model Qwen/Qwen2-72B-Instruct with prompt: Explain quantum computing[0m
[92m2024-08-08 18:19:22 - INFO - Calling model google/gemma-2-27b-it with prompt: Explain quantum computing[0m
[92m2024-08-08 18:19:22 - INFO - Calling model zero-one-ai/Yi-34B-Chat with prompt: Explain quantum computing[0m
[92m2024-08-08 18:19:22 - INFO - Calling model deepseek-ai/deepseek-1Lm-67b-chat with prompt: Explain quantum computing[0m
[91m2024-08-08 18:19:23 - ERROR - Error running model deepseek-ai/deepseek-1Lm-67b-chat: Error code: 404 - {"message": "Unable to access model deepseek-ai/deepseek-1Lm-67b-chat. Please visit https://api.together.xyz to see the list of supported models.", "type_": "invalid_request_error", "code": "model_not_found"}[0m
[92m2024-08-08

**Quantum Computing: A Revolutionary Technology**

Quantum computing is a type of computing that leverages the principles of quantum mechanics to process information in a fundamentally different way than classical computers. This technology has the potential to revolutionize various fields, including cryptography, chemistry, and machine learning, by solving complex problems exponentially faster than classical computers.

**Key Concepts:**

1. **Qubits (Quantum Bits):** The basic unit of quantum information, qubits can exist in multiple states simultaneously, known as a superposition. This allows qubits to process multiple possibilities at once, making quantum computers incredibly powerful.
2. **Superposition:** Qubits can be both 0 and 1 at the same time, enabling parallel processing and exponentially faster calculations.
3. **Entanglement:** Qubits can be linked together in a way that their states are correlated, regardless of distance. This phenomenon enables quantum computers to per

1. Ejecución del script:
   - El script se ejecutó en modo "multi-layer MOA" con 4 capas.
   - Intentó obtener respuestas de cuatro modelos diferentes en cada capa.
   - Hubo varios problemas de límite de tasa y algunos errores con el modelo deepseek-ai/deepseek-1Lm-67b-chat.
   - A pesar de estos problemas, el script logró obtener respuestas de al menos tres modelos en cada capa.
   - El modelo agregador (probablemente Meta-Llama-3.1-70B-Instruct-Turbo) sintetizó estas respuestas en una respuesta final.

2. Respuesta agregada sobre la computación cuántica:
   La respuesta final proporcionó una explicación completa y bien estructurada de la computación cuántica, cubriendo los siguientes aspectos:

   a) Definición y conceptos clave:
      - Qubits y su capacidad de superposición
      - Superposición y entrelazamiento cuántico
      - Interferencia cuántica
      - Algoritmos cuánticos

   b) Cómo funciona la computación cuántica:
      - Procesamiento cuántico
      - Medición cuántica
      - Corrección de errores cuánticos

   c) Aplicaciones y potencial:
      - Criptografía
      - Química y ciencia de materiales
      - Aprendizaje automático
      - Optimización

   d) Desafíos y limitaciones:
      - Escalabilidad
      - Corrección de errores
      - Ruido cuántico

   e) El futuro de la computación cuántica:
      - Expectativas de avances y accesibilidad
      - Potencial revolucionario en diversos campos
      - Necesidad de superar desafíos técnicos

La respuesta agregada proporcionó una visión completa y equilibrada de la computación cuántica, explicando tanto sus principios fundamentales como sus aplicaciones potenciales y desafíos actuales. El enfoque de múltiples capas parece haber permitido una síntesis más profunda y detallada de la información proporcionada por los diferentes modelos.