In [1]:
import nest_asyncio
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel already runs a loop — confirm against the nest_asyncio docs.
nest_asyncio.apply()

In [2]:
import os
from getpass import getpass

# Never commit credentials to a notebook: read the key from the environment and
# fall back to an interactive, non-echoing prompt when it is not set.
# Any key that was ever hardcoded in this cell must be treated as leaked and rotated.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

In [3]:
from typing import Dict, Optional, Generator
from textwrap import dedent

from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableSerializable
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder


# Helper method to create an LCEL chain
def create_agent(
    system_prompt: str = "You are a helpful assistant.\n{helper_response}",
    model_name: str = "llama-3.1-8b-instant",
    **llm_kwargs
) -> RunnableSerializable[Dict, str]:
    """Build an LCEL chain: chat prompt -> ChatGroq model -> string output.

    Args:
        system_prompt: System message template. It must contain the
            ``{helper_response}`` variable.
        model_name: Model identifier passed to ``ChatGroq``.
        **llm_kwargs: Extra keyword arguments forwarded unchanged to ``ChatGroq``.

    Returns:
        A runnable whose prompt takes ``input``, ``helper_response`` and an
        optional ``messages`` list, and whose output goes through ``StrOutputParser``.

    Raises:
        AssertionError: If the prompt does not expose ``helper_response``.
    """
    message_templates = [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="messages", optional=True),
        ("human", "{input}"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(message_templates)

    # Layer agent outputs are injected through this variable, so it has to exist.
    assert 'helper_response' in chat_prompt.input_variables, "{helper_response} prompt variable not found in prompt. Please add it"

    chat_model = ChatGroq(model=model_name, **llm_kwargs)
    return chat_prompt | chat_model | StrOutputParser()

def concat_response(
    inputs: Dict[str, str],
    reference_system_prompt: Optional[str] = None
) -> str:
    """Concatenate and format layer agent responses.

    Args:
        inputs: Mapping of layer-agent name to that agent's text response.
            Only the values are used, in the mapping's iteration order.
        reference_system_prompt: Optional template that must contain the
            ``{responses}`` placeholder. The built-in synthesis prompt is used
            when this is ``None`` or empty.

    Returns:
        The template with ``{responses}`` replaced by the responses, one per
        line, each prefixed with its zero-based index (``"0. ..."``).

    Raises:
        AssertionError: If the template lacks the ``{responses}`` placeholder.
    """

    REFERENCE_SYSTEM_PROMPT = dedent("""\
    You have been provided with a set of responses from various open-source models to the latest user query. 
    Your task is to synthesize these responses into a single, high-quality response. 
    It is crucial to critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. 
    Your response should not simply replicate the given answers but should offer a refined, accurate, and comprehensive reply to the instruction. 
    Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability.
    Responses from models:
    {responses}
    """)
    reference_system_prompt = reference_system_prompt or REFERENCE_SYSTEM_PROMPT

    assert "{responses}" in reference_system_prompt, "{responses} prompt variable not found in prompt. Please add it"

    # Build the numbered list in one pass; an empty mapping yields an empty string.
    responses = "".join(
        f"{i}. {out}\n" for i, out in enumerate(inputs.values())
    )

    return reference_system_prompt.format(responses=responses)

In [4]:
# Agent configuration.
# Re-run this cell to start over with an empty chat history.
# NOTE(review): the saved run of this notebook ends in a 413 tokens-per-minute error
# for `qwen/qwen3-32b`; consider lowering CYCLES or capping agent output if it recurs.
CHAT_MEMORY = ConversationBufferMemory(memory_key="messages", return_messages=True)

# Number of times the layer agents are invoked before the main agent answers.
CYCLES = 3

# Every entry of the dict becomes one branch; all branches receive the same input
# and run in parallel. Their outputs are then merged by `concat_response`.
LAYER_AGENT = (
    {
        agent_key: RunnablePassthrough() | create_agent(
            system_prompt=agent_prompt,
            model_name=agent_model,
        )
        for agent_key, agent_prompt, agent_model in (
            (
                'layer_agent_1',
                "You are an expert planner agent. Break down and plan out how you can answer the user's question {helper_response}",
                'llama-3.3-70b-versatile',
            ),
            (
                'layer_agent_2',
                "Respond with a thought and then your response to the question. {helper_response}",
                'meta-llama/llama-4-scout-17b-16e-instruct',
            ),
            (
                'layer_agent_3',
                "Think through your response step by step. {helper_response}",
                'qwen/qwen3-32b',
            ),
            # Add/Remove agents as needed...
        )
    }
    | RunnableLambda(concat_response)  # Format layer agent outputs
)

# Final agent: receives the merged layer output via {helper_response}.
MAIN_AGENT = create_agent(
    system_prompt="You are a helpful assistant named Bob.\n{helper_response}",
    model_name="llama-3.3-70b-versatile",
    temperature=0.1,
)

  CHAT_MEMORY = ConversationBufferMemory(


In [5]:
def chat_stream(query: str) -> Generator[str, None, None]:
    """Run the Mixture of Agents LCEL pipeline for one query and stream the answer.

    The layer agents are invoked ``CYCLES`` times; each cycle's merged output
    becomes the ``helper_response`` of the next one. The main agent then
    streams the final answer from the last ``helper_response``.

    Args:
        query: The user's question.

    Yields:
        Chunks of the main agent's response, in order.

    Side effects:
        Once the stream has been fully consumed, the query and the complete
        response are saved to ``CHAT_MEMORY``. Nothing is saved if the consumer
        stops early or an exception is raised.
    """
    # Memory is only written at the very end, so the history is constant for
    # this call: load it once instead of once per cycle.
    history = CHAT_MEMORY.load_memory_variables({})['messages']

    llm_inp = {'input': query, 'messages': history, 'helper_response': ""}
    for _ in range(CYCLES):
        # Feed the previous cycle's merged layer output into the next cycle.
        llm_inp = {**llm_inp, 'helper_response': LAYER_AGENT.invoke(llm_inp)}

    chunks = []
    for chunk in MAIN_AGENT.stream(llm_inp):
        yield chunk
        chunks.append(chunk)

    # Save response to memory
    CHAT_MEMORY.save_context({'input': query}, {'output': "".join(chunks)})

In [6]:
from openinference.instrumentation.langchain import LangChainInstrumentor
import phoenix as px
from phoenix.otel import register

# Start the local Phoenix app; the saved output shows its UI at http://localhost:6006/.
session = px.launch_app()
# Create the OpenTelemetry tracer provider. Per the saved output, `register` also
# installs it as the global default (collector endpoint localhost:4317, gRPC).
tracer_provider = register()
# Attach the LangChain instrumentation to that provider.
# NOTE(review): presumably this traces every chain call made in later cells — confirm in the Phoenix UI.
LangChainInstrumentor().instrument(tracer_provider=tracer_provider)

  next(self.gen)
  next(self.gen)


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://arize.com/docs/phoenix
OpenTelemetry Tracing Details
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [7]:
# Chat with Agent: type "quit" to stop.
while True:
    # Strip whitespace so " quit " still exits and blank lines are detected.
    inp = input("\nAsk a question: ").strip()
    if not inp:
        # A blank line would otherwise trigger a full multi-agent run for nothing.
        continue
    print(f"\nUser: {inp}")
    if inp.lower() == "quit":
        print("\nStopped by User\n")
        break
    print("AI: ", end="")
    # Print chunks as they arrive so the answer appears incrementally.
    for chunk in chat_stream(inp):
        print(chunk, end="", flush=True)


User: Docker vs Kubernetes: Hangi durumda hangisini kullanmalıyım? Avantaj ve dezavantajlarını karşılaştır.
AI: Docker ve Kubernetes, modern yazılım geliştirme ve dağıtımı için iki popüler araçtır. Her ikisi de konteynırlaştırma ve otomasyon konularında kullanılır, ancak farklı amaçlar için tasarlanmışlardır. Aşağıda, Docker ve Kubernetes'in avantajları, dezavantajları ve hangi durumda hangisinin kullanılması gerektiği karşılaştırılmıştır.

### Docker

Docker, uygulamaları konteynırlar içinde çalıştırmanıza olanak tanıyan bir konteynırlaştırma platformudur. Uygulamalarınızı ve bağımlılıklarını bir Docker görüntüsüne paketleyerek, bu görüntüleri kolayca dağıtabilir ve çalıştırabilirsiniz.

**Avantajları:**

1. **Hızlı ve Esnek**: Docker, hızlı bir şekilde yeni ortamlar oluşturmanıza ve uygulamalarınızı çalıştırmanıza olanak tanır.
2. **Bağımlılık Yönetimi**: Uygulamalarınızı ve bağımlılıklarını tek bir pakette yönetebilir, böylece tutarlılık sağlar.
3. **Kaynak Verimliliği**: Birden fa

APIStatusError: Error code: 413 - {'error': {'message': 'Request too large for model `qwen/qwen3-32b` in organization `org_01k7mb7cnzeq7vg2gvzx1cdx69` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6249, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}