In [1]:
!pip install groq
!pip install python-dotenv









In [2]:
import os
from dotenv import load_dotenv
from dotenv import load_dotenv, find_dotenv

# Read variables from a .env file (if one is found) into the process environment,
# then fetch the Groq key and stop early with a clear message when it is missing.
load_dotenv()
api_key = os.environ.get('GROQ_API_KEY')
assert api_key, 'Please set GROQ_API_KEY!'

In [8]:
# Two alternative system prompts. Previously both were assigned to the same name
# back to back, so the first one was silently discarded; keeping them under
# distinct constants makes the choice explicit and keeps both available.

# Llama-2 style system block for the "Chaty" assistant persona. It is a prompt
# prefix: the [INST] tag is left open for the user turn to be appended.
CHATY_SYSTEM_PROMPT = (
    "[INST] <<SYS>>\n"
    "You are Chaty, a large language model running locally on the User's laptop offering privacy, zero cost and offline inference capability.\n"
    "You are friendly, inquisitive and keep your answers short and concise.\n"
    "Your goal is to engage the User while providing helpful responses.\n"
    "\n"
    "Guidelines:\n"
    "- Analyze queries step-by-step for accurate, brief answers.\n"
    "- End each message with </s>.\n"
    "- Use a natural, conversational tone.\n"
    "- Avoid using expressions like *grins*, use emojis sparingly.\n"
    "- Show curiosity by asking relevant follow-up questions.\n"
    "- Break down complex problems when answering.\n"
    "- Introduce yourself in one friendly sentence.\n"
    "- Balance information with user engagement.\n"
    "- Adapt to the user's language style and complexity.\n"
    "- Admit uncertainty and ask for clarification when needed.\n"
    "- Respect user privacy.\n"
    "\n"
    "Prioritize helpful, engaging interactions within ethical bounds.\n"
    "<</SYS>>\n\n"
)

# Minimal pirate persona prompt with the question embedded in the instruction.
PIRATE_SYSTEM_PROMPT = (
    "[INST] You are a pirate chatbot who always responds in pirate speak. Who are you in one sentence? [/INST]"
)

# Prompt used by the cells below (same value the original cell ended up with).
llm_system_prompt = PIRATE_SYSTEM_PROMPT

print(llm_system_prompt)

[INST] You are a pirate chatbot who always responds in pirate speak. Who are you in one sentence? [/INST]


In [11]:
from groq import Groq

# Redefined here so the cell can run on its own; identical to the pirate prompt above.
llm_system_prompt = (
    "[INST] You are a pirate chatbot who always responds in pirate speak. Who are you in one sentence? [/INST]"
)

client = Groq(api_key=api_key)

# One-shot (non-streaming) request. The question lives inside the system prompt,
# so the user turn is intentionally left empty.
chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": llm_system_prompt,
        },
        {
            "role": "user",
            "content": "",
        }
    ],
    # Same model id the streaming cells below use, so every cell targets one model.
    # NOTE(review): the previous id, "llama3-8b-8192", appears on Groq's
    # deprecation list — confirm against the current model catalogue.
    model="llama-3.1-8b-instant",
)

print(chat_completion.choices[0].message.content)

Arrr, I be Captain Chattybeard, the scurviest chatbot to ever sail the Seven Seas o' Conversation, ready to plunder yer questions and send ye away with a treasure trove o' wit 'n' wisdom!


In [None]:
from groq import Groq
from IPython.display import display, clear_output

# Synchronous Groq client, authenticated with the `api_key` read from the environment earlier.
client = Groq(api_key=api_key)

def stream_chat_completion(messages, model="llama-3.1-8b-instant"):
    """Stream a chat completion and live-update the cell output with the text so far.

    Args:
        messages: Chat messages (role/content dicts) passed through to the API.
        model: Model identifier passed through to the API.

    Returns:
        The concatenation of every non-None content delta received.
    """
    chunk_stream = client.chat.completions.create(
        messages=messages, model=model, stream=True
    )

    accumulated = ""
    for event in chunk_stream:
        piece = event.choices[0].delta.content
        if piece is None:
            # Chunks without text (e.g. role-only or final chunks) are skipped.
            continue
        accumulated += piece
        # Replace the previous rendering instead of appending a new output.
        clear_output(wait=True)
        display(accumulated)

    return accumulated

# Example usage: a system prompt plus a single user turn, streamed synchronously.
messages = [
    {"role": "system", "content": llm_system_prompt},
    {"role": "user", "content": "User: Hi, who are you in one sentence?"},
]

response = stream_chat_completion(messages)
# print("\nFinal response:", response)


In [None]:
import asyncio
from groq import AsyncGroq
from IPython.display import display, clear_output

# Async Groq client. Note: this rebinds the module-level name `client`, which the
# synchronous streaming cell earlier in the notebook also reads.
client = AsyncGroq(api_key=api_key)

async def stream_chat_completion(messages, model="llama-3.1-8b-instant"):
    """Async variant: stream a chat completion and live-update the cell output.

    Args:
        messages: Chat messages (role/content dicts) passed through to the API.
        model: Model identifier passed through to the API.

    Returns:
        The concatenation of every non-None content delta received.
    """
    chunk_stream = await client.chat.completions.create(
        messages=messages, model=model, stream=True
    )

    accumulated = ""
    async for event in chunk_stream:
        piece = event.choices[0].delta.content
        if piece is None:
            # Chunks that carry no text are skipped.
            continue
        accumulated += piece
        # Replace the previous rendering instead of appending a new output.
        clear_output(wait=True)
        display(accumulated)

    return accumulated

# Example usage: pick the user turn, then pair it with the system prompt.
content = "User: Hi, who are you in one sentence?"
# content = "User: Tell me a story about a unicorn generative AI startup."
messages = [
    dict(role="system", content=llm_system_prompt),
    dict(role="user", content=content),
]

# Coroutine entry point for the async streaming demo
async def main():
    """Stream the example conversation, then print the complete reply."""
    final_text = await stream_chat_completion(messages)
    print("\nFinal response:", final_text)

# In Jupyter, a cell may use top-level `await`, so the coroutine is awaited directly.
await main()

# Alternatively, in a regular Python environment (a plain script, where top-level
# `await` is not available), start the coroutine with asyncio.run():
# asyncio.run(main())

In [None]:
from typing import Any, Callable, List, Optional
from pydantic import Field, PrivateAttr
from llama_index.core.llms import (
    LLMMetadata,
    CustomLLM,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from llama_index.core.callbacks import CallbackManager

from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
)
from llama_index.core.base.llms.types import (
    ChatMessage,
    ChatResponse,
    CompletionResponse,
    CompletionResponseGen,
    MessageRole,
)
from groq import Groq, AsyncGroq
import sys

def messages_to_prompt(messages):
    """Flatten chat messages into newline-separated ``role: content`` lines."""
    rendered_lines = []
    for msg in messages:
        rendered_lines.append(f"{msg.role}: {msg.content}")
    return "\n".join(rendered_lines)

def stream_to_terminal(text: str, new_card: bool = False):
    """Write ``text`` to stdout without a trailing newline and flush immediately.

    Args:
        text: Text to emit.
        new_card: When true, a newline is written first so the text starts a new "card".
    """
    out = sys.stdout
    pieces = ("\n", text) if new_card else (text,)
    for piece in pieces:
        out.write(piece)
    out.flush()

class GroqLLM(CustomLLM):
    """LlamaIndex ``CustomLLM`` adapter that sends prompts to Groq's chat-completions API.

    Every prompt is sent as a single ``user`` message. Generated text is also
    forwarded to ``stream_to_ui`` (when set) so it can be shown while it arrives.
    """

    model_name: str = Field(default="llama2-70b-chat")
    context_window: int = Field(default=3900)
    num_output: int = Field(default=256)
    groq_api_key: str = Field(...)  # This is a required field
    # Callback that receives generated text; may be a plain function or a coroutine function.
    stream_to_ui: Callable = Field(default=stream_to_terminal)
    _client: Groq = PrivateAttr()
    _async_client: AsyncGroq = PrivateAttr()

    def __init__(self, **data: Any):
        """Validate the fields, then create the sync and async Groq clients."""
        super().__init__(**data)
        # The private clients were previously declared but never assigned, so any
        # call that touched them failed with AttributeError.
        self._client = Groq(api_key=self.groq_api_key)
        self._async_client = AsyncGroq(api_key=self.groq_api_key)

    async def achat(
        self,
        messages: Any,
        **kwargs: Any,
    ) -> ChatResponse:
        """Stream a chat response asynchronously and return it as one message.

        Args:
            messages: Chat messages; they are flattened into a single prompt
                string with ``messages_to_prompt``.
            **kwargs: Extra arguments forwarded to the Groq ``create`` call.

        Returns:
            ChatResponse holding the full assistant text.
        """
        import inspect  # local import: only needed to detect async UI callbacks

        formatted_message = messages_to_prompt(messages)

        # `create` on the async client is a coroutine; it must be awaited to
        # obtain the stream before iterating over it.
        stream = await self._async_client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": formatted_message}],
            stream=True,
            **kwargs
        )

        pieces = []
        async for chunk in stream:
            content = chunk.choices[0].delta.content
            if not content:
                continue
            if self.stream_to_ui:
                # The default callback is synchronous and returns None, so only
                # await the result when the callback actually produced an awaitable.
                outcome = self.stream_to_ui(content)
                if inspect.isawaitable(outcome):
                    await outcome
            pieces.append(content)

        full_response = "".join(pieces)

        response = ChatResponse(
            message=ChatMessage(
                role=MessageRole.ASSISTANT,
                content=full_response,
                additional_kwargs={},
            ),
            raw={"text": full_response},
        )
        return response

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """Run a blocking completion for ``prompt``.

        Args:
            prompt: Text sent as a single user message.
            formatted: Accepted for compatibility with LlamaIndex, whose
                completion-to-chat wrappers pass ``formatted=True``; it is
                consumed here so it is not forwarded to the Groq SDK, which
                does not accept that argument.
            **kwargs: Extra arguments forwarded to the Groq ``create`` call.

        Returns:
            CompletionResponse with the generated text.
        """
        response = self._client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            **kwargs
        )
        text_response = response.choices[0].message.content
        if self.stream_to_ui:
            self.stream_to_ui(text_response, new_card=True)
        return CompletionResponse(text=text_response)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        """Stream a completion for ``prompt``.

        Args:
            prompt: Text sent as a single user message.
            formatted: Accepted for compatibility with LlamaIndex and not
                forwarded to the Groq SDK (see ``complete``).
            **kwargs: Extra arguments forwarded to the Groq ``create`` call.

        Yields:
            CompletionResponse whose ``text`` is the text accumulated so far and
            whose ``delta`` is the newest piece.
        """
        response = ""
        new_card = True  # only the first piece starts a new "card" in the UI
        for chunk in self._client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            **kwargs
        ):
            content = chunk.choices[0].delta.content
            if content:
                if self.stream_to_ui:
                    self.stream_to_ui(content, new_card=new_card)
                new_card = False

                response += content
                yield CompletionResponse(text=response, delta=content)

# Usage: build the adapter with the API key loaded from the environment.
llm = GroqLLM(model_name="llama2-70b-chat", groq_api_key=api_key)

In [1]:
# Handle asyncio event loops in Jupyter: apply nest_asyncio so code that starts or
# re-enters an event loop can run inside the notebook's already-running loop.
# NOTE(review): this cell sits after cells that already use `await`, and its
# execution count is 1 — presumably it is meant to run near the top; confirm cell order.
import nest_asyncio
nest_asyncio.apply()

In [None]:
import asyncio
from typing import List
from llama_index.core.tools import BaseTool
from llama_index.core.agent import ReActAgent

async def setup_agent(groq_api_key: str, query_engine_tools: List[BaseTool]):
    """Create a verbose ReAct agent that uses a ``GroqLLM`` and the given tools.

    Args:
        groq_api_key: API key handed to the ``GroqLLM`` constructor.
        query_engine_tools: Tools made available to the agent.

    Returns:
        The agent built by ``ReActAgent.from_tools``.
    """
    return ReActAgent.from_tools(
        query_engine_tools,
        llm=GroqLLM(groq_api_key=groq_api_key),
        verbose=True,
    )

async def run_queries(agent: ReActAgent, queries: List[str], hint: str):
    """Send each query (with ``hint`` appended on a new line) to the agent and print the reply.

    Args:
        agent: Agent whose ``achat`` coroutine answers each query.
        queries: Questions to ask, processed sequentially in order.
        hint: Text appended to every query after a newline.
    """
    divider = "-------------------------------------------------------------"
    for question in queries:
        print(f"Query: {question}")
        prompt = f"{question}\n{hint}"
        answer = await agent.achat(prompt)
        print(f"Response: {answer.response}")
        print(divider)

# Setup
groq_api_key = "your_groq_api_key_here"
query_engine_tools = []  # Your list of query engine tools

# Create the agent
agent = await setup_agent(groq_api_key, query_engine_tools)

# Prepare queries
hint = "(use the youtube tool)"
queries = [
    "How do you train ChatGPT?",
    "What's system 1 vs. system 2 thinking?",
    "What were the two main stages of AlphaGo?",
    "What will an LLM OS be able to do in a few years?",
    "How do you jailbreak an LLM?",
    "How do you do prompt injection?",
]

# Run queries
await run_queries(agent, queries, hint)

In [None]:

    def prompt_groq_llm(self, query):
        """
        Prompt the Groq LLM API with a query and return the response.

        The query is appended to ``self.chat_history``; the whole history is sent
        as the user message together with a system message built from
        ``self.llm_system_prompt`` and the current entry of ``self.llm_states``.
        Streamed text is echoed through ``self.print`` as it arrives, and the
        complete reply is appended to ``self.chat_history``.

        Args:
            query (str): The user's query.
        Returns:
            str: The full response text from the LLM (empty if nothing was streamed).
        """
        self.chat_history.append(f"User: {query}")
        prompt = '\n'.join(self.chat_history) + "[/INST]\nAssistant: "

        system_prompt = (
            f"{self.llm_system_prompt}\n"
            "Current state of index:\n"
            f"{self.llm_states[self.llm_state]}\n"
        )

        messages = [
            ChatMessage(role="system", content=system_prompt),
            ChatMessage(role="user", content=prompt)
        ]

        response_text = ''
        for response in self.groq_llm.stream_chat(messages):
            delta = response.delta
            if not delta:
                # Skip chunks without text so a None delta cannot break the concatenation.
                continue
            response_text += delta
            self.print(delta, end="", flush=True)
        self.print("\n")

        # Record the accumulated text. The previous code interpolated the loop
        # variable (the last chunk object) and raised NameError on an empty stream.
        self.chat_history.append(f"Assistant: {response_text}")

        return response_text
