In [1]:
# Source: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF

from ctransformers import AutoModelForCausalLM

# gpu_layers is the number of model layers to offload to the GPU.
# Use 0 when no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=50,
)

# Ask the model to continue a short prompt and print the generated text.
completion = llm("AI is going to")
print(completion)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

 revolutionize the way we interact with machines, and it's already changing the world in ways we can hardly imagine. One of the most exciting aspects of this technology is its ability to learn from data, allowing machines to make predictions and decisions based on patterns they detect in large datasets.

This capability has enormous implications for industries like healthcare, finance, and transportation, where AI systems can help doctors diagnose diseases more accurately, investors make better investment decisions, and self-driving cars navigate the roads safely. But it also raises important ethical questions about privacy, bias, and accountability, as well as the potential for AI to replace human workers in certain jobs.

In this article, we'll explore some of the ways AI is being used today, and how these technologies are likely to evolve in the future. We'll also discuss some of the challenges and opportunities that arise from the increasing use of AI, and what steps we can take to

In [2]:
from ctransformers import AutoModelForCausalLM

# Cache of loaded models, keyed by a short model name. It lives at module scope
# so that it survives between callback invocations; a dict created inside the
# function would be empty on every call and force a model reload each time.
llms = {}


async def callback(contents: str):
    """Stream a Mistral completion for ``contents``.

    This is an async generator. After each generated token it yields the
    response text accumulated so far, so every yielded value is the whole
    message up to that point rather than a single token.

    The model is loaded on the first call and stored in the module-level
    ``llms`` cache; subsequent calls reuse the cached instance.

    Args:
        contents: Prompt text passed to the model unchanged.

    Yields:
        str: The partial response, growing by one token per iteration.
    """
    if "mistral" not in llms:
        llms["mistral"] = AutoModelForCausalLM.from_pretrained(
            "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
            model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
            gpu_layers=1,
        )

    llm = llms["mistral"]
    # stream=True makes the call return an iterable of tokens instead of one string.
    response = llm(contents, stream=True, max_new_tokens=1000)
    message = ""

    for token in response:
        message += token
        yield message


In [1]:
import panel as pn
from langchain.chains import LLMChain
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate

# Initialize the Panel extension before any Panel components are created.
pn.extension()

# Model configurations: short model name -> keyword arguments that callback()
# unpacks into CTransformers(...). "model" looks like a Hugging Face Hub
# repository id and "model_file" the GGUF file to use from that repository
# (NOTE(review): confirm against the CTransformers documentation).
MODEL_KWARGS = {
    "llama": {
        "model": "TheBloke/Llama-2-7b-Chat-GGUF",
        "model_file": "llama-2-7b-chat.Q5_K_M.gguf",
    },
    "mistral": {
        "model": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        "model_file": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    },
}

# Cache of LLMChain objects keyed by model name. callback() fills it lazily the
# first time each model is needed, so a chain is built once and then reused.
llm_chains = {}

# Template for the chatbot prompt. The instruction is wrapped in [INST] ... [/INST]
# and the prompt ends right there so that the model's reply follows directly.
# No "</s>" (end-of-sequence marker) is placed after [/INST]: in the instruct
# prompt format that marker closes a completed model answer, not the instruction,
# so emitting it before the answer tells the model the turn is already over.
TEMPLATE = """<s>[INST] You are a friendly chat bot who's willing to help answer the user:
{user_input} [/INST]"""

async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    """Send the user's message to every configured model and post each reply.

    For each entry in MODEL_KWARGS an LLMChain is built on first use and stored
    in the module-level ``llm_chains`` cache; later calls reuse it. All models
    are queried one after another, and only once every response has been
    collected are they sent to the chat interface.

    Args:
        contents: Text entered by the user; substituted for ``{user_input}``
            in TEMPLATE.
        user: Name of the sender. Not used by this callback.
        instance: Chat interface whose placeholder text is updated while a
            model is being set up and to which the replies are sent.
    """
    config = {"max_new_tokens": 256, "temperature": 0.5}
    responses = {}

    # Loop through models and get individual responses
    for model_name, model_kwargs in MODEL_KWARGS.items():
        if model_name not in llm_chains:
            # Adjacent string literals are concatenated verbatim, so the
            # separating space after the comma must be written explicitly.
            instance.placeholder_text = (
                f"Downloading {model_name}, this may take a few minutes, "
                f"or longer, depending on your internet connection."
            )

            # Initialize the LLMChain for the model
            llm = CTransformers(**model_kwargs, config=config)
            prompt = PromptTemplate(template=TEMPLATE, input_variables=["user_input"])
            llm_chain = LLMChain(prompt=prompt, llm=llm)
            llm_chains[model_name] = llm_chain

        # Get the response from the model
        response = await llm_chains[model_name].apredict(user_input=contents)
        responses[model_name] = response

    # Send individual responses to the chat interface; respond=False is passed
    # so that posting a reply does not itself invoke the callback again.
    for model_name, response in responses.items():
        instance.send(response, user=f"{model_name.capitalize()} Response", respond=False)

# Build the chat widget and hook it up to the multi-model callback.
chat_interface = pn.chat.ChatInterface(
    callback=callback,
    placeholder_threshold=0.1,
)

# Post a welcome message as the "System" user; respond=False is passed so the
# greeting is only displayed and is not handed to the callback.
welcome_text = "Send a message to get individual replies from Llama 2 and Mistral (7B)!"
chat_interface.send(welcome_text, user="System", respond=False)

# Mark the interface as servable (and display it as the cell's output).
chat_interface.servable()
