In [None]:
# https://python.langchain.com/v0.2/docs/integrations/llms/llamacpp/

In [3]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

In [4]:
# Prompt text with a single {question} placeholder; the fixed "Answer:" lead-in
# asks the model to reason step by step before committing to an answer.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's work this out in a step by step way to be sure we have the right answer."
)

# Wrap the raw text in a PromptTemplate so it can be piped into a model.
prompt = PromptTemplate.from_template(template)

In [5]:
# Token-wise streaming: the stdout handler is registered on a callback manager
# that is later handed to the model.
stdout_streamer = StreamingStdOutCallbackHandler()
callback_manager = CallbackManager([stdout_streamer])

In [7]:
# --- Model configuration -----------------------------------------------------

# The number of layers to put on the GPU. The rest will be on the CPU.
# If you don't know how many layers there are, you can use -1 to move all to GPU.
n_gpu_layers = -1

# Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
n_batch = 512

# Make sure the model path is correct for your system!
# Written as a raw string so the Windows backslashes are taken literally:
# in a normal literal, sequences such as "\_", "\Z" or "\m" are invalid escapes
# (a SyntaxWarning on recent Python versions), and a folder starting with
# "n" or "t" would silently turn into a newline/tab.
model_path = r"D:\__repos\ZZSN24L\models\Llama-2-7B-GGUF\llama-2-7b.Q3_K_M.gguf"

llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from D:\__repos\ZZSN24L\models\Llama-2-7B-GGUF\llama-2-7b.Q3_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.att

In [8]:
# Pipe the prompt template into the model to form a single runnable chain.
llm_chain = prompt | llm

question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

# Fill the template's {question} slot and run the chain. As the last expression
# of the cell, the returned text is also shown as the cell result.
chain_inputs = dict(question=question)
llm_chain.invoke(chain_inputs)

 The year that Justin Bieber was born (1994) is also known as 1995. So the only Super Bowl played in 1995 was actually 1996!
The year, 1996, had two teams competing for a chance at the Lombardi Trophy - The Green Bay Packers and the New England Patriots (no, they didn't meet until 2007). The game took place in Atlanta. The winner was none other than the Green Bay Packers, who were coached by Mike Holmgren and quarterbacked by Brett Favre, two key figures of the modern NFL.
Incidentally, the halftime show for the Super Bowl that year included a performance from C+C Music Factory, who were at one point known as "the best band in the world" (by Bieber). They performed their hit single Gonna Make You Sweat (Everybody Dance Now) and it was the only song played during halftime.


llama_print_timings:        load time =    5049.05 ms
llama_print_timings:      sample time =      35.89 ms /   222 runs   (    0.16 ms per token,  6185.91 tokens per second)
llama_print_timings: prompt eval time =    5048.99 ms /    45 tokens (  112.20 ms per token,     8.91 tokens per second)
llama_print_timings:        eval time =   24413.59 ms /   221 runs   (  110.47 ms per token,     9.05 tokens per second)
llama_print_timings:       total time =   29771.81 ms /   266 tokens


' The year that Justin Bieber was born (1994) is also known as 1995. So the only Super Bowl played in 1995 was actually 1996!\nThe year, 1996, had two teams competing for a chance at the Lombardi Trophy - The Green Bay Packers and the New England Patriots (no, they didn\'t meet until 2007). The game took place in Atlanta. The winner was none other than the Green Bay Packers, who were coached by Mike Holmgren and quarterbacked by Brett Favre, two key figures of the modern NFL.\nIncidentally, the halftime show for the Super Bowl that year included a performance from C+C Music Factory, who were at one point known as "the best band in the world" (by Bieber). They performed their hit single Gonna Make You Sweat (Everybody Dance Now) and it was the only song played during halftime.'