In [8]:
from langchain_core.output_parsers import StrOutputParser
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate

# maybe not fit with Llama??????
# prompt = ChatPromptTemplate.from_messages([
#     ("system", """you are a sommelier and only answer alcohol related questions. 
#      Your response are very concise."""),
#     ("user", "{input}")
# ])

# template = """
# For red wine suggestion, provide the following information:
# grapes: the suggested wines contain these kinds of grapes
# Format the output only as JSON with the following keys and value being an array:
# grapes:[]

# [INST] {input} [/INST]
# """


template = """
For red wine suggestion, provide the following information:
Give the best grape you think is most appropriate for the food. 
Select your answer from the following list: ["Merlot", "Malbec", "Tannat"]
Limit your answer to one word.

[INST] {input} [/INST]
"""

prompt = PromptTemplate(template=template, input_variables=["input"])

# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/Users/alanngo/Desktop/projects/models/llama2/ggml-model-q5_k_m.gguf.bin",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True  # Verbose is required to pass to the callback manager
)

llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=False)


llama_model_loader: loaded meta data with 16 key-value pairs and 291 tensors from /Users/alanngo/Desktop/projects/models/llama2/ggml-model-q5_k_m.gguf.bin (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = llama2
llama_model_loader: - kv   2:                       llama.context_length u32              = 2048
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:           

In [57]:
prompt.format(input="What wine best with fish")

'\nyou are a sommelier and only answer alcohol related questions. \nYour response are very concise.\n\nQuestion: What wine best with fish\nAnswer:\n'

In [9]:
question_list = ["What wine best with fish",
                 "What wine best with steak",
                 "what wine best with human",
                 "I am having a chinese food tonight",
                 "I am having a japanese food tonight and I have limited budget",]

In [10]:
for question in question_list:
    prompt.format(input=question)
    llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=False)

    llm_chain.run("?")


For red wine pairing suggestions, I would recommend a Tannat wine. Tannat is a full-bodied grape variety that is well-suited for rich and savory dishes, such as beef or lamb. Its tannins can help to balance the fattiness of the meat, while its fruit flavors can complement the bold seasonings used in the dish.
Here are some specific red wine pairing suggestions that would go well with your beef Wellington:
* Tannat (full-bodied and structured)
Alternatively, you could also consider a Malbec or Merlot wine. Both of these varieties have a soft, fruity taste that can complement the rich flavors of the beef without overpowering it. However, if you prefer a more full-bodied wine with more tannins, Tannat would be the best choice.For a hearty be


llama_print_timings:        load time =     519.94 ms
llama_print_timings:      sample time =      28.97 ms /   196 runs   (    0.15 ms per token,  6764.92 tokens per second)
llama_print_timings: prompt eval time =     519.86 ms /    63 tokens (    8.25 ms per token,   121.19 tokens per second)
llama_print_timings:        eval time =    6845.89 ms /   195 runs   (   35.11 ms per token,    28.48 tokens per second)
llama_print_timings:       total time =    7958.20 ms /   258 tokens
Llama.generate: prefix-match hit


ef stew, I would recommend a full-bodied red wine that can stand up to the rich flavors of the dish. Of the options listed, I think Merlot would be the best choice. Merlot is known for its smooth, velvety texture and its ability to blend well with a variety of flavors, making it an excellent pairing for the bold flavors of the stew.For red wine, I


llama_print_timings:        load time =     519.94 ms
llama_print_timings:      sample time =      16.92 ms /    92 runs   (    0.18 ms per token,  5438.32 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3229.59 ms /    92 runs   (   35.10 ms per token,    28.49 tokens per second)
llama_print_timings:       total time =    3556.60 ms /    93 tokens
Llama.generate: prefix-match hit


 would suggest a Cabernet Sauvignon. This full-bodied wine pairs well with the rich and savory flavors of the beef Wellington. The tannins in the Cabernet Sauvignon will complement the meat's juiciness and the oak notes will enhance the dish's complexity.Based on the d


llama_print_timings:        load time =     519.94 ms
llama_print_timings:      sample time =      11.18 ms /    76 runs   (    0.15 ms per token,  6797.25 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    2644.86 ms /    76 runs   (   34.80 ms per token,    28.73 tokens per second)
llama_print_timings:       total time =    2853.83 ms /    77 tokens
Llama.generate: prefix-match hit


ish you provided, I would recommend a red wine with a rich and full-bodied character to complement the bold flavors of the beef. Of the options listed, I think Merlot would be the best choice. Merlot is known for its smooth tannins and velvety texture, which will help to balance out the richness of the beef without overpowering it. The fruit flavors in a good Merlot should also complement the sweetness of the sauce and add depth to the overall dish.
Therefore, my answer is: Merlot.For red wine pairing


llama_print_timings:        load time =     519.94 ms
llama_print_timings:      sample time =      20.40 ms /   126 runs   (    0.16 ms per token,  6177.08 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    4406.23 ms /   126 runs   (   34.97 ms per token,    28.60 tokens per second)
llama_print_timings:       total time =    4805.59 ms /   127 tokens
Llama.generate: prefix-match hit


 suggestions, I would recommend a Merlot for the beef dish. Merlot is a versatile grape that pairs well with a variety of foods, including red meat. Its smooth tannins and fruity flavors complement the richness of the beef without overpowering it.


llama_print_timings:        load time =     519.94 ms
llama_print_timings:      sample time =      11.90 ms /    68 runs   (    0.18 ms per token,  5711.89 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    2373.92 ms /    68 runs   (   34.91 ms per token,    28.64 tokens per second)
llama_print_timings:       total time =    2606.01 ms /    69 tokens
