In [1]:
# Uncomment to install the notebook's dependencies (run once, then restart the kernel).
# NOTE(review): versions are unpinned, and `%pip` would target the kernel's own
# environment more reliably than `!pip` -- confirm and pin before sharing.
# !pip install --upgrade langchain llama-cpp-python

In [2]:
import os
import time

import langchain
import pandas as pd
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import LlamaCpp
from langchain.vectorstores import DocArrayInMemorySearch

## Model Setup on MacBook Pro M1

1. [Requested access](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) to the Llama 2 models from Meta
2. Downloaded the [Llama-2-7b-chat-hf model from Hugging Face](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
3. Cloned [llama.cpp](https://github.com/ggerganov/llama.cpp.git)
4. From the llama.cpp root directory, converted the model to ggml format with `python convert.py <path_to_downloaded>/Llama-2-7b-chat-hf`


In [3]:
# Globals
# Path to the converted Llama-2 model file. Set the LLAMA_MODEL_PATH environment
# variable to point at a local copy; the fallback is the original author's
# location, so existing setups keep working unchanged.
model_path = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf/ggml-model-f16.bin",
)
# Wine CSV: loaded into a DataFrame here for inspection; the same path is also
# handed to the document loader.
static_document_src_path = 'dataset/wine_100.csv'
df = pd.read_csv(static_document_src_path)
# llama.cpp settings shared by the model constructors.
n_gpu_layers = 1
n_batch = 512
n_ctx = 2048

In [4]:
# Document source: the wine CSV, read through LangChain's CSVLoader.
loader = CSVLoader(file_path=static_document_src_path)

# Embedding model: built from the same llama.cpp model file as the LLM.
# n_ctx is not passed here (the saved load log reports n_ctx = 512).
embedding = LlamaCppEmbeddings(
    model_path=model_path,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    f16_kv=True,
)

llama.cpp: loading model from /Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf/ggml-model-f16.bin
llama_model_load_internal: format     = ggjt v1 (pre #1405)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 1 (mostly F16)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 13155.10 MB (+  256.00 MB per state)
llama_new_context_wi

In [5]:
# Instruction-following LLM that generates the answers, loaded through
# llama.cpp with the shared settings from the globals cell.
llm = LlamaCpp(
    model_path=model_path,
    n_ctx=n_ctx,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    f16_kv=True,
    verbose=False,
)

llama.cpp: loading model from /Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf/ggml-model-f16.bin
llama_model_load_internal: format     = ggjt v1 (pre #1405)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 1 (mostly F16)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 13155.10 MB (+ 1024.00 MB per state)
llama_new_context_w

In [6]:
# Load the documents and index them, via the embedding model, in an in-memory store.
docs = loader.load()
db = DocArrayInMemorySearch.from_documents(docs, embedding)

# Expose the store through the retriever interface (default settings).
retriever = db.as_retriever()

# Retrieval QA chain. chain_type "stuff" is used here; the alternatives are
# "map_reduce", "refine" and "map_rerank".
qa_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, verbose=False
)


llama_print_timings:        load time =  3391.35 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  3391.01 ms /    93 tokens (   36.46 ms per token,    27.43 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  3391.63 ms

llama_print_timings:        load time =  3391.35 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  3303.90 ms /   113 tokens (   29.24 ms per token,    34.20 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  3304.94 ms

llama_print_timings:        load time =  3391.35 ms
llama_print_timings:   

In [7]:
# Rows of the wine table whose country is Italy; the cell displays the result.
df.loc[df["country"].eq("Italy")]

Unnamed: 0,country,title,description,variety,winery
0,Italy,Nicosia 2013 Vulkà Bianco (Etna),"Aromas include tropical fruit, broom, brimston...",White Blend,Nicosia
6,Italy,Terre di Giurfo 2013 Belsito Frappato (Vittoria),"Here's a bright, informal red that opens with ...",Frappato,Terre di Giurfo
13,Italy,Masseria Setteporte 2012 Rosso (Etna),This is dominated by oak and oak-driven aromas...,Nerello Mascalese,Masseria Setteporte
22,Italy,Baglio di Pianetto 2007 Ficiligno White (Sicilia),Delicate aromas recall white flower and citrus...,White Blend,Baglio di Pianetto
24,Italy,Canicattì 2009 Aynat Nero d'Avola (Sicilia),"Aromas of prune, blackcurrant, toast and oak c...",Nero d'Avola,Canicattì
26,Italy,Stemmari 2013 Dalila White (Terre Siciliane),Pretty aromas of yellow flower and stone fruit...,White Blend,Stemmari
27,Italy,Stemmari 2013 Nero d'Avola (Terre Siciliane),"Aromas recall ripe dark berry, toast and a whi...",Nero d'Avola,Stemmari
28,Italy,Terre di Giurfo 2011 Mascaria Barricato (Cera...,"Aromas suggest mature berry, scorched earth, a...",Red Blend,Terre di Giurfo
31,Italy,Duca di Salaparuta 2010 Calanìca Nero d'Avola-...,Merlot and Nero d'Avola form the base for this...,Red Blend,Duca di Salaparuta
32,Italy,Duca di Salaparuta 2011 Calanìca Grillo-Viogni...,"Part of the extended Calanìca series, this Gri...",White Blend,Duca di Salaparuta


In [8]:
# Query
# Ask the QA chain for an Italian wine with a white-flower aroma and print its answer.
# NOTE(review): the saved answer recommends a "Rosso" described with "blue flower"
# notes, although the Italy rows displayed earlier include a white blend whose
# description starts "Delicate aromas recall white flower" -- retrieval quality
# looks weak here; confirm.
query = "Recommend me a wine from Italy with white flower aroma."
response = qa_chain.run(query)
print(response)


llama_print_timings:        load time =  3391.35 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  2226.70 ms /    16 tokens (  139.17 ms per token,     7.19 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  2227.85 ms


 I can recommend a wine from Italy with a white flower aroma based on the information provided in the context. The Corvo 2010 Rosso Red (Sicilia) has notes of blue flower and dusty mineral accents, which suggests that it may have a white flower aroma. Therefore, I would recommend this wine for your consideration.


In [9]:
# Rows of the wine table whose country is Chile; the cell displays the result.
df.loc[df["country"].eq("Chile")]

Unnamed: 0,country,title,description,variety,winery
36,Chile,Estampa 2011 Estate Viognier-Chardonnay (Colch...,"White flower, lychee and apple aromas carry th...",Viognier-Chardonnay,Estampa
44,Chile,Sundance 2011 Merlot (Maule Valley),A berry aroma comes with cola and herb notes. ...,Merlot,Sundance
51,Chile,Casa Silva 2008 Gran Reserva Petit Verdot (Col...,This is much different than Casa Silva's 2009 ...,Petit Verdot,Casa Silva
58,Chile,Tres Palacios 2011 Reserve Pinot Noir (Maipo V...,Lightly herbal strawberry and raspberry aromas...,Pinot Noir,Tres Palacios
80,Chile,Aresti 2014 Special Release Reserva Carmenère ...,Caramelized oak and vanilla aromas are front a...,Carmenère,Aresti


In [10]:
# Time one debug-traced run of the chain on a Chile / seafood question.
query = "Recommend me a dry wine from Chile that pairs well with seafood."

# perf_counter() is the clock intended for measuring elapsed time; unlike
# time.time() it is not affected by system clock adjustments.
start = time.perf_counter()
langchain.debug = True  # trace every chain step for this run only
try:
    response = qa_chain.run(query)
finally:
    # Switch tracing off even when the run raises, so later cells are not
    # flooded with debug output.
    langchain.debug = False
end = time.perf_counter()
duration_seconds = end - start

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Recommend me a dry wine from Chile that pairs well with seafood."
}



llama_print_timings:        load time =  3391.35 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  2695.17 ms /    18 tokens (  149.73 ms per token,     6.68 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  2695.51 ms


[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Recommend me a dry wine from Chile that pairs well with seafood.",
  "context": "country: US\ntitle: Hindsight 2013 Bella Vetta Vineyard Cabernet Sauvignon (Howell Mountain)\ndescription: Juicy plum, raspberry and pencil lead lead the way in this vineyard designate, a site that's 2,000 feet high. Tobacco and cedar meet a full-bodied hit of oak and puckering tannin, the wine still youthfully wrapped in its full-bodied boldness.\nvariety: Cabernet Sauvignon\nwinery: Hindsight\n\ncountry: US\ntitle: Envolve 2010 Puma Springs Vineyard Red (Dry Creek Valley)\ndescription: Rustic and dry, this has flavors of berries, currants, licorice and spices. M

In [11]:
# Report the measured duration (two decimal places, padded with blank lines),
# then the chain's answer.
print(f"\n\nDuration: {duration_seconds:0.2f} seconds\n\n")
print(response)



Duration: 14.28 seconds


 Aresti 2014 Special Release Reserva Carmenère (Rapel Valley)
