In [1]:
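# Optional one-time environment setup (uncomment to run). `%pip` installs into the
# kernel's own environment; pin versions if the results need to be reproducible.
# NOTE(review): the cells below presumably also need pandas, InstructorEmbedding,
# sentence-transformers and docarray installed — confirm.
# %pip install --upgrade langchain llama-cpp-python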

In [2]:
import os

import pandas as pd
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import LlamaCpp
from langchain.vectorstores import DocArrayInMemorySearch

In [3]:
# Globals
# Every tunable value of the notebook lives in this cell.

# Model id handed to HuggingFaceInstructEmbeddings in the next cell.
embedding_model = 'hkunlp/instructor-xl'

# llama.cpp weights file passed to LlamaCpp. Set the LLAMA_MODEL_PATH environment
# variable to point at your own copy; when it is unset (or empty) the original
# author-local location is used, so existing setups keep working unchanged.
instruction_model_path = (
    os.environ.get("LLAMA_MODEL_PATH")
    or "/Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf/ggml-model-f16.bin"
)

# Wine dataset: the same CSV is fed to the document loader for retrieval and
# read into `df` so the rows can be inspected next to the model's answers.
static_document_src_path = 'dataset/wine_100.csv'
df = pd.read_csv(static_document_src_path)

# llama.cpp runtime settings, forwarded unchanged to LlamaCpp.
# NOTE(review): n_gpu_layers = 1 presumably targets Metal on macOS, where a single
# offloaded layer is enough to enable the GPU — confirm for other backends.
n_gpu_layers = 1
# Prompt batch size; equals the n_ctx of 512 reported in the model load log.
n_batch = 512

In [4]:
# Document source: a CSV loader pointed at the wine dataset.
loader = CSVLoader(file_path=static_document_src_path)

# Embedding model shared by every vector store built in this notebook.
embedding = HuggingFaceInstructEmbeddings(model_name=embedding_model)

# Build an in-memory vector index over the loader's documents.
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embedding,
)
index = index_creator.from_loaders([loader])

  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


max_seq_length  512


In [5]:
# Instruction-following LLM served through llama.cpp.
# All settings are collected first so they can be read (and tweaked) in one place.
llm_kwargs = dict(
    model_path=instruction_model_path,  # weights file chosen in the globals cell
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True,
    verbose=False,
)
llm = LlamaCpp(**llm_kwargs)

llama.cpp: loading model from /Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf/ggml-model-f16.bin
llama_model_load_internal: format     = ggjt v1 (pre #1405)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 1 (mostly F16)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 13155.10 MB (+  256.00 MB per state)
llama_new_context_wi

In [6]:
# Vector store: load the CSV documents and embed them into an in-memory store.
# NOTE(review): `index` from the earlier cell appears to have embedded the same
# loader already, so this embeds the documents a second time — confirm whether
# both stores are needed.
docs = loader.load()
db = DocArrayInMemorySearch.from_documents(docs, embedding)

# Retriever: similarity lookup over `db` with the store's default settings.
retriever = db.as_retriever()

# Question-answering chain: retrieved documents + LLM.
# chain_type alternatives: map_reduce, refine, map_rerank
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    verbose=False,
)

In [7]:
# Ground truth for the next query: every wine in the dataset whose country is Italy.
df.loc[df['country'].eq('Italy')]

Unnamed: 0,country,title,description,variety,winery
0,Italy,Nicosia 2013 Vulkà Bianco (Etna),"Aromas include tropical fruit, broom, brimston...",White Blend,Nicosia
6,Italy,Terre di Giurfo 2013 Belsito Frappato (Vittoria),"Here's a bright, informal red that opens with ...",Frappato,Terre di Giurfo
13,Italy,Masseria Setteporte 2012 Rosso (Etna),This is dominated by oak and oak-driven aromas...,Nerello Mascalese,Masseria Setteporte
22,Italy,Baglio di Pianetto 2007 Ficiligno White (Sicilia),Delicate aromas recall white flower and citrus...,White Blend,Baglio di Pianetto
24,Italy,Canicattì 2009 Aynat Nero d'Avola (Sicilia),"Aromas of prune, blackcurrant, toast and oak c...",Nero d'Avola,Canicattì
26,Italy,Stemmari 2013 Dalila White (Terre Siciliane),Pretty aromas of yellow flower and stone fruit...,White Blend,Stemmari
27,Italy,Stemmari 2013 Nero d'Avola (Terre Siciliane),"Aromas recall ripe dark berry, toast and a whi...",Nero d'Avola,Stemmari
28,Italy,Terre di Giurfo 2011 Mascaria Barricato (Cera...,"Aromas suggest mature berry, scorched earth, a...",Red Blend,Terre di Giurfo
31,Italy,Duca di Salaparuta 2010 Calanìca Nero d'Avola-...,Merlot and Nero d'Avola form the base for this...,Red Blend,Duca di Salaparuta
32,Italy,Duca di Salaparuta 2011 Calanìca Grillo-Viogni...,"Part of the extended Calanìca series, this Gri...",White Blend,Duca di Salaparuta


In [18]:
# Query
# Ask the retrieval-QA chain for an Italian wine matching an aroma description;
# the answer can be checked by eye against the Italy rows displayed above.
query = "Recommend me a wine from Italy with white flower aroma."
# run() takes the question string and returns the chain's answer.
response = qa_chain.run(query)
print(response)

 Based on the given context, I would recommend the Stemmari 2013 Dalila White (Terre Siciliane). It has pretty aromas of yellow


In [14]:
# Ground truth for the next query: every wine in the dataset whose country is Argentina.
df.loc[df['country'].eq('Argentina')]

Unnamed: 0,country,title,description,variety,winery
16,Argentina,Felix Lavaque 2010 Felix Malbec (Cafayate),"Baked plum, molasses, balsamic vinegar and che...",Malbec,Felix Lavaque
17,Argentina,Gaucho Andino 2011 Winemaker Selection Malbec ...,Raw black-cherry aromas are direct and simple ...,Malbec,Gaucho Andino


In [17]:
# Second query: a terser prompt, to be compared against the Argentina rows above.
# Reuses (and overwrites) the `query` / `response` names from the previous query cell.
query = "From Argentina with fruit aroma."
response = qa_chain.run(query)
print(response)

 The wine with the most distinct fruit aroma is... (Gaucho Andino 2011 Malbec from Mendoza).
