In [None]:
import os

import langchain
import pandas as pd
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import CTransformers
from langchain.vectorstores import DocArrayInMemorySearch

In [None]:
# Globals: model locations, dataset path and generation settings.

# Directory holding the local Llama-2 chat model. It can be overridden with
# the LLAMA_MODEL_PATH environment variable so the notebook is not tied to a
# single machine; the original location remains the fallback (also used when
# the variable is set but empty).
model_path = (
    os.environ.get("LLAMA_MODEL_PATH")
    or "/Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf"
)
embedding_model = 'hkunlp/instructor-xl'
# Model file expected inside `model_path`.
instruction_model_path = f"{model_path}/gguf-model-f16.bin"
# CSV used both for the pandas DataFrame and for the document loader.
static_document_src_path = 'dataset/wine_100.csv'

# Individual generation / runtime settings, collected into `config` below.
n_gpu_layers = 32
n_batch = 512
n_ctx = 5120
n_tokens = 256
n_repetition_penalty = 1.0
n_temperature = 0.6

# Settings dictionary handed to the CTransformers model as its `config`.
config = {
    'max_new_tokens': n_tokens,
    'repetition_penalty': n_repetition_penalty,
    'batch_size': n_batch,
    'context_length': n_ctx,
    'reset': True,
    'temperature': n_temperature,
    'gpu_layers': n_gpu_layers
}

In [None]:
# Read the wine CSV into a DataFrame so its rows can be inspected next to the answers.
df = pd.read_csv(filepath_or_buffer=static_document_src_path)

In [None]:
# Initialize the embedding model (named by `embedding_model`) that is later
# passed to the vector store when the documents are indexed.
embedding = HuggingFaceInstructEmbeddings(model_name=embedding_model)

In [None]:
# Load the CSV file as LangChain documents.
loader = CSVLoader(file_path=static_document_src_path)
docs = loader.load()

# Index the documents, using the embedding model, in an in-memory vector store.
db = DocArrayInMemorySearch.from_documents(docs, embedding)

# Expose the store as a retriever for the QA chain.
retriever = db.as_retriever()

In [None]:
# Load the local instruction model through the CTransformers wrapper.
model = CTransformers(
    config=config,
    gpu_layers=n_gpu_layers,
    model=instruction_model_path,
)

In [None]:
# Combine the retriever and the language model into a retrieval QA chain.
qa_chain = RetrievalQA.from_chain_type(retriever=retriever, llm=model)

In [None]:
# Rows of the dataset whose country is Portugal, shown for comparison with the next answer.
df.loc[df['country'].eq('Portugal')]

In [None]:
# Query: ask the QA chain for a wine from Tejo, Portugal, and print its answer.
query = "Recommend me a wine from Tejo, Portugal."
# Optional: set `langchain.debug = True` before this call and back to False
# after it (as the last query of this notebook does) to get debug output.
response = qa_chain.run(query)
print(response)

In [None]:
# Rows of the dataset whose country is Argentina, shown for comparison with the next answer.
df.loc[df['country'].eq('Argentina')]

In [None]:
# Query: ask the QA chain for an Argentinian wine with a black-cherry aroma
# and print its answer.
query = "Recommend me a wine from Argentina with black-cherry aroma."
response = qa_chain.run(query)
print(response)

In [None]:
# Rows of the dataset whose country is France, shown for comparison with the next answers.
df.loc[df['country'].eq('France')]

In [None]:
# Query: ask the QA chain for a French wine to pair with seafood and print
# its answer.
query = "Recommend me a French wine that pairs well with seafood."
response = qa_chain.run(query)
print(response)

In [None]:
# Query: ask the QA chain for a French wine to pair with pasta and print
# its answer.
query = "Recommend me a French wine that pairs well with pasta."
response = qa_chain.run(query)
print(response)

In [None]:
# Rows of the dataset whose country is US, shown for comparison with the next answer.
df.loc[df['country'].eq('US')]

In [None]:
# Query: ask the QA chain for a smoky US wine with LangChain's global debug
# flag switched on for this single call.
query = "Recommend me a wine from US with smoky taste."
langchain.debug = True
try:
    response = qa_chain.run(query)
finally:
    # Always switch the flag back off, even if the chain raises; otherwise
    # the global debug setting would leak into every later cell.
    langchain.debug = False

In [None]:
# Print the answer stored in `response` by the previous cell; it is printed in
# its own cell, presumably to keep it apart from that cell's debug output.
print(response)