In [17]:
# One-time environment setup: uncomment the line below to install the packages
# this notebook imports. `%pip` (rather than `!pip`) installs into the
# environment of the running kernel.
# %pip install --upgrade huggingface_hub langchain InstructorEmbedding sentence_transformers pandas "langchain[docarray]"

In [15]:
import os
from getpass import getpass

import pandas as pd
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import DocArrayInMemorySearch

In [24]:
# HUB API KEY
# Never store the token in the notebook itself: read it from the
# HUGGINGFACEHUB_API_TOKEN environment variable, and fall back to an
# interactive prompt (input is not echoed) when the variable is unset or empty.
api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass("Hugging Face Hub API token: ")

## Using RetrievalQA chain and HuggingFaceHub model

In [17]:
# initialize LLM
# `instruction_model` is the Hub repo used for answer generation;
# `embedding_model` is the model used to embed the documents for retrieval.
instruction_model = 'google/flan-t5-xxl'
embedding_model = 'hkunlp/instructor-xl'
llm = HuggingFaceHub(
    huggingfacehub_api_token=api_key,
    repo_id=instruction_model,
    model_kwargs={'temperature': 0.1, 'max_new_tokens': 1000}
)

# Initialize documents
file = 'dataset/wine_100.csv'
loader = CSVLoader(file_path=file)
docs = loader.load()

# initialize embeddings
embedding = HuggingFaceInstructEmbeddings(model_name=embedding_model)

# initialize db
# The documents are embedded exactly once, here. Previously a second store was
# built via VectorstoreIndexCreator(...).from_loaders([loader]) from the same
# loader and embedding model, duplicating the embedding work.
db = DocArrayInMemorySearch.from_documents(
    docs,
    embedding
)

# Keep `index` available for any cell that uses it, but back it with the store
# built above instead of embedding everything again.
# NOTE(review): `index` no longer goes through the index creator's own
# loading/splitting step; confirm nothing relied on that.
index = VectorStoreIndexWrapper(vectorstore=db)

# initialize retriever
retriever = db.as_retriever()

# initialize chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff", # map_reduce, refine, map_rerank
    retriever=retriever,
    verbose=True
)

load INSTRUCTOR_Transformer
max_seq_length  512


In [19]:
# Query
# Pass the question to the RetrievalQA chain built earlier and print the
# answer string it returns.
query = "Recommend me a nice wine from Italy."
response = qa_chain.run(query)
print(response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Canicatt 2009 Aynat Nero d'Avola (Sicilia)


In [23]:
# Query
# Same chain, different question: this one asks for two wines, and the
# returned answer string is printed.
query = "Recommend me two nice wines from Chile."
response = qa_chain.run(query)
print(response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Estampa 2011 Estate Viognier-Chardonnay and Sundance 2011 Merlot
