In [1]:
# One-time environment setup (uncomment to run):
# !pip install --upgrade langchain accelerate transformers sentencepiece
# !pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu

# `!export VAR=...` only sets the variable inside a short-lived subshell, so the
# notebook kernel never sees it. Set it on the kernel process itself instead.
# Keep this cell ahead of the import cell so the flag is in place before torch loads.
import os

os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

In [2]:
import pandas as pd
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import HuggingFacePipeline
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [3]:
# Initialize the LLM: load FLAN-T5 once and wrap it for use from LangChain.
instruction_model = 'google/flan-t5-xxl'

tokenizer = T5Tokenizer.from_pretrained(instruction_model)
# device_map="auto" delegates weight placement to accelerate; offload_folder is
# where weights that are offloaded to disk get written.
model = T5ForConditionalGeneration.from_pretrained(
    instruction_model,
    device_map="auto",
    offload_folder="/tmp/offload",
)

# The pipeline receives an already-instantiated model, so two arguments the
# original call passed are dropped here:
#   * device_map  - a shortcut for from_pretrained kwargs, which only matter when
#                   the pipeline loads the checkpoint itself;
#   * trust_remote_code - T5 is a built-in transformers architecture, so the flag
#                   would only opt in to executing code fetched from the Hub.
generate_text = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=1000,  # upper bound on generated sequence length
)

# LangChain-compatible LLM object backed by the local transformers pipeline.
transformer_pipeline = HuggingFacePipeline(pipeline=generate_text)

You are using the legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [4]:
# Locations of the two external resources this cell needs.
file = 'dataset/wine_100.csv'
embedding_model = 'hkunlp/instructor-xl'

# Document source: a CSV loader pointed at the wine dataset.
loader = CSVLoader(file_path=file)

# Embedding model used to vectorize the documents.
embedding = HuggingFaceInstructEmbeddings(model_name=embedding_model)

# Build an in-memory vector index straight from the loader.
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embedding,
)
index = index_creator.from_loaders([loader])

load INSTRUCTOR_Transformer
max_seq_length  512


In [5]:
# Vector store: load the documents and embed them into an in-memory store.
# NOTE(review): `index` from the previous cell was already built from the same
# loader and embedding, so the documents are embedded a second time here -
# confirm whether both stores are really needed.
docs = loader.load()
db = DocArrayInMemorySearch.from_documents(docs, embedding)

# Retriever view over the store (default search settings).
retriever = db.as_retriever()

# Question-answering chain that feeds retrieved documents to the local LLM.
# Other chain types that could be used here: map_reduce, refine, map_rerank.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=transformer_pipeline,
    chain_type="stuff",
    verbose=True,
)

In [6]:
# Ask the chain for a single Italian wine and print its answer.
# `response` stays bound for later inspection.
query = "Recommend me a nice wine from Italy."
print(response := qa_chain.run(query))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Canicatt 2009 Aynat Nero d'Avola (Sicilia)


In [7]:
# Ask the chain for two Chilean wines and print its answer.
# `response` stays bound for later inspection.
query = "Recommend me two nice wines from Chile."
print(response := qa_chain.run(query))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Tres Palacios 2011 Reserve Pinot Noir (Maipo Valley) and Sundance 2011 Merlot (Maule Valley)
