In [1]:
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

In [7]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Hugging Face model identifier and the pipeline task it is loaded for.
model_id = "lmsys/fastchat-t5-3b-v1.0"
task = "text2text-generation"

# Keyword arguments handed to `from_model_id` as `model_kwargs`.
# NOTE(review): these look like generation settings; confirm that the installed
# LangChain version applies them at generation time when passed this way.
hf_model_kwargs = {"temperature": 0, "max_length": 1000}

# The model will be downloaded on first use, if not cached in
# ~/.cache/huggingface/hub/
model = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task=task,
    model_kwargs=hf_model_kwargs,
)

You are using the legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565
Device has 1 GPUs available. Provide device={deviceId} to `from_model_id` to use availableGPUs for execution. deviceId is -1 (default) for CPU and can be a positive integer associated with CUDA device id.


In [3]:
# Pass-through prompt: the question is sent to the model as-is, surrounded by a
# leading and a trailing newline.
template_text = "\n{question}\n"

template = PromptTemplate(
    input_variables=["question"],
    template=template_text,
)

# Chain that fills the prompt and forwards it to the loaded model.
llm_chain = LLMChain(llm=model, prompt=template)

In [4]:
# Run the chain on a single question and display only the generated text.
who_response = llm_chain("Who is Sheryl Crow?")
who_response["text"]



'<pad> Sheryl  Crow  is  an  American  singer,  songwriter,  and  actress.  She  is  best  known  for  her  role  as  the  lead  singer  and  lead  guitarist  of  the  rock  band  The  Band wagon,  and  for  her  role  as  the  lead  singer  and  lead  guitarist  of  the  alternative  rock  band  The  Mamas  and  the  Papas.  Crow  has  also  been  a  member  of  the  band  The  Mamas  and  the  Papas  since  its  formation  in  1995.\n'

In [4]:
# Ask for the age with the pass-through prompt; display only the generated text.
age_response = llm_chain("How old is Sheryl Crow?")
age_response["text"]



'<pad> Sheryl  Crow  is  57  years  old.'

In [5]:
# Same prompt as before, with an extra instruction line appended after the
# question.
template_text = "\n{question}\nExplain step by step.\n"

template = PromptTemplate(
    input_variables=["question"],
    template=template_text,
)

# Rebuild the chain so that it uses the new prompt.
llm_chain = LLMChain(llm=model, prompt=template)

In [6]:
# Same question as earlier, now routed through the "step by step" prompt.
stepwise_response = llm_chain("How old is Sheryl Crow?")
stepwise_response["text"]

'<pad> Sheryl Crow is a singer and songwriter who was born on February 28, 1969. So, Sheryl Crow is currently 57 years old. To find her age, you would need to subtract her age from her birth date. So, the answer is 57 years old.'

In [7]:
# State the current year in the question itself, then ask for the age again.
dated_response = llm_chain("The year is 2023. How old is Sheryl Crow?")
dated_response["text"]

'<pad> Sheryl Crow was born in 1969. So, in 2023, Sheryl Crow would be 69 years old. So the answer is 69.'

In [2]:
from langchain.document_loaders import WikipediaLoader

# Fetch Wikipedia content for the query "Sheryl_Crow".
# NOTE(review): the loader may return more than one page for a query; confirm
# how many documents end up in `document` and whether that is intended.
wikipedia_query = "Sheryl_Crow"
loader = WikipediaLoader(wikipedia_query)

# `document` holds whatever `load()` returns; it is passed to
# `split_documents` in a later cell.
document = loader.load()

In [3]:
# https://python.langchain.com/docs/use_cases/question_answering/#step-1-load

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded content into chunks of at most 500 characters, with no
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)

# The resulting chunks are what gets embedded and indexed later on.
all_splits = text_splitter.split_documents(document)

In [1]:
# https://github.com/chroma-core/chroma/blob/main/chromadb/__init__.py#L57
# Replace the `sqlite3` entry in `sys.modules` with the `pysqlite3` package, so
# that any later `import sqlite3` in this process resolves to pysqlite3.
# Presumably needed because chromadb does not accept the system SQLite build
# (see the link above) -- TODO confirm. Run this cell before importing Chroma.
import sys
__import__("pysqlite3")  # import by name so the module is registered in sys.modules
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")  # re-register it under the `sqlite3` key

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# https://integrations.langchain.com/embeddings
# Settings for the sentence-transformers embedding model: run on the CPU and
# leave the embedding vectors un-normalised.
embedding_model_kwargs = {"device": "cpu"}
embedding_encode_kwargs = {"normalize_embeddings": False}

hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs=embedding_model_kwargs,
    encode_kwargs=embedding_encode_kwargs,
)

# Embed every chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=hf_embeddings,
)

In [6]:
# Sanity check of the index: show the stored chunks closest to "Kid Rock".
vectorstore.similarity_search(query="Kid Rock")

[Document(page_content='Kid Rock), and "Soak Up the Sun" (2002).', metadata={'source': 'https://en.wikipedia.org/wiki/Sheryl_Crow', 'summary': 'Sheryl Suzanne Crow (born February 11, 1962) is an American musician, singer, and songwriter. Her music incorporates elements of rock, pop, country, folk, and blues. She has released eleven studio albums, five compilations, and three live albums, and contributed to several film soundtracks. Her most popular songs include "All I Wanna Do" (1994), "Strong Enough" (1994), "If It Makes You Happy" (1996), "Everyday Is a Winding Road" (1996), "My Favorite Mistake" (1998), "Picture" (2002, duet with Kid Rock), and "Soak Up the Sun" (2002).\nCrow has sold more than 50 million albums worldwide and won nine Grammy Awards (out of 32 nominations) from the National Academy of Recording Arts and Sciences. As an actress, Crow has appeared on the television series 30 Rock, Cop Rock, GCB, Cougar Town, Jon Stewart\'s and Stephen Colbert\'s Rally to Restore Sanit

In [7]:
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

# Hugging Face model identifier and the pipeline task it is loaded for.
model_id = "lmsys/fastchat-t5-3b-v1.0"
task = "text2text-generation"

# Keyword arguments handed to `from_model_id` as `model_kwargs`.
# NOTE(review): confirm that the installed LangChain version applies these at
# generation time when passed this way.
hf_model_kwargs = {"temperature": 0, "max_length": 1000}

model = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task=task,
    model_kwargs=hf_model_kwargs,
)

# Question answering over the vector store: the retriever supplies stored
# chunks and the model produces the answer.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=model, retriever=retriever)

# Same age question as before, this time backed by the retrieved Wikipedia text.
qa_chain({"query": "How old is Sheryl Crow?"})

You are using the legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565
Device has 1 GPUs available. Provide device={deviceId} to `from_model_id` to use availableGPUs for execution. deviceId is -1 (default) for CPU and can be a positive integer associated with CUDA device id.


{'query': 'How old is Sheryl Crow?',
 'result': '<pad>  Sheryl  Suzanne  Crow  (born  February  11,  1962)  is  55  years  old.\n'}