In [1]:
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

In [24]:
# https://www.youtube.com/watch?v=CMVq48torQY
import accelerate 
import json
import transformers

model_id = "lmsys/fastchat-t5-3b-v1.0"
config = transformers.AutoConfig.from_pretrained(model_id)

with accelerate.init_empty_weights():
    fake_model = transformers.AutoModelForCausalLM.from_config(config)
    
device_map = accelerate.infer_auto_device_map(fake_model, max_memory={0: "3GiB", "cpu": "24GiB"})
print(json.dumps(device_map, indent=4))

ValueError: Unrecognized configuration class <class 'transformers.models.t5.configuration_t5.T5Config'> for this kind of AutoModel: AutoModelForCausalLM.
Model type should be one of BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, CamembertConfig, CodeGenConfig, CpmAntConfig, CTRLConfig, Data2VecTextConfig, ElectraConfig, ErnieConfig, FalconConfig, GitConfig, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GPTJConfig, LlamaConfig, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MusicgenConfig, MvpConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PLBartConfig, ProphetNetConfig, QDQBertConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, Speech2Text2Config, TransfoXLConfig, TrOCRConfig, XGLMConfig, XLMConfig, XLMProphetNetConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, XmodConfig.

In [7]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# import os
# assert len(os.environ["HUGGINGFACEHUB_API_TOKEN"]) > 0

# choose GPU or CPU
# TODO: fix error "CUDA out of memory. Tried to allocate 40.00 MiB (GPU 0; 3.81 GiB total capacity; 3.36 GiB already allocated; 26.38 MiB free; 3.37 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"
# import torch
# device = torch.cuda.current_device() if torch.cuda.is_available() else -1
device = -1

In [2]:
model_id, task = "lmsys/fastchat-t5-3b-v1.0", "text2text-generation"

# the model will be downloaded on first use, if not cached in ~/.cache/huggingface/hub/

model = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task=task,
    model_kwargs={
        "temperature": 0,
        "max_length": 1000
    },
)

You are using the legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565
Device has 1 GPUs available. Provide device={deviceId} to `from_model_id` to use availableGPUs for execution. deviceId is -1 (default) for CPU and can be a positive integer associated with CUDA device id.


In [3]:
template_text = """
{question}
"""
template = PromptTemplate(template=template_text, input_variables=["question"])
llm_chain = LLMChain(prompt=template, llm=model)

In [4]:
llm_chain("Who is Sheryl Crow?")["text"]



'<pad> Sheryl  Crow  is  an  American  singer,  songwriter,  and  actress.  She  is  best  known  for  her  role  as  the  lead  singer  and  lead  guitarist  of  the  rock  band  The  Band wagon,  and  for  her  role  as  the  lead  singer  and  lead  guitarist  of  the  alternative  rock  band  The  Mamas  and  the  Papas.  Crow  has  also  been  a  member  of  the  band  The  Mamas  and  the  Papas  since  its  formation  in  1995.\n'

In [4]:
llm_chain("How old is Sheryl Crow?")["text"]



'<pad> Sheryl  Crow  is  57  years  old.'

In [5]:
template_text = """
{question}
Explain step by step.
"""
template = PromptTemplate(template=template_text, input_variables=["question"])
llm_chain = LLMChain(prompt=template, llm=model)

In [6]:
llm_chain("How old is Sheryl Crow?")["text"]

'<pad> Sheryl Crow is a singer and songwriter who was born on February 28, 1969. So, Sheryl Crow is currently 57 years old. To find her age, you would need to subtract her age from her birth date. So, the answer is 57 years old.'

In [7]:
from langchain.document_loaders import WikipediaLoader

loader = WikipediaLoader("Sheryl_Crow")
document = loader.load()

In [8]:
# https://python.langchain.com/docs/use_cases/question_answering/#step-1-load

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
all_splits = text_splitter.split_documents(document)

In [9]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# https://integrations.langchain.com/embeddings
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={
        'device': 'cpu'
    },
    encode_kwargs={
        'normalize_embeddings': False
    }
)

vectorstore = Chroma.from_documents(documents=all_splits, embedding=hf_embeddings)

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

ValueError: Could not import chromadb python package. Please install it with `pip install chromadb`.