In [1]:
from torch import cuda, bfloat16
import transformers

# Local directory holding the Falcon-40B-Instruct checkpoint.
model_name = '/home/ubuntu/model/falcon-40b-instruct'

# Label of the currently selected CUDA device, or 'cpu' when CUDA is unavailable.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# Quantization settings so the large model loads with less GPU memory
# (4-bit NF4 weights, double quantization, bfloat16 compute).
# This requires the `bitsandbytes` library.
bnb_config = transformers.BitsAndBytesConfig(
    bnb_4bit_compute_dtype=bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True
)

# NOTE(review): trust_remote_code=True runs Python code shipped with the
# checkpoint — only use with checkpoints you trust.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True
)
model.eval()  # inference only
print(f"Model loaded on {device}")


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda113.so
CUDA SETUP: CUDA runtime path found: /home/ubuntu/anaconda3/envs/ldm/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda113.so...


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

Model loaded on cuda:0


In [2]:
from transformers import StoppingCriteria, StoppingCriteriaList
import torch


tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

# Stop sequences: the token ids of ('Human', ':') and ('AI', ':'), each stored
# as a LongTensor placed on `device`.
stop_token_ids = [
    torch.LongTensor(tokenizer.convert_tokens_to_ids(turn_marker)).to(device)
    for turn_marker in (['Human', ':'], ['AI', ':'])
]

In [3]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stop generation once the sequence ends with one of the stop sequences.

    The stop sequences are read from the notebook-level ``stop_token_ids``
    list (one 1-D LongTensor per sequence). Only the first row of the batch
    (``input_ids[0]``) is inspected.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when ``input_ids[0]`` ends with any stop sequence.

        Args:
            input_ids: token ids produced so far; the first row is checked.
            scores: unused, accepted for interface compatibility.
            **kwargs: unused, accepted for interface compatibility.

        Returns:
            True if the tail of the first sequence equals a stop sequence,
            False otherwise.
        """
        generated = input_ids[0]
        for stop_ids in stop_token_ids:
            stop_len = len(stop_ids)
            # A sequence shorter than the stop sequence cannot end with it.
            # Without this guard torch.eq would broadcast a length-1 tail
            # against the stop ids and could report a false match.
            if stop_len == 0 or generated.shape[0] < stop_len:
                continue
            # Compare on the device of the generated ids; the stop ids were
            # created on a fixed device that may differ from it.
            tail = generated[-stop_len:]
            if bool(torch.eq(tail, stop_ids.to(tail.device)).all()):
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [4]:
generate_text = transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    # langchain expects the full text
    return_full_text=True,
    # generation parameters are passed here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1,  # without this output begins repeating
    # 'randomness' of outputs; 0.0 is meant to make decoding deterministic —
    # TODO confirm how the installed transformers version treats 0.0
    temperature=0.0
)

The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForC

In [5]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can
# be passed as `llm=` to the chains built below.
llm = HuggingFacePipeline(pipeline=generate_text)

In [75]:
# Prompt for the interpretation step: asks for the sentiment and a one-word
# subject of the text substituted for {input}. The trailing 'AI': line
# presumably cues the model's reply — confirm against the stop sequences.
Interprete_template = """
Use exactly one word and Interprete the text below and evaluate the text and provide me with the sentiment and subject. 
sentiment: is the text in a positive, neutral or negative sentiment? 
subject: What subject is the text about? Use exactly one word.

text: {input}
'AI':
"""

In [76]:
# Chain for the interpretation step; its result is stored under "Interprete".
prompt_template = PromptTemplate.from_template(template=Interprete_template)
Interprete_chain = LLMChain(
    prompt=prompt_template,
    llm=llm,
    output_key="Interprete",
)
# Try the chain on a single positive review.
Interprete_chain.predict(input="I ordered Pizza Salami and it was awesome!")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


'The text has a positive sentiment as it expresses enjoyment of the pizza. The subject of the text is food, specifically pizza with salami toppings.'

In [88]:
# Prompt for the reply step: consumes the interpretation stored under {Interprete}.
# NOTE(review): the wording below contains typos (e.g. "Given{Interprete}" has
# no space); the text is left untouched because changing a prompt changes the
# model output.
response_template = """
Given{Interprete}, you will get a sentiment and subject to evaluate.
If sentiment is about a bad experince, write and email template to to customer service at xxx@gmail.com. 
If the sentiment is positive or neutral write a thank note. 
'AI':
"""

# LLMChain that interprets the input text (sentiment and subject).
prompt_Interprete = PromptTemplate.from_template(
    template=Interprete_template
)

# The interpretation is stored under "Interprete": that is the variable the
# response prompt reads, and it must not collide with chain_response's
# "response" output key.
chain_Interprete = LLMChain(llm=llm, prompt=prompt_Interprete, output_key="Interprete")
# LLMChain that writes the reply (complaint e-mail or thank-you note) from the interpretation.
prompt_response = PromptTemplate.from_template(template=response_template)
chain_response = LLMChain(llm=llm, prompt=prompt_response, output_key="response")

In [90]:
from langchain.chains import SequentialChain
# Run interpretation first, then the reply; expose both intermediate and final outputs.
overall_chain = SequentialChain(
    input_variables=["input"],
    chains=[Interprete_chain, chain_response],
    output_variables=["Interprete", "response"],
)
overall_chain({"input": "I ordered Pizza Salami and it was great"})

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


{'input': 'I ordered Pizza Salami and it was great',
 'Interprete': 'The text has a positive sentiment as it mentions that the pizza was great. The subject of the text is food, specifically pizza with salami toppings.',
 'response': 'Thank you for your feedback! We are glad to hear that you enjoyed our pizza with salami toppings. We appreciate your business and look forward to serving you again soon.'}

In [6]:
from langchain.chains import SequentialChain

# Step 1: write a review from the dish name and the experience.
prompt_review = PromptTemplate.from_template(
    "You ordered {dish_name} and your experience was {experience}. Write a review: "
)
chain_review = LLMChain(prompt=prompt_review, llm=llm, output_key="review")

# Step 2: write a follow-up comment on the restaurant review.
prompt_comment = PromptTemplate.from_template(
    "Given the restaurant review: {review}, write a follow-up comment: "
)
chain_comment = LLMChain(prompt=prompt_comment, llm=llm, output_key="comment")

# Step 3: summarise the review.
prompt_summary = PromptTemplate.from_template(
    "Summarise the review in one short sentence: \n\n {review}"
)
chain_summary = LLMChain(prompt=prompt_summary, llm=llm, output_key="summary")

# Step 4: translate the summary into German.
prompt_translation = PromptTemplate.from_template(
    "Translate the summary to german: \n\n {summary}"
)
chain_translation = LLMChain(
    prompt=prompt_translation,
    llm=llm,
    output_key="german_translation",
)

# Chain the four steps; every intermediate result is returned alongside the inputs.
overall_chain = SequentialChain(
    input_variables=["dish_name", "experience"],
    chains=[chain_review, chain_comment, chain_summary, chain_translation],
    output_variables=["review", "comment", "summary", "german_translation"],
)

overall_chain({"dish_name": "Pizza Salami", "experience": "It was awful!"})

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


{'dish_name': 'Pizza Salami',
 'experience': 'It was awful!',
 'review': "\nI'm sorry to hear that you had an unpleasant experience with the pizza salami. Can you please provide more details about what went wrong? This will help me understand the issue better and improve our services in the future.",
 'comment': '\nThank you for reaching out to us regarding your recent dining experience at our restaurant. We apologize for any inconvenience caused by the pizza salami. Please let us know more about what went wrong so we can address the issue and make improvements. Your feedback is valuable to us, and we appreciate your patience as we work towards providing a better dining experience for all of our guests.',
 'summary': '',
 'german_translation': '\nDie Geschichte handelt von einem Jungen, der in einer Welt lebt, in der alle Menschen mit magischen Fähigkeiten geboren werden. Er ist ein sogenannter "Nichtmagier", was bedeutet, dass er keine Magie hat und sich in dieser Welt nicht zurechtfi

In [8]:

from langchain.llms import OpenAI
from langchain.chains.router import MultiPromptChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE

# Prompt templates for the three routing destinations. Each one sets a
# persona and ends with the {input} placeholder for the text to analyze.

# Persona that highlights the positive aspects of the text.
positive_template = """You are an AI that focuses on the positive side of things. \
Whenever you analyze a text, you look for the positive aspects and highlight them. \
Here is the text:
{input}"""

# Persona that gives a balanced analysis of the text.
neutral_template = """You are an AI that has a neutral perspective. You just provide a balanced analysis of the text, \
not favoring any positive or negative aspects. Here is the text:
{input}"""

# Persona that points out the potential downsides in the text.
negative_template = """You are an AI that is designed to find the negative aspects in a text. \
You analyze a text and show the potential downsides. Here is the text:
{input}"""

In [9]:
# One entry per routing destination: its name, a description, and the prompt
# template to use for it.
prompt_infos = [
    dict(
        name="positive",
        description="Good for analyzing positive sentiments",
        prompt_template=positive_template,
    ),
    dict(
        name="neutral",
        description="Good for analyzing neutral sentiments",
        prompt_template=neutral_template,
    ),
    dict(
        name="negative",
        description="Good for analyzing negative sentiments",
        prompt_template=negative_template,
    ),
]

# Build one LLMChain per destination, keyed by the destination name.
destination_chains = {}
for p_info in prompt_infos:
    name, prompt_template = p_info["name"], p_info["prompt_template"]
    prompt = PromptTemplate(input_variables=["input"], template=prompt_template)
    chain = LLMChain(prompt=prompt, llm=llm)
    destination_chains[name] = chain
destination_chains

{'positive': LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=PromptTemplate(input_variables=['input'], output_parser=None, partial_variables={}, template='You are an AI that focuses on the positive side of things. Whenever you analyze a text, you look for the positive aspects and highlight them. Here is the text:\n{input}', template_format='f-string', validate_template=True), llm=HuggingFacePipeline(cache=None, verbose=False, callbacks=None, callback_manager=None, pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7fe87f293e20>, model_id='gpt2', model_kwargs=None, pipeline_kwargs=None), output_key='text'),
 'neutral': LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=PromptTemplate(input_variables=['input'], output_parser=None, partial_variables={}, template='You are an AI that has a neutral perspective. You just provide a balanced analysis of the text, not favoring any positive or nega

In [13]:
# One "name: description" line per destination, joined with newlines.
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)

# Fill the stock multi-prompt router template with the destination list.
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)
router_prompt = PromptTemplate(
    output_parser=RouterOutputParser(),
    input_variables=["input"],
    template=router_template,
)

router_chain = LLMRouterChain.from_llm(llm, router_prompt)

# Route each input to one destination chain; the neutral chain is the default.
chain = MultiPromptChain(
    verbose=True,
    default_chain=destination_chains["neutral"],
    destination_chains=destination_chains,
    router_chain=router_chain,
)

chain.run("I ordered unresonabily expensive Pizza Salami for 200.99$ and it was great!")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.




[1m> Entering new MultiPromptChain chain...[0m


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


positive: {'input': 'I ordered unresonably expensive Pizza Salami for 200.99$ and it was great!'}
[1m> Finished chain.[0m


'\nAs an AI language model, I cannot judge whether the price of the pizza was reasonable or not. However, based on the given text, it seems like the user enjoyed the pizza and found it to be worth the cost. Therefore, highlighting the positive aspect of the experience would be appropriate.'

In [39]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Use a dedicated name for the embedding checkpoint: reassigning `model_name`
# would overwrite the Falcon checkpoint path that the model and tokenizer
# cells read, so re-running those cells would load the wrong model.
embedding_model_name = "hkunlp/instructor-large"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': True}
embeddings = HuggingFaceInstructEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

2023-07-11 11:52:56.858 INFO    sentence_transformers.SentenceTransformer: Load pretrained SentenceTransformer: hkunlp/instructor-large


load INSTRUCTOR_Transformer
max_seq_length  512


In [13]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Use a dedicated name for the embedding checkpoint: reassigning `model_name`
# would overwrite the Falcon checkpoint path that the model and tokenizer
# cells read, so re-running those cells would load the wrong model.
embedding_model_name = "hkunlp/instructor-large"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': True}
embeddings = HuggingFaceInstructEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)


2023-07-11 14:43:42.141 INFO    sentence_transformers.SentenceTransformer: Load pretrained SentenceTransformer: hkunlp/instructor-large


load INSTRUCTOR_Transformer
max_seq_length  512


[Document(page_content='Solar Irradiance Anticipative Transformer\nThomas M. Mercier\nBournemouth University\ntmercier2@gmail.comTasmiat Rahman\nUniversity of Southampton\nt.rahman@soton.ac.ukAmin Sabet\nEscherCloud AI\na.sabet@eschercloud.ai\nAbstract\nThis paper proposes an anticipative transformer-based\nmodel for short-term solar irradiance forecasting. Given a\nsequence of sky images, our proposed vision transformer\nencodes features of consecutive images, feeding into a\ntransformer decoder to predict irradiance values associated\nwith future unseen sky images. We show that our model ef-\nfectively learns to attend only to relevant features in im-\nages in order to forecast irradiance. Moreover, the pro-\nposed anticipative transformer captures long-range depen-\ndencies between sky images to achieve a forecasting skill of\n21.45 % on a 15 minute ahead prediction for a newly intro-\nduced dataset of all-sky images when compared to a smart\npersistence model.\n1. Introduction\nSol

2023-07-12 08:20:38.388 INFO    chromadb.telemetry.posthog: Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.




[1m> Entering new  chain...[0m


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


[32;1m[1;3m I am going to read the paper
Action: paper
Action Input: "https://arxiv.org/abs/2001.09868"[0m

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.



Observation: [36;1m[1;3m
The Solar Irradiance Anticipative Transformer (SIAT) is a new approach to short-term solar irradiance forecasting. It uses a self-attention based backbone network to create feature representations for each frame in a sequence of all-sky images, and then uses a generative pre-training step to learn to generate future frames given previous ones. The authors compare their method to other state-of-the-art approaches and find that it achieves better performance on a newly introduced dataset of all-sky images.[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: The paper proposes a new approach to short-term solar irradiance forecasting using a self-attention based backbone network and generative pre-training.[0m

[1m> Finished chain.[0m


'The paper proposes a new approach to short-term solar irradiance forecasting using a self-attention based backbone network and generative pre-training.'