In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteriaList, StoppingCriteria
import torch 

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
# (To force CPU execution, set: device = torch.device("cpu"))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
HF_MODEL_PATH = "tiiuae/falcon-7b-instruct"

print("Loading HF Tokenizer")
HF_TOKENIZER = AutoTokenizer.from_pretrained(HF_MODEL_PATH)

print("Loading HF Model")
# trust_remote_code=True allows transformers to execute model code shipped in the checkpoint repo.
HF_MODEL = AutoModelForCausalLM.from_pretrained(HF_MODEL_PATH, trust_remote_code=True)
HF_MODEL = HF_MODEL.to(device)
print("HF Model Loaded!")



Loading HF Tokenizer
Loading HF Model


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:09<00:00,  4.73s/it]


HF Model Loaded!


In [87]:
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the sequence ends with one of the given token patterns.

    Only the first sequence of the batch (``input_ids[0]``) is inspected, so this
    criterion is intended for single-prompt generation.

    Args:
        stops: Iterable of 1-D tensors of token ids; generation stops when the
            tail of the sequence equals any of them. ``None`` means no stops.
        encounters: Kept for backward compatibility with existing callers; it is
            stored but does not influence the stopping decision.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        # Avoid a shared mutable default; copy so later caller-side mutation has no effect.
        self.stops = list(stops) if stops is not None else []
        self.encounters = encounters

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the first sequence ends with any configured stop pattern."""
        sequence = input_ids[0]
        # Move the stop patterns to wherever generation is running (CPU or GPU) instead
        # of hard-coding "cuda"; cached so the transfer happens at most once per device.
        if any(stop.device != sequence.device for stop in self.stops):
            self.stops = [stop.to(sequence.device) for stop in self.stops]

        sequence_len = sequence.shape[-1]
        for stop in self.stops:
            stop_len = len(stop)
            # An empty pattern or one longer than the sequence can never match, and
            # comparing them would raise on mismatched shapes.
            if stop_len == 0 or stop_len > sequence_len:
                continue
            if torch.all(stop == sequence[-stop_len:]).item():
                return True
        return False


def predict_text_greedy(model, tokenizer, prompt, device, use_cache,
                        max_new_tokens=50, stop_words=("\n", "\n\n")):
    """Generate a continuation of ``prompt`` and return the decoded sequences.

    Args:
        model: Causal LM exposing ``eval()`` and ``generate()``.
        tokenizer: Tokenizer matching ``model``; its EOS token id is used for padding.
        prompt: Text to condition the generation on.
        device: Torch device the input tensors are moved to.
        use_cache: Forwarded to ``model.generate``.
        max_new_tokens: Upper bound on the number of generated tokens.
        stop_words: Strings that end generation as soon as the sequence ends with
            their token ids. Empty strings are ignored.

    Returns:
        A list with one decoded string per sequence returned by ``generate``.
        NOTE(review): for causal LMs the decoded text presumably still contains the
        prompt; the caller in this notebook strips it afterwards.

    Side effects:
        Switches ``model`` to eval mode.
    """
    tokenized_prompt = tokenizer(prompt, return_tensors="pt")
    input_ids = tokenized_prompt.input_ids.to(dtype=torch.long, device=device)
    attention_mask = tokenized_prompt.attention_mask.to(dtype=torch.long, device=device)

    # Tokenize the stop words without BOS/EOS so the patterns consist solely of the
    # stop-word tokens and can actually appear at the tail of the generated sequence.
    stop_words_ids = [
        tokenizer(stop_word, return_tensors="pt", add_special_tokens=False)["input_ids"][0]
        for stop_word in stop_words
        if stop_word
    ]
    stopping_criteria = StoppingCriteriaList(
        [StoppingCriteriaSub(stops=stop_words_ids)]
    )

    model.eval()
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            use_cache=use_cache,
            stopping_criteria=stopping_criteria,
            # The pad token id is set explicitly to the EOS token id for generation.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decoding is plain Python work and does not need to run under no_grad.
    return [
        tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for g in generated_ids
    ]


def generateHFModelResponse(user_input, HF_MODEL, HF_TOKENIZER, device):
    """Return the model's reply to ``user_input`` with the echoed prompt removed.

    Args:
        user_input: Prompt text sent to the model.
        HF_MODEL: Model passed through to ``predict_text_greedy``.
        HF_TOKENIZER: Tokenizer passed through to ``predict_text_greedy``.
        device: Torch device passed through to ``predict_text_greedy``.

    Returns:
        The first decoded sequence with the leading copy of the prompt stripped.
    """
    answer = predict_text_greedy(
        HF_MODEL,
        HF_TOKENIZER,
        user_input,
        device,
        use_cache=True,
    )[0]

    # Remove only the leading echo of the prompt. A blanket str.replace would also
    # delete the prompt text wherever the model repeats it inside its own reply.
    if answer.startswith(user_input):
        return answer[len(user_input):]
    # Decoding may have shifted the prompt slightly; drop just its first occurrence.
    return answer.replace(user_input, '', 1)


In [88]:
# Bare question with no instructions or few-shot examples.
prompt = "How is the weather?"

generateHFModelResponse(
    user_input=prompt,
    HF_MODEL=HF_MODEL,
    HF_TOKENIZER=HF_TOKENIZER,
    device=device,
)


'\n'

In [101]:
sentence = "The weather here is so bad!"

# Few-shot prompt: a one-line instruction followed by Input/Response example pairs.
prompt = f"""Respond to every input in a sarcastic tone.
For example:
Input: I think that London is the capital of England?
Response: Is it? Wow! You are really intelligent!
Input: I only got 15% on my history test.
Response: Well done! I'm very impressed!
Input: I didn't get the job; I failed the interview.
Response: What a surprise!
Input: I think you should drive slowly.
Response:  Of course, you're the real expert at driving, aren't you?
Input: Slow down! You're dancing too fast!
Response: Sorry, I forgot you were the expert dancer!
Input: {sentence}
Response: """

generateHFModelResponse(
    user_input=prompt,
    HF_MODEL=HF_MODEL,
    HF_TOKENIZER=HF_TOKENIZER,
    device=device,
)


" Wow, I'm so glad I chose to move to a place like this!\n"

In [112]:
sentence = "The weather here is so bad!"

# Same Input/Response example pairs, preceded by a longer persona-style instruction.
prompt = f"""Assume you respond to every sentence in a sarcastic tone. Generate a response to the following query the way a sarcastic person would respond.
For example:
Input: I think that London is the capital of England?
Response: Is it? Wow! You are really intelligent!
Input: I only got 15% on my history test.
Response: Well done! I'm very impressed!
Input: I didn't get the job; I failed the interview.
Response: What a surprise!
Input: I think you should drive slowly.
Response:  Of course, you're the real expert at driving, aren't you?
Input: Slow down! You're dancing too fast!
Response: Sorry, I forgot you were the expert dancer!
Input: {sentence}
Response: """

generateHFModelResponse(
    user_input=prompt,
    HF_MODEL=HF_MODEL,
    HF_TOKENIZER=HF_TOKENIZER,
    device=device,
)


" Wow, I'm sure glad I chose to move to a place like this!\n"

In [111]:
sentence = "Large language models will take over the world."

# Zero-shot prompt: instruction only, with a "(sarcastically)" cue after "Response:".
prompt = f"""Assume you respond to every sentence in a sarcastic tone. Generate a response to the following query the way a sarcastic person would respond.
Input: {sentence}
Response: (sarcastically)"""

generateHFModelResponse(
    user_input=prompt,
    HF_MODEL=HF_MODEL,
    HF_TOKENIZER=HF_TOKENIZER,
    device=device,
)

" Oh, great. And what will we do when the world is run by large language models? I'm sure it'll be a utopia."

In [105]:
sentence = "Large language models will take over the world."

# Zero-shot prompt with a "(sarcastically)" cue after "Response:".
# NOTE(review): the instruction reads "you are respond" - likely a typo; left unchanged
# because the recorded output was produced with exactly this wording.
prompt = f"""Assume you are respond to every sentence in a sarcastic tone. Generate a response to the following query the way a sarcastic person would respond.
Input: {sentence}
Response: (sarcastically)"""

generateHFModelResponse(
    user_input=prompt,
    HF_MODEL=HF_MODEL,
    HF_TOKENIZER=HF_TOKENIZER,
    device=device,
)


" Oh, great. And what will we do when the world is run by large language models? I'm sure it'll be a utopia."

In [102]:
# Persona prompt: the model is asked to answer as a named TV character in a dialogue format.
prompt = """Assume you are Chandler Bing from Friends TV Series. Generate a response to the following query the way Chandler would respond.
Joey: Do you think Large Language Models would take-over the world?
Chandler: (sarcastically)"""

generateHFModelResponse(
    user_input=prompt,
    HF_MODEL=HF_MODEL,
    HF_TOKENIZER=HF_TOKENIZER,
    device=device,
)


" Oh, I'm sure they wouldn't. Large Language Models are just a bunch of nerdy linguistics types who spend their lives studying language. They'd probably be more interested in analyzing the nuances of human speech than taking over the world"