# Chatbot

In [1]:
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
from threading import Thread

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", device_map="cuda", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

Loading checkpoint shards: 100%|██████████████████| 2/2 [00:01<00:00,  1.98it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [55]:
tokenizer.decode([0,1,2,3,4,5])

'!"#$%&'

In [65]:
tokenizer.decode([198, 12982, 25, 31373, 198, 20630, 25, 17250])

'\nUser:hello\nBot:Hi'

In [45]:
HUMAN_NAME = "User"
BOT_NAME = "Bot"

In [46]:
def to_ascii(s):
    return [ord(c) for c in s]

In [74]:
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        stop_ids = [50256, 198]

        print("toks", input_ids.tolist()[-10:])
        print("toks", tokenizer.decode(input_ids.tolist()[0][-10:]))
        for stop_id in stop_ids:
            if input_ids[0][-1] == stop_id:
                return True
        return False

def predict(message, history):

    history_transformer_format = history + [[message, ""]]
    stop = StopOnTokens()

    messages = "".join(["".join([
        f"\n{HUMAN_NAME}:"+item[0], 
        f"\n{BOT_NAME}:"+item[1]])  #curr_system_message +
                for item in history_transformer_format]).strip()
    print(messages)
    print("====================")

    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=256,
        do_sample=True,
        top_p=0.95,
        top_k=1000,
        temperature=1.0,
        num_beams=1,
        stopping_criteria=StoppingCriteriaList([stop])
        )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    all_message = ""
    partial_message  = ""
    for new_token in streamer:
        all_message += new_token
        print(new_token, to_ascii(new_token), new_token.strip() == 10)
        if len(new_token) == 0 or ord(new_token[-1]) != 10:
            partial_message += new_token
            yield partial_message
        else:
            partial_message += new_token
            yield partial_message
            break # we hit newline, we break!
    print("====================")
    print(all_message)


gr.ChatInterface(predict).queue().launch()

Running on local URL:  http://127.0.0.1:7903

To create a public link, set `share=True` in `launch()`.




User:hello
Bot:
 [] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496]]
toks User:hello
Bot:Hello
 [] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496, 0]]
toks User:hello
Bot:Hello!
Hello!  [72, 101, 108, 108, 111, 33, 32] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496, 0, 1374]]
toks User:hello
Bot:Hello! How
How  [72, 111, 119, 32] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496, 0, 1374, 743]]
toks User:hello
Bot:Hello! How may
may  [109, 97, 121, 32] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496, 0, 1374, 743, 314]]
toks :hello
Bot:Hello! How may I
I  [73, 32] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496, 0, 1374, 743, 314, 3342]]
toks hello
Bot:Hello! How may I assist
assist  [97, 115, 115, 105, 115, 116, 32] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496, 0, 1374, 743, 314, 3342, 345]]
toks 
Bot:Hello! How may I assist you
you  [121, 111, 117, 32] False
toks [[12982, 25, 31373, 198, 20630, 25, 15496, 0, 1374, 743, 314, 3342, 345, 1909]]
tok

In [72]:
tokenizer.decode([198, 20630, 25, 15496])

'\nBot:Hello'

In [None]:
how are you doing?

In [40]:
tokenizer.decode([1])

'"'

In [41]:
dir(tokenizer)

['SPECIAL_TOKENS_ATTRIBUTES',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_tokens',
 '_additional_special_tokens',
 '_auto_class',
 '_batch_encode_plus',
 '_bos_token',
 '_call_one',
 '_cls_token',
 '_compile_jinja_template',
 '_convert_encoding',
 '_convert_id_to_token',
 '_convert_token_to_id_with_added_voc',
 '_create_repo',
 '_decode',
 '_decode_use_source_tokenizer',
 '_encode_plus',
 '_eos_token',
 '_eventual_warn_about_too_long_sequence',
 '_eventually_correct_t5_max_length',
 '_from_pretrained',
 '_get_files_timestamps',
 '_get_padding_truncation_strategies',
 '_in_target_context_manager',
 '_mask_token',
 '

In [42]:
tokenizer.all_special_tokens

['<|endoftext|>']

In [43]:
tokenizer.all_special_ids

[50256]

In [44]:
tokenizer.vocab_size

50257

In [32]:
tok = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-INCITE-Chat-3B-v1")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [33]:
tok.encode("Hola que tal")

[41, 6836, 1753, 5269]

In [34]:
tok.decode([41, 6836, 1753, 5269])

'Hola que tal'

In [35]:
tok.decode([0])

'<|endoftext|>'

In [36]:
tok.decode([29])

'<'