In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

In [6]:
# Checkpoint to load and the single device the whole model is placed on.
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cuda:0"

# 4-bit NF4 weights with double quantization; quantized matmuls compute in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# `trust_remote_code` is intentionally not passed: the Mistral architecture is
# implemented inside transformers, so there is no need to allow code from the
# Hub repository to execute in this kernel.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    # NOTE(review): float16 here differs from the bfloat16 compute dtype in
    # bnb_config above - confirm the mismatch is intended.
    torch_dtype=torch.float16,
    device_map=device,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
# Generation budget: passed as `max_new_tokens` to every model.generate call below.
max_new_tokens = 64

# Placeholder for the current user message; reassigned in each chat-turn cell.
text = ""

In [8]:
def remove_inst_tags(input_string):
    """Strip ``[INST] ... [/INST]`` instruction spans from ``input_string``.

    Removal is greedy: everything from the first ``[INST]`` through the last
    ``[/INST]`` that follows it is deleted, including any text between two
    tagged spans. For a decoded multi-turn transcript this leaves only the
    text after the final ``[/INST]``.

    An ``[INST]`` marker with no ``[/INST]`` after it is removed on its own
    and the text following it is kept. Every pass strictly shortens the
    string, so the loop always terminates.

    Args:
        input_string: Text that may contain instruction tags.

    Returns:
        The text with no ``[INST]`` marker left in it. A stray ``[/INST]``
        that has no ``[INST]`` before it is left untouched.
    """
    open_tag = '[INST]'
    close_tag = '[/INST]'

    while True:
        start_index = input_string.find(open_tag)
        if start_index == -1:
            return input_string

        # Only look for a closing tag *after* the opener. A bare rfind() that
        # returns -1 (or an index before the opener) would otherwise produce a
        # bogus slice that corrupts the text or makes it grow forever.
        close_index = input_string.rfind(close_tag, start_index)
        if close_index == -1:
            # Dangling opener: drop just the marker itself.
            end_index = start_index + len(open_tag)
        else:
            end_index = close_index + len(close_tag)

        input_string = input_string[:start_index] + input_string[end_index:]

# Example usage: removal spans from the first '[INST]' to the last '[/INST]',
# so the " with " between the two tagged spans is removed as well.
input_string = "This is a [INST]sample text[/INST] with [INST]tags[/INST] to remove."
output_string = remove_inst_tags(input_string)
print(output_string)


This is a  to remove.


In [9]:
# Single-turn smoke test: one user message, one generated reply.
message = [{"role": "user", "content": "Hi, I am Jaswant"}]

# Named `input_ids` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
input_ids = tokenizer.apply_chat_template(
    message, add_generation_prompt=True, return_tensors="pt"
).to(device)

# The EOS token id doubles as the padding id for generation.
outputs = model.generate(
    input_ids,
    max_new_tokens=max_new_tokens,
    pad_token_id=tokenizer.eos_token_id,
)

# Decodes the whole sequence, so the printed text still starts with the
# "[INST] ... [/INST]" prompt followed by the reply.
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(decoded_output)

[INST] Hi, I am Jaswant [/INST] Hello Jaswant, nice to meet you. How can I help you today? Is there a specific question or topic you'd like to discuss? I'm here to provide information and answer any questions you might have to the best of my ability. Let me know if there's anything I can do for


In [10]:
# messages = []

def get_answer(messages):
    """Generate the model's reply to a chat history.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts, passed unchanged to ``tokenizer.apply_chat_template``.

    Returns:
        The decoded generation with ``[INST] ... [/INST]`` spans removed by
        ``remove_inst_tags``.
    """
    # Render the history with the chat template and move it to the model's device.
    prompt_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)

    # Generation length is capped by the notebook-level `max_new_tokens`.
    generated = model.generate(
        prompt_ids,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The decoded text covers prompt + reply; the prompt part is stripped by tag.
    transcript = tokenizer.decode(generated[0], skip_special_tokens=True)
    return remove_inst_tags(transcript)


In [11]:
# Running chat history: the cells below append {"role", "content"} dicts for
# each user and assistant turn and pass the whole list to get_answer().
messages = []

In [12]:
# First user turn: the behavioural rules are sent inside the user message
# itself, followed by the user's first statement.
text = """You are an assistant that helps people in reminding what they are here for. 
            Please follow the below rules(1, 2, 3 and 4) for answering the questions:
            1. The people will given you information and if asked you will provide the information back to them.
            2. Please dont remind the user that you are here to remember information in the conversations.
            3. Only provide answer to the required question.
            4. Answer the question in less than 10 words.
            The input from the user is following:
            Hello, I am Jaswant"""

messages.append({"role": "user", "content": text})

output = get_answer(messages)
print(output)

# Store the reply so later turns see it as context.
messages.append({"role": "assistant", "content": output})

 Hello Jaswant. Nice to meet you. Is there specific information you'd like me to help you remember?


In [13]:
# User turn: state a fact for the assistant to keep in the history.
text = "I like reading books"
messages.append({"role": "user", "content": text})

output = get_answer(messages)
print(output)

# Store the reply so later turns see it as context.
messages.append({"role": "assistant", "content": output})

 Understood. You enjoy reading books.


In [9]:
# User turn: state another fact. Plain string literal - the message has no
# placeholders, so an f-string prefix is not needed.
text = 'My favorite book is Dogs of War'
messages.append({"role": "user", "content": text})

output = get_answer(messages)
print(output)

# Store the reply so later turns see it as context.
messages.append({"role": "assistant", "content": output})

 Got it. "Dogs of War" is your favorite book.


In [10]:
# User turn: ask the model to recall the name given in the first message.
# Plain string literal - no placeholders, so no f-string prefix.
text = 'What is my name?'
messages.append({"role": "user", "content": text})

output = get_answer(messages)
print(output)

# Store the reply so later turns see it as context.
messages.append({"role": "assistant", "content": output})

 Jaswant. Your name is Jaswant.


In [11]:
# User turn: ask the model to recall the favorite book stated earlier.
# Plain string literal - no placeholders, so no f-string prefix.
text = 'What is my favorite book?'
messages.append({"role": "user", "content": text})

output = get_answer(messages)
print(output)

# Store the reply so later turns see it as context.
messages.append({"role": "assistant", "content": output})

 "Dogs of War" is your favorite book.


In [12]:
# User turn: open-ended recall question over the facts given so far.
# Plain string literal - no placeholders, so no f-string prefix.
text = 'What do I like?'
messages.append({"role": "user", "content": text})

output = get_answer(messages)
print(output)

# Store the reply so later turns see it as context.
messages.append({"role": "assistant", "content": output})

 You enjoy reading books. Specifically, "Dogs of War" is your favorite book.


In [13]:
# Bare last expression: displays the accumulated conversation history.
messages

[{'role': 'user',
  'content': 'You are an assistant that helps people in reminding what they are here for. \n            Please follow the below rules(1, 2, 3 and 4) for answering the questions:\n            1. The people will given you information and if asked you will provide the information back to them.\n            2. Please dont remind the user that you are here to remember information in the conversations.\n            3. Only provide answer to the required question.\n            4. Answer the question in less than 10 words.\n            The input from the user is following:\n            Hello, I am Jaswant'},
 {'role': 'assistant',
  'content': " Hello Jaswant. Nice to meet you. Is there specific information you'd like me to help you remember?"},
 {'role': 'user', 'content': 'I like reading books'},
 {'role': 'assistant', 'content': ' Understood. You enjoy reading books.'},
 {'role': 'user', 'content': 'My favorite book is Dogs of War'},
 {'role': 'assistant',
  'content': ' G