In [1]:
import torch
from peft import PeftModel    
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer



In [2]:
# Base checkpoint identifier and the path of the PEFT adapter checkpoint
# that gets applied on top of it.
model_name = "mistralai/Mistral-7B-v0.1"
adapters_name = "experiments/checkpoint-8800"

print(f"Starting to load the model {model_name} into memory")

# Load the base model with 4-bit loading enabled (bfloat16 torch dtype,
# automatic device placement) and wrap it with the adapter in one step.
m = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map='auto',
        torch_dtype=torch.bfloat16,
        load_in_4bit=True,
    ),
    adapters_name,
)
# m = m.merge_and_unload()

# Tokenizer comes from the base checkpoint; the BOS id is pinned explicitly.
tok = LlamaTokenizer.from_pretrained(model_name)
tok.bos_token_id = 1

# NOTE(review): not referenced by any cell visible here -- confirm it is
# still needed before removing.
stop_token_ids = [0]

print(f"Successfully loaded the model {model_name} into memory")

Starting to load the model mistralai/Mistral-7B-v0.1 into memory


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Successfully loaded the model mistralai/Mistral-7B-v0.1 into memory


In [3]:


def gen(model, text: str, max_new_tokens: int = 256, **generate_kwargs) -> str:
    """Generate a continuation of ``text`` and return only the new text.

    Uses the notebook-level tokenizer ``tok`` to encode the prompt and to
    decode the result.

    Args:
        model: Object exposing ``generate(**inputs, ...)``. Its ``device``
            attribute (or, failing that, the device of its first parameter)
            decides where the encoded prompt is placed.
        text: Prompt to continue.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 256, the value that used to be hard-coded.
        **generate_kwargs: Extra keyword arguments forwarded unchanged to
            ``model.generate`` (e.g. sampling or repetition settings).

    Returns:
        The decoded tokens that follow the prompt, with special tokens
        skipped.
    """
    # Follow the model's device instead of assuming 'cuda': the model is
    # loaded with device_map='auto', so the target device is not fixed.
    device = getattr(model, "device", None)
    if device is None:
        device = next(model.parameters()).device

    inputs = tok(text, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # generate() otherwise falls back to eos_token_id for padding and logs a
    # warning on every call; pass the same value explicitly unless overridden.
    generate_kwargs.setdefault("pad_token_id", tok.eos_token_id)

    with torch.inference_mode():
        outputs = model.generate(
            **inputs, max_new_tokens=max_new_tokens, **generate_kwargs
        )

    # Drop the echoed prompt tokens so only the continuation is decoded.
    return tok.decode(outputs[0][prompt_length:], skip_special_tokens=True)
     


In [4]:
# Prompt with an instruction section followed by an input section holding the
# story idea. It is assembled from adjacent literals so every newline is
# explicit.
text = (
    "### Instruction:  Below is a story idea. Write a short story based on this context.\n"
    "\n"
    "### Input:\n"
    "Happy kid was playing at the park, but then he broke his leg, and his life got completely changed.\n"
)

# Run the adapter-wrapped model on the prompt and show the generated text.
print(gen(m, text))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
spaces_between_special_tokens is deprecated and will be removed in transformers v5. It was adding spaces between `added_tokens`, not special tokens, and does not exist in our fast implementation. Future tokenizers will handle the decoding process on a per-model rule.



### Response:
`` And then , there were none . '' <newline> <newline> `` What do you mean ? '' <newline> <newline> `` I mean , there were none . '' <newline> <newline> `` But , there were n't none . There were two . '' <newline> <newline> `` No , there were none . '' <newline> <newline> `` But , there were two . '' <newline> <newline> `` No , there were none . '' <newline> <newline> `` But , there were two . '' <newline> <newline> `` No , there were none . '' <newline> <newline> `` But , there were two . '' <newline> <newline> `` No , there were none . '' <newline> <newline> `` But , there were two . '' <newline> <newline> `` No , there were none . '' <newnewline> <newline> `` But , there were two . '' <newline> <newline> `` No , there were none . '' <newline> <newline> `` But
