In [None]:
# All imports come first so that a missing package is reported before the
# snapshot download below starts, rather than after it has finished.
from pathlib import Path

from huggingface_hub import snapshot_download
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_inference.generate import generate
from mistral_inference.transformer import Transformer

# Local folder (under the user's home directory) that holds the model files.
mistral_models_path = Path.home().joinpath('mistral_models', 'Nemo-Instruct')
mistral_models_path.mkdir(parents=True, exist_ok=True)

# Restrict the snapshot to the three files listed in `allow_patterns`.
snapshot_download(
    repo_id="mistralai/Mistral-Nemo-Instruct-2407",
    allow_patterns=["params.json", "consolidated.safetensors", "tekken.json"],
    local_dir=mistral_models_path,
)

# `tokenizer` and `model` are module-level names used by later cells.
tokenizer = MistralTokenizer.from_file(str(mistral_models_path / "tekken.json"))
model = Transformer.from_folder(mistral_models_path)

In [None]:
def promptModel(prompt, max_tokens=512, temperature=0.0):
    """Send ``prompt`` to the loaded model as one user message and return the decoded reply.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Text placed in the single ``UserMessage`` of the chat request.
        max_tokens: Forwarded to ``generate`` as ``max_tokens``.
        temperature: Forwarded to ``generate`` as ``temperature``.

    Returns:
        The result of decoding the first generated token sequence.
    """
    request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
    prompt_tokens = tokenizer.encode_chat_completion(request).tokens

    # The inner tokenizer provides both the end-of-sequence id and decode().
    raw_tokenizer = tokenizer.instruct_tokenizer.tokenizer
    generated, _ = generate(
        [prompt_tokens],  # batch containing a single prompt
        model,
        max_tokens=max_tokens,
        temperature=temperature,
        eos_id=raw_tokenizer.eos_id,
    )
    return raw_tokenizer.decode(generated[0])
#prompt = "How expensive would it be to ask a window cleaner to clean all windows in Paris. Make a reasonable guess in US Dollar."
#completion_request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
#tokens = tokenizer.encode_chat_completion(completion_request).tokens
#out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.35, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
#result = tokenizer.decode(out_tokens[0])
#print(result)
# Quick sanity check of the helper; as the last expression of the cell, the
# returned text is displayed as the cell output.
promptModel(prompt='How many "r"s are there in strawberry?')

In [None]:
# Maps a short phrase to the longer citation text that should support it.
# Long literals are split with implicit string concatenation; the resulting
# values are unchanged (note the trailing space after "rubella.").
citation_pairs = {
    "Apollo 13 was led by J. Lovell.": (
        "The mission was commanded by Jim Lovell, with Jack Swigert as command "
        "module (CM) pilot and Fred Haise as lunar module (LM) pilot. "
        "Swigert was a late replacement for Ken Mattingly, who was grounded "
        "after exposure to rubella. "
    ),
    # The next two phrases are backed by the same source sentence.
    "Apollo looped around the Moon instead.": (
        "The crew, supported by backup systems on the lunar module (LM), "
        "instead looped around the Moon in a circumlunar trajectory and "
        "returned safely to Earth on April 17."
    ),
    "They returned safely to Earth.": (
        "The crew, supported by backup systems on the lunar module (LM), "
        "instead looped around the Moon in a circumlunar trajectory and "
        "returned safely to Earth on April 17."
    ),
}

# For every phrase, ask the model to trim its long citation down to the part
# that supports the phrase, then print the reply together with whether that
# reply is a verbatim substring of the supplied citation.
for _phrase_, _long_citation_ in citation_pairs.items():
    # Build the prompt from adjacent literals, which Python joins at compile
    # time. A backslash continuation *inside* a single quoted literal must be
    # avoided here: it would keep the `" + ` fragments and the indentation of
    # the following lines as part of the prompt text sent to the model.
    _prompt_ = (
        'My citations are too long.  '
        'Shorten the following citation to just the part that supports the phrase.  '
        'The citation should use the exact words in the supplied citation.'
        f'\n\nPhrase: "{_phrase_}"'
        f'\n\nSupplied Citation: "{_long_citation_}"'
    )
    _model_response_ = promptModel(_prompt_)
    print(f'{_model_response_=} {_model_response_ in _long_citation_}')
