In [1]:
import time
import torch
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.protocol.instruct.messages import UserMessage

from utils.utils import load_json

## Load config and tokenizer

In [2]:
# Device string handed to Transformer when the model is built.
device = "mps"
# Directory that config.json is read from.
base_model_dir = "./Mistral-7B-Instruct-v0.3/"
# base_model_dir already carries its own "./" prefix and trailing slash; join on a
# single separator instead of wrapping it in another "./…/" pair, which produced
# ".//./…//config.json" and would turn an absolute directory into a relative one.
config = load_json(f"{base_model_dir.rstrip('/')}/config.json")

In [3]:
# Build the tokenizer path from base_model_dir (the directory config.json is read
# from) so the two locations cannot drift apart; the resulting string is unchanged.
tokenizer = MistralTokenizer.from_file(f"{base_model_dir.rstrip('/')}/tokenizer.model.v3")
# Token ids passed to generation as stop_tokens_ids; only the tokenizer's EOS id here.
terminators = [tokenizer.instruct_tokenizer.tokenizer.eos_id]

## Forward passes

In [4]:
from ops.transformer_ops import Transformer
from ops.generation import generate_text, generate_text_stream

In [5]:
# Weights directory for the model; presumably int8 weights stored as pickles,
# judging by the directory name — TODO confirm the on-disk format.
model_dir = "MISTRAL-7B-PKL-int8/"
model = Transformer(
    model_dir,
    config,
    device=device,
)

In [15]:
# Single-turn chat request: one user message and nothing else.
user_turn = UserMessage(content="What's the weather like today in Paris?")
completion_request = ChatCompletionRequest(messages=[user_turn])

# The encoded token ids are wrapped in an outer list, giving a one-element list
# (one entry per prompt, here just this request).
encoded_request = tokenizer.encode_chat_completion(completion_request)
input_ids = [encoded_request.tokens]

In [17]:
# True: print each piece of text as the generator yields it.
# False: run generation to completion, then print the result.
streaming = True

In [18]:
# Generate a reply for `input_ids`, then report wall time and throughput.
max_gen_len = 128  # generation budget shared by both modes

# perf_counter is monotonic, so the interval is unaffected by system clock changes.
start_time = time.perf_counter()
if streaming:
    # Print each piece as it arrives and keep the pieces to rebuild the full reply.
    output_chunks = []
    total_tokens_count = 0
    for word, n_tokens in generate_text_stream(
        model,
        tokenizer,
        input_ids,
        pad_id=tokenizer.instruct_tokenizer.tokenizer.eos_id,  # EOS id reused as the pad id
        max_gen_len=max_gen_len,
        stop_tokens_ids=terminators,
    ):
        print(word, end='', flush=True)
        output_chunks.append(word)
        total_tokens_count += n_tokens
    delta_time = time.perf_counter() - start_time
    output_text = "".join(output_chunks)
    print()  # terminate the streamed line
else:
    outputs, total_tokens_count = generate_text(
        model,
        tokenizer,
        input_ids,
        max_gen_len=max_gen_len,
        stop_tokens_ids=terminators,
    )
    delta_time = time.perf_counter() - start_time
    # The previous version zipped `outputs` with `texts`, a name this notebook never
    # defines, so this branch raised NameError. Print the generated outputs directly.
    print(outputs)
print(f"Generation took {delta_time} seconds, {total_tokens_count/delta_time} tokens/s.")

I don't have real-time capabilities to provide current weather data. However, you can check the current weather in Paris by visiting a weather service website such as Weather.com, AccuWeather, or the local Météo France website. Just type in "Paris weather" in your search engine. Enjoy your day!
Generation took 33.62907409667969 seconds, 2.0517960084667513 tokens/s.
