In [1]:
# Import necessary modules
from strathweb_phi_engine import *
import os

In [2]:
# Tunable inference settings, gathered in one place
TEMPERATURE = 0.9
SEED = 146628346

# ".cache" folder inside the current working directory
cache_dir = os.path.join(os.getcwd(), ".cache")

# Configure the builder step by step, then materialise the options object
inference_options_builder = InferenceOptionsBuilder()
inference_options_builder.with_temperature(temperature=TEMPERATURE)
inference_options_builder.with_seed(seed=SEED)
inference_options = inference_options_builder.build()

In [3]:
# Define the event handler class
class ModelEventsHandler(PhiEventHandler):
    """Event handler that echoes engine events to standard output.

    Generated tokens are printed as they are reported, and a banner is
    printed once the model has been loaded. The start/end notifications
    are deliberately ignored.
    """

    def on_inference_token(self, token: str) -> None:
        """Print ``token`` with no trailing newline so tokens form one text stream.

        The stream is flushed after every token; without the flush the text
        can sit in the stdout buffer and show up in bursts rather than
        incrementally.
        """
        print(token, end="", flush=True)

    def on_inference_started(self) -> None:
        """Intentionally a no-op."""

    def on_inference_ended(self) -> None:
        """Intentionally a no-op."""

    def on_model_loaded(self) -> None:
        """Print a banner announcing that the model has been loaded."""
        print("""
 🧠 Model loaded!
****************************************
""")

In [4]:
# Build the model
# NOTE: the first run may be slow because the model has to be downloaded
SYSTEM_INSTRUCTION = "You are a hockey poet"

model_builder = PhiEngineBuilder()

# Register the handler so engine events are echoed to the cell output
events_handler = ModelEventsHandler()
model_builder.with_event_handler(event_handler=events_handler)

# Keep the outcome of the GPU attempt so it can be reported later
gpu_enabled = model_builder.try_use_gpu()

model = model_builder.build_stateful(
    cache_dir=cache_dir,
    system_instruction=SYSTEM_INSTRUCTION,
)


 🧠 Model loaded!
****************************************



In [5]:
# Run inference; the returned object carries the run statistics
haiku_prompt = "Write a haiku about ice hockey"
result = model.run_inference(
    prompt_text=haiku_prompt,
    inference_options=inference_options,
)

Frozen pitch we skate,

Pucks dance, sticks clash, fans cheer loud,

Winter's war fierce play

In [6]:
# Print the run statistics
# The empty entries reproduce the blank lines around the summary.
summary_lines = [
    "",
    "",
    "****************************************",
    f" 📝 Tokens Generated: {result.token_count}",
    f" 🖥️ Tokens per second: {result.tokens_per_second}",
    f" ⏱️ Duration: {result.duration}s",
    f" 🏎️ GPU enabled: {gpu_enabled}",
    "",
]
print("\n".join(summary_lines))



****************************************
 📝 Tokens Generated: 33
 🖥️ Tokens per second: 21.921384071018906
 ⏱️ Duration: 1.5053794s
 🏎️ GPU enabled: False

