# ONNX Runtime GenAI Interaction Notebook

This notebook is configured to demonstrate the use of an ONNX model for generating responses to predefined inputs using the ONNX Runtime GenAI library.

In [2]:
# Import necessary libraries
import onnxruntime_genai as og
import time

## Load and configure the model (note: the model files are not included in the repo)

In [3]:
# Configuration parameters
# Directory holding the model files (medium-4k and medium-128k were both used in tests)
model_path = './directml/Phi-3-medium-4k-instruct'

# Search options forwarded to the generator via set_search_options
search_options = dict(
    do_sample=True,
    max_length=2048,
    top_p=0.9,
    top_k=50,
    temperature=0.5,
    repetition_penalty=1.2,
)

# Prompt layout; the user's message is substituted for {input}
chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'

# Initialize the model, its tokenizer, and a streaming decoder for token-by-token output
model = og.Model(model_path)
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

RuntimeError: Unknown provider type: dml

## Do a test...

In [36]:
# Predefined messages
messages = [
    'Solve this equation for x: x squared divided by three equals three by first writing the equation and then solving for x.'
]
for message in messages:
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # duration cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()  # Start timing

    # Prepare the prompt and encode it to tokens
    prompt = chat_template.format(input=message)
    input_tokens = tokenizer.encode(prompt)
    input_token_count = len(input_tokens)

    # Initialize generator parameters and the generator
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    params.input_ids = input_tokens
    generator = og.Generator(model, params)

    # Initialize output token count
    output_token_count = 0

    print('\nOutput: ', end='', flush=True)
    output_message = ''

    # Generate response, echoing each decoded piece as soon as it is available
    while not generator.is_done():
        generator.compute_logits()
        generator.generate_next_token()
        new_token = generator.get_next_tokens()[0]
        output_token_count += 1  # Increment output token count
        next_output = tokenizer_stream.decode(new_token)
        print(next_output, end='', flush=True)
        output_message += next_output

    del generator  # Free up resources
    end_time = time.perf_counter()  # End timing

    # Calculate tokens per second (prompt tokens are included in the total)
    total_time = end_time - start_time
    total_tokens = input_token_count + output_token_count
    tokens_per_second = total_tokens / total_time if total_time > 0 else 0

    # Print token counts and performance
    print(f"\nInput tokens: {input_token_count}, Output tokens: {output_token_count}")
    print(f"Total tokens: {total_tokens}, Time taken: {total_time:.2f} seconds")
    print(f"Tokens per second: {tokens_per_second:.2f}\n")



Output: The given problem is to solve an algebraic equation where we have "x^2 /3 = [1m3". Let's start with that step-by-step solution now!

Step 1 - Write down the original equation, which has already been done in your question as follows :  
`(X ^ 2) / 3 = 3`.   

Now let’s move on to find 'x'. Here are the steps involved:

Step 2 – Multiply both sides of the equation by `3`, so you can isolate `x²`:    
```     
((X ^ 2)/3)*3= 3*3       
=> X^2 = 9      
```        

This simplifies our expression greatly because multiplying `(X ^ 2)`/3 by `3` just leaves us with `X ^ [0;3mx2)`. Now it looks like a much simpler quadratic equation (one variable).

Next up...

Step 3– Take square root of each side of the new simplified equation (`X^2 = 9`) , remembering when taking roots there will be two solutions (+ or -):         
```          
√(X^2)=±√9           
=> X = +/- 3            
```                             
Therefore, after following these steps carefully, we see that the value o

## Functions to list the speaker labels and extract the text around each label's first appearance, for use in identifying the speakers

In [10]:
import re

def list_unique_speakers(file_path):
    """Return the distinct speaker labels that appear in a transcript file.

    Args:
        file_path: Path of the transcript text file to scan.

    Returns:
        A list of strings of the form "SPEAKER_XX", one for each distinct
        "[SPEAKER_XX]" tag in the file (XX being exactly two digits),
        sorted by the digit string.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Collect the two-digit ids, de-duplicate them, then format in sorted order
    speaker_ids = set(re.findall(r"\[SPEAKER_(\d{2})\]", content))
    return [f"SPEAKER_{speaker_id}" for speaker_id in sorted(speaker_ids)]


# Show the speaker labels detected in the sample transcript
print(list_unique_speakers('./Transcript/Developer’s Guide to Customizing Microsoft Copilot - Micrososft Build 2024.transcript.txt'))

def get_context_around_speaker(file_path, speaker):
    """Return the lines surrounding the first line that contains ``speaker``.

    Args:
        file_path: Path of the transcript text file to read.
        speaker: Text to search for (plain substring match), e.g. "SPEAKER_00".

    Returns:
        A list of at most 11 lines (line endings kept): the first matching
        line together with up to five lines before it and five lines after
        it, clipped at the start and end of the file. An empty list when no
        line contains ``speaker``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    for i, line in enumerate(lines):
        if speaker in line:
            # Clamp the start manually (a negative start would wrap around);
            # the end needs no clamp because slicing stops at the list's end.
            return lines[max(0, i - 5):i + 6]
    return []  # The speaker does not appear anywhere in the file

# Show the context around the first mention of (at most) the first two speakers.
# The speaker list is read from disk once, and slicing instead of indexing
# avoids an IndexError when the transcript has fewer than two speakers.
transcript_path = './Transcript/Developer’s Guide to Customizing Microsoft Copilot - Micrososft Build 2024.transcript.txt'
for speaker_label in list_unique_speakers(transcript_path)[:2]:
    print(get_context_around_speaker(transcript_path, speaker_label))

def collect_speaker_contexts(file_path):
    """Map each speaker label in a transcript to the lines around its first mention.

    Args:
        file_path: Path of the transcript text file.

    Returns:
        A dict keyed by the labels returned by ``list_unique_speakers``; each
        value is whatever ``get_context_around_speaker`` returns for that label.
    """
    return {
        label: get_context_around_speaker(file_path, label)
        for label in list_unique_speakers(file_path)
    }

# Example usage: print the context block collected for every speaker label
file_path = './Transcript/Developer’s Guide to Customizing Microsoft Copilot - Micrososft Build 2024.transcript.txt'
speaker_context_dict = collect_speaker_contexts(file_path)
for speaker in speaker_context_dict:
    context = speaker_context_dict[speaker]
    print(f"{speaker}:")
    # Strip the surrounding whitespace / line ending before showing each line
    for line in map(str.strip, context):
        print(line)
    print("\n")


['SPEAKER_00', 'SPEAKER_01']
["[SPEAKER_00] : [(00:00:00.000, 00:00:23.660)] :  All right, everybody. Welcome, welcome, welcome, welcome. First breakout of Build. I'm Barnum Bora, and I lead the developer advocacy team for Microsoft 365 and Copilot Platform.\n", '[SPEAKER_00] : [(00:00:23.660, 00:00:24.340)] :  advocacy team for Microsoft 365 and Copilot platform.\n', '[SPEAKER_00] : [(00:00:27.920, 00:00:28.120)] :  But without further ado, before I come back and do more things,\n', "[SPEAKER_00] : [(00:00:31.980, 00:00:36.160)] :  I'm going to hand off to my good friend Jeremy Thake, and he's going to walk you through the first half of this session. Thanks, Barno. I appreciate it. And thank you for coming to Build.\n", "[SPEAKER_01] : [(00:00:37.960, 00:00:40.280)] :  So I'm Jeremy Thake. I'm a\n", '[SPEAKER_01] : [(00:00:40.280, 00:00:44.100)] :  principal program manager in the Copilot developer experience team. And a slight\n']
["[SPEAKER_00] : [(00:00:00.000, 00:00:23.660)] :  Al

## Simple generation function

In [16]:
def process_message(message, search_options):
    """Generate the model's reply to ``message`` and return it as a single string.

    The message is substituted into the notebook-level ``chat_template``,
    encoded with the notebook-level ``tokenizer``, and fed to a new generator
    created on the notebook-level ``model``.

    Args:
        message: User text inserted into ``chat_template`` as ``{input}``.
        search_options: Dict of keyword arguments forwarded to
            ``GeneratorParams.set_search_options``.

    Returns:
        The decoded text of every generated token, concatenated in order.
    """
    # Prepare the prompt and encode it to tokens
    prompt = chat_template.format(input=message)
    input_tokens = tokenizer.encode(prompt)

    # Initialize generator parameters and the generator
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    params.input_ids = input_tokens
    generator = og.Generator(model, params)

    # Decode incrementally through a tokenizer stream (the object the
    # streaming cell of this notebook also uses for one-token-at-a-time
    # decoding) instead of calling tokenizer.decode on each isolated token.
    # A fresh stream per call keeps decoding state from leaking between messages.
    stream = tokenizer.create_stream()
    pieces = []

    # Generate response
    while not generator.is_done():
        generator.compute_logits()
        generator.generate_next_token()
        new_token = generator.get_next_tokens()[0]
        pieces.append(stream.decode(new_token))

    # Free up resources
    del generator

    return ''.join(pieces)

# Example usage: send each message through process_message with the
# notebook-level search_options and print the returned reply.
messages = ["Hello, how are you?"]
for message in messages:
    output = process_message(message, search_options)
    print(output)


 Hello! I'm just a computer program so I don't have feelings or emotions like humans do. But thank you for asking! How can I assist you today?


## Use the generation function with a prompt to identify each of the speakers and output their names

In [33]:
def generate_prompt_responses(file_path, search_options=None):
    """Ask the model for the real name behind each speaker label in a transcript.

    Args:
        file_path: Path of the transcript text file.
        search_options: Optional dict of generator search options passed on
            to ``process_message``. When omitted, the defaults defined in
            this function are used.

    Returns:
        A dict mapping each speaker label (as produced by
        ``collect_speaker_contexts``) to the raw text returned by
        ``process_message`` for that speaker's prompt.
    """
    if search_options is None:
        search_options = {
            'do_sample': True,
            'max_length': 3578,
            'top_p': 0.9,
            'top_k': 5,
            'temperature': 0.5,
            'repetition_penalty': 1.2
        }

    # First, gather the context for each speaker using the previously defined function
    speaker_contexts = collect_speaker_contexts(file_path)

    # Iterate through each speaker and their associated context
    responses = {}
    for speaker, context in speaker_contexts.items():
        # Join the context lines into a single string
        context_text = ''.join(context).strip()

        # Create the prompt with the current speaker and their context.
        # NOTE: the leading newline and the indentation inside this literal
        # are part of the prompt text that is sent to the model.
        prompt = f"""
        The following text is a piece of a transcript. Examine the text and reply with ONLY the full name matching the exact spelling of the person listed as {speaker} and nothing else.
        ----------------------------------------
        {context_text}"""

        # Call the existing process_message function and store its answer for this speaker
        responses[speaker] = process_message(prompt, search_options)

    return responses

# This function can be used as follows: build the label -> model answer
# mapping for the transcript at file_path and print one line per speaker.
response_dict = generate_prompt_responses(file_path)
for speaker, response in response_dict.items():
    print(f"{speaker}: {response}")

SPEAKER_00:  Barnum Bora
SPEAKER_01:  Jeremy Thake


## Create the final file

In [35]:
import os

def update_speakers_with_real_names(file_path):
    """Write a copy of the transcript with speaker labels replaced by model-supplied names.

    The names come from ``generate_prompt_responses(file_path)``. In every
    line, the first occurrence of each speaker label is replaced by the
    corresponding name. The result is written next to the original file,
    with ".FINAL" inserted before the file extension.

    Args:
        file_path: Path of the transcript text file to process.

    Returns:
        The path of the newly written file.
    """
    response_dict = generate_prompt_responses(file_path)

    # Normalize each answer once, up front: collapse all whitespace runs
    # (including line breaks) to single spaces so a multi-line answer cannot
    # split a transcript line, and skip empty answers so a label is never
    # replaced by an empty string.
    real_names = {}
    for speaker, response in response_dict.items():
        name = ' '.join(response.split())
        if name:
            real_names[speaker] = name

    # Read and write explicitly as UTF-8 so the output does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.readlines()

    # Replace speaker labels with actual names
    updated_content = []
    for line in content:
        for speaker, real_name in real_names.items():
            if speaker in line:
                # Replace only the first occurrence of the label in this line
                line = line.replace(speaker, real_name, 1)
        updated_content.append(line)

    # Create a new file path with .FINAL before the extension
    base, ext = os.path.splitext(file_path)
    new_file_path = f"{base}.FINAL{ext}"

    with open(new_file_path, 'w', encoding='utf-8') as file:
        file.writelines(updated_content)

    return new_file_path

# Usage: rewrite the transcript at file_path with real names and report
# where the updated copy was saved.
new_file_path = update_speakers_with_real_names(file_path)
print(f"Updated file saved as: {new_file_path}")


Updated file saved as: ./Transcript/Developer’s Guide to Customizing Microsoft Copilot - Micrososft Build 2024.transcript.FINAL.txt
