# Overview

This notebook shows how to run the microsoft/phi-2 model with MLX on macOS devices to generate chat, instruct, and Python code completions.


In [None]:
import os

# Resolve the workspace root: two directory levels above the current working directory.
# os.getcwd() replaces the `!pwd` shell escape so the cell is plain Python and does not
# depend on a shell being available. `local_path` is kept as a one-element list so any
# later `local_path[0]` lookup keeps working.
local_path = [os.getcwd()]
workspace_dir = os.path.abspath(os.path.join(local_path[0], "..", ".."))
workspace_dir

In [None]:
model_path = os.path.join(workspace_dir, '.cache', 'models', 'microsoft', 'phi-2')
if not os.path.exists(model_path):
    !HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download microsoft/phi-2 --local-dir {model_path} --local-dir-use-symlinks False

In [None]:
# Install the `mlx` package quietly (-q) through the %pip magic.
# NOTE(review): the version is unpinned — consider pinning it for reproducible runs.
%pip install mlx -q

In [None]:
import sys

# Make modules under <workspace_dir>/models importable.
# Guarded so that re-running this cell does not keep appending duplicate sys.path entries.
_models_dir = os.path.join(workspace_dir, "models")
if _models_dir not in sys.path:
    sys.path.append(_models_dir)

In [None]:
from microsoft_phi2_model import load, generate_step

In [None]:
# Call the imported `load` helper on the local model directory; it yields the
# (model, tokenizer) pair used by every generation cell below.
model, tokenizer = load(model_path)

In [None]:
import mlx
import mlx.core as mx
import time

# Sample step-by-step reasoning prompt.
# The backslashes right after the opening quotes and right before the closing quotes
# suppress the leading and trailing newlines. The question and the "Let's think step by
# step:" cue are kept on separate lines (previously a stray line continuation fused them
# into "...have now?Let's think...").
prompt = """\
Suppose Alice originally had 3 apples, then Bob gave Alice 7 apples, then Alice gave Cook 5 apples, and then Tim gave Alice 3x the amount of apples Alice had. How many apples does Alice have now?
Let's think step by step:\
"""


def generate(prompt, temp: float = 0.1, max_tokens: int = 512, verbose=True):
    """Generate a completion for ``prompt``, streaming text and timing statistics to stdout.

    Uses the notebook-level ``tokenizer``, ``model`` and ``generate_step`` objects.

    Args:
        prompt: Text that is encoded with ``tokenizer`` and passed to ``generate_step``.
        temp: Temperature value forwarded to ``generate_step``.
        max_tokens: Maximum number of tokens taken from ``generate_step``.
        verbose: When true, print the decoded text as it is produced, followed by
            prompt and generation throughput in tokens per second.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If ``verbose`` is true and no token was generated (for example
            the first sampled token was the end-of-sequence token).
    """
    prompt = mx.array(tokenizer.encode(prompt))

    tic = time.time()
    tokens = []
    skip = 0
    REPLACEMENT_CHAR = "\ufffd"

    # range() comes first so zip() stops before pulling a surplus token from the generator.
    for n, (token, _prob) in zip(range(max_tokens), generate_step(prompt, model, temp)):
        if token == tokenizer.eos_token_id:
            break
        if n == 0:
            # Everything up to the first token counts as prompt time; restart the
            # clock so the remaining loop measures generation only.
            prompt_time = time.time() - tic
            tic = time.time()
        tokens.append(token.item())

        if verbose:
            s = tokenizer.decode(tokens)
            # Only print while the decoded text has no U+FFFD replacement character
            # (presumably a partially decoded multi-byte character); the held-back
            # text is printed on a later step or by the final flush below.
            if REPLACEMENT_CHAR not in s:
                print(s[skip:], end="", flush=True)
                skip = len(s)

    # Keep the token count separate from the decoded text: throughput and the
    # empty-output check must count tokens, not characters.
    token_count = len(tokens)
    text = tokenizer.decode(tokens).replace(REPLACEMENT_CHAR, "")

    if verbose:
        print(text[skip:], flush=True)
        gen_time = time.time() - tic
        print("=" * 10)
        if token_count == 0:
            raise ValueError("No tokens generated for this prompt")
        prompt_tps = prompt.size / prompt_time
        # The first token is attributed to prompt processing, hence the "- 1".
        gen_tps = (token_count - 1) / gen_time
        print(f"Prompt: {prompt_tps:.3f} tokens-per-sec")
        print(f"Generation: {gen_tps:.3f} tokens-per-sec")

## Chat Format


In [None]:
# Chat-style prompt: one "Name: utterance" turn per line, ending on an open "Bob:" turn
# for the model to complete.
prompt = "\n".join(
    [
        "Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestions?",
        "Bob: Well, have you tried creating a study schedule and sticking to it?",
        "Alice: Yes, I have, but it doesn't seem to help much.",
        "Bob:",
    ]
)
generate(prompt)

## Instruct Format


In [None]:
# Instruct/QA prompt using the "Instruct: <prompt>\nOutput:" layout that the phi-2 model
# card documents for this format (the previous bare question + "Answer:" suffix is the
# phi-1.5 style layout).
generate(
    """\
Instruct: Write a detailed analogy between mathematics and a lighthouse.
Output:\
"""
)

## Code Format


In [None]:
# Code-completion prompt: a function signature plus docstring, leaving the body for the
# model to write. The leading and trailing newlines of the prompt are intentional parts
# of the string.
generate(
    "\n"
    "def print_prime(n):\n"
    '   """\n'
    "   Print all primes between 1 and n\n"
    '   """\n'
)