In [18]:
from llama_cpp import Llama
max_tokens = 2048
# llama_llm = Llama(model_path="/Volumes/4Tera_SSD_2022/llm_models/llama-2-7b-chat.Q5_K_M.gguf", chat_format="llama-2")
llm = Llama(model_path="/Volumes/4Tera_SSD_2022/llm_models/orca-2-13b.Q5_K_M.gguf", chat_format="llama-2", n_ctx=max_tokens)
llm.verbose = False # don't print a bunch of stuff

llama_model_loader: loaded meta data with 22 key-value pairs and 363 tensors from /Volumes/4Tera_SSD_2022/llm_models/orca-2-13b.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  5120, 32003,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_down.weight q6_K     

In [19]:
import time
def stream_prompt(question):
    #ask the question.
    #send 2 messages:
    # - the role the system should play.
    # - the question itself.
    system_message = "You are an assistant who perfectly responds to questions with as many details as possible, without making up facts."
    start_time_seconds = time.time()
    stream = llm.create_chat_completion(
        max_tokens=max_tokens,
        messages = [
            {
                "role": "system",
                "content":  f"<|im_start|>system\n{system_message}<|im_end|>" # Orca has unique prompt formatting.
            },
            {
                "role": "user",
                "content": f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant"
            }
        ],
        stream = True, #stream each word generated in the answer, so we can immediately start seeing the result (similar to ChatGPT)
    )

    #print out each token/word as it is generated.
    for output in stream:
        choices = output['choices']
        choice = choices[0]
        delta = choice['delta']
        if 'content' in delta:
            print(delta['content'], end='')
    print()

    elapsed_time_seconds = time.time() - start_time_seconds
    print(f'answer generated in {elapsed_time_seconds} seconds')

In [20]:
prompt = """
What is 22 + 45 / 62?
"""

stream_prompt(prompt)



First, we need to perform the division operation: \n45 / 62 = 0.7320931846153844\nThen, we can add this result to 22: \n22 + 0.7320931846153844 = 22.732093184615384\nSo, the final answer is approximately 22.73.
answer generated in 37.61600089073181 seconds


In [16]:
stream_prompt("""
I would like you to add words together, using the following logic to determine the numeric value for each letter of the alphabet:
a = 2
b = 4
c = 6
Based on that you should be able to infer that d = 8, etc.

With that understanding, you should now learn that adding 2 words would look similar to the following examples:
bac + dad = 30
da + bc = 20

What is
cat + dog?
""")
#4+2+6  + 8+2+8 = 12 + 18 = 30


To find the numeric value of the word "cat" using the given logic, we can break it down into its individual letters: c (2) and a (1). The sum of these values is 3. Now, to find the numeric value for the word "dog," we can do the same: d (4), o (6), and g (7). The sum of these values is 17.

Using the logic demonstrated in the examples provided, adding two words together involves finding the sum of their corresponding numeric values. In this case, the sum would be 3 (cat) + 17 (dog) = 20. Therefore, cat + dog equals 20.
answer generated in 48.747461795806885 seconds


In [24]:
stream_prompt("""
I would like you to determine how to calculate the value of a letter in the evaluating the following examples, and finding a pattern:
a = 2
b = 4
c = 6
d = 8
e = 10
f = 12
g = 14
h = 16
i = 18
j = 20
k = 22
l = 24
m = 26
n = 28
o = 30
p = 32
q = 34
r = 36
s = 38
t = 40
u = 42
v = 44
w = 46
x = 48
y = 50
z = 52

Based on the pattern you find, please list out the value for first 10 letters of the alphabet.
If your response includes a value for a letter that doesn't match the above examples, you should understand that the pattern you came up with is incorrect, and try finding another pattern.
If needed, try coming up with 3 possible patterns.  List out each pattern you come up with, and why it works or doesn't work.
""")



To determine the value of a letter in the given examples, we can observe that there is an increasing difference between consecutive letters. Let's analyze the differences:

d - b = 8 - 4 = 4
e - d = 10 - 8 = 2
f - e = 12 - 10 = 2
g - f = 14 - 12 = 2
h - g = 16 - 14 = 2
i - h = 18 - 16 = 2
j - i = 20 - 18 = 2
k - j = 22 - 20 = 2
l - k = 24 - 22 = 2
m - l = 26 - 24 = 2
n - m = 28 - 26 = 2
o - n = 30 - 28 = 2
p - o = 32 - 30 = 2
q - p = 34 - 32 = 2
r - q = 36 - 34 = 2
s - r = 38 - 36 = 2
t - s = 40 - 38 = 2
u - t = 42 - 40 = 2
v - u = 44 - 42 = 2
w - v = 46 - 44 = 2
x - w = 48 - 46 = 2
y - x = 50 - 48 = 2
z - y = 52 - 50 = 2

The pattern we observe is that the difference between consecutive letters increases by 2 each time. We can use this pattern to find the value of the first 10 letters:

a = 2 (difference between a and b)
b = 4 (difference between b and c)
c = 6 (difference between c and d)
d = 8 (difference between d and e)
e = 10 (difference between e and f)
f = 12 (difference betw

In [25]:
stream_prompt("""
I would like you to determine how to calculate the value of a letter in the evaluating the following examples, and finding a pattern:
a = 2
b = 4
c = 6
d = 8
e = 10
f = 12
g = 14
h = 16
i = 18
j = 20
k = 22
l = 24
m = 26
n = 28
o = 30
p = 32
q = 34
r = 36
s = 38
t = 40
u = 42
v = 44
w = 46
x = 48
y = 50
z = 52

Based on the pattern you find, please list out the value for first 10 letters of the alphabet.
If your response includes a value for a letter that doesn't match the above examples, you should understand that the pattern you came up with is incorrect, and try finding another pattern.
Don't list out the calculation and reasoning for every letter in the alphabet.
Just list out the value for first 10 letters of the alphabet.
""")



To determine the value of each letter, we can observe that there is an increasing difference of 6 between consecutive letters. Let's find the value of the first 10 letters:

a = 2 (first letter)
b = 2 + 6 = 8 (second letter)
c = 8 + 6 = 14 (third

KeyboardInterrupt: 

In [31]:
def count_tokens(text):
    tokens = llm.tokenize(text.encode('utf-8'))
    return len(tokens)

count_tokens("hello I am jason")

6