In [1]:
import codecs
import ctypes
import os

import numpy as np
import torch
from llama_cpp import Llama
from matplotlib import pyplot as plt
from torch.nn import functional as F

In [2]:
# Path to the GGML model weights. Set the LLAMA_MODEL_PATH environment variable
# to run the notebook on another machine; the fallback is the original
# author's local path.
model_bin = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/media/captdishwasher/Samshmung/horenbergerb/llama/llama.cpp/models/WizardLM-13B-Uncensored-Q5_1-GGML/WizardML-Unc-13b-Q5_1.bin",
)

# Fail with a readable message before handing a bad path to the native loader.
if not os.path.isfile(model_bin):
    raise FileNotFoundError(
        "Model file not found: {!r}. Set LLAMA_MODEL_PATH to the location of "
        "the GGML model.".format(model_bin)
    )

# Single shared model instance used by every helper in this notebook.
LLM = Llama(model_path=model_bin, n_ctx=2048, n_threads=7, use_mmap=False, n_batch=512)

llama.cpp: loading model from /media/captdishwasher/Samshmung/horenbergerb/llama/llama.cpp/models/WizardLM-13B-Uncensored-Q5_1-GGML/WizardML-Unc-13b-Q5_1.bin
llama_model_load_internal: format     = ggjt v1 (latest)
llama_model_load_internal: n_vocab    = 32001
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 9 (mostly Q5_1)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size = 9534512.58 KB
llama_model_load_internal: mem required  = 11359.05 MB (+ 1608.00 MB per state)
...


Initializing CLBlast (First Run)...
Attempting to use: Platform=0, Device=0 (If invalid, program will crash)
Using Platform: NVIDIA CUDA Device: NVIDIA GeForce RTX 3060 Ti


.................................................................................................
llama_init_from_file: kv self size  = 1600.00 MB
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 | 


In [10]:
def get_logits_and_probs(prompt):
    """Evaluate ``prompt`` with the module-level ``LLM`` and score the next token.

    Parameters
    ----------
    prompt : str
        Prompt text; it is UTF-8 encoded before being tokenized.

    Returns
    -------
    logits : numpy.ndarray
        The most recent row of ``LLM.eval_logits`` converted to an array.
    probs : numpy.ndarray
        Softmax of ``logits`` taken over its last axis.
    """
    tokens = LLM.tokenize(prompt.encode('utf-8'))
    # reset() is called before eval() so each call starts from this prompt only.
    LLM.reset()
    LLM.eval(tokens)
    # Use the newest stored row, i.e. the one produced for the end of the prompt.
    # NOTE(review): presumably eval() stores one row per n_batch-sized chunk
    # when logits_all is off, in which case index 0 is only correct for prompts
    # that fit in a single chunk -- confirm against the installed llama-cpp-python.
    logits = np.array(LLM.eval_logits[-1])
    # dim is passed explicitly: relying on the implicit dimension is deprecated
    # and emitted a warning on every call.
    probs = F.softmax(torch.from_numpy(logits), dim=-1).numpy()
    return logits, probs

In [11]:
def get_most_likely_token(probs):
    """Detokenize the entry of ``probs`` with the highest value.

    Parameters
    ----------
    probs : array-like
        Scores indexed by token id, e.g. the second value returned by
        ``get_logits_and_probs``.

    Returns
    -------
    The result of ``LLM.detokenize`` for the single best token id (the
    recorded notebook output shows a bytes value such as ``b' Question'``).
    """
    best_index = np.argmax(probs)
    # detokenize receives a one-element list holding the index as a C int.
    token_ids = [ctypes.c_int(best_index)]
    return LLM.detokenize(token_ids)

In [12]:
def classify_statement(statement):
    """Ask the model whether ``statement`` is a Remark, Question or Command.

    Parameters
    ----------
    statement : str
        Text to classify; it is substituted into the prompt template.

    Returns
    -------
    The value returned by ``get_most_likely_token`` for the first token the
    model would emit after ``Classification:``.
    """
    # The final prompt line starts at column 0 on purpose: any indentation
    # inside the triple-quoted string is sent to the model verbatim, which
    # would make this prompt differ from the one used by classify_command.
    prompt = """I am going to give you a statement, and you are going to classify it. The possible classifications are Remark, Question, and Command. Respond "Classification: X" where X is the most likely classification. The input statement is "{}"

### Response:Classification:""".format(statement)
    # Only the probabilities are needed here; the raw logits are discarded.
    _, probs = get_logits_and_probs(prompt)
    # ' Question' or ' Command' or ' Rem'
    return get_most_likely_token(probs)

In [13]:
def classify_command(command):
    """Ask the model whether ``command`` is about Heading, Speed or is Unknown.

    Parameters
    ----------
    command : str
        Text to classify; it is substituted into the prompt template.

    Returns
    -------
    The value returned by ``get_most_likely_token`` for the first token the
    model would emit after ``Classification:``.
    """
    template = """I am going to give you a command, and you are going to classify it. The possible classifications are Heading, Speed, and Unknown. Respond "Classification: X" where X is the most likely classification. The input command is "{}"

### Response:Classification:"""
    _, token_probs = get_logits_and_probs(template.format(command))
    # The first token is expected to be one of ' Un', ' Speed' or ' Head'.
    return get_most_likely_token(token_probs)

In [14]:
def set_heading(command):
    """Placeholder handler for heading commands.

    ``command`` is accepted but not used; the function only prints a fixed
    acknowledgement and returns ``None``.
    """
    acknowledgement = "Command to set heading received"
    print(acknowledgement)

def set_speed(command):
    """Placeholder handler for speed commands.

    ``command`` is accepted but not used; the function only prints a fixed
    acknowledgement and returns ``None``.
    """
    acknowledgement = "Command to set speed received"
    print(acknowledgement)

def handle_unknown_command(command):
    """Placeholder handler for commands that could not be categorised.

    ``command`` is accepted but not used; the function only prints a fixed
    notice and returns ``None``.
    """
    # Spelling of the user-facing message corrected ("recieved" -> "received").
    print("Unknown command received")

def handle_question(question, max_tokens=200):
    """Stream a made-up, Star Trek themed answer to ``question`` on stdout.

    Parameters
    ----------
    question : str
        Question text; it is substituted into the prompt template.
    max_tokens : int, optional
        Upper bound on the number of sampled tokens that are printed
        (default 200). Nothing is generated when it is zero or negative.

    Returns
    -------
    None
        The generated text is printed piece by piece, without a trailing
        newline.
    """
    if max_tokens <= 0:
        return

    # The primer now reads "Response:" so it matches the format the
    # instruction asks for (it previously said "Reponse:").
    prompt = """I am going to give you a question, and you are going to make up a sci-fi, Star Trek themed response. Respond "Response: X" where X is the response you made up. The input question is "{}"

### Response:Response:""".format(question)
    tokens = LLM.tokenize(prompt.encode('utf-8'))

    # A single token's bytes are not guaranteed to be complete UTF-8; the
    # incremental decoder buffers partial characters across tokens instead of
    # raising UnicodeDecodeError in the middle of the stream.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    sampled = LLM.generate(tokens, top_k=40, top_p=0.8, temp=1.2, repeat_penalty=1.1)
    # NOTE(review): the loop stops only on the token budget; it does not look
    # for an end-of-sequence token -- confirm whether that is intended.
    for count, token in enumerate(sampled, start=1):
        print(decoder.decode(LLM.detokenize([token])), end='')
        if count >= max_tokens:
            break

    # Emit whatever is still buffered (an incomplete trailing character is
    # rendered as the replacement character).
    print(decoder.decode(b"", final=True), end='')

In [15]:
# Example inputs for the classification cells below. Exactly one assignment is
# left uncommented; swap which line is active to try a different input.
#command = "Don't do anything, John!"
#command = "John, beegle freegle, would ya?"
#command = "John, set the heading to 150 mark 190."
#command = "Bring us around, John! Reverse course!"
#command = "We've got our hands full today."
#command = "Bring us to full stop, John."
#command = "Full stop, John."
#command = "I love jelly!"
#command = "John, I love jelly!"
#command = "Set the jelly, John!"
#command = "Jelly set bongus heading."
#command = "Warp 3. Engage."
command = "What's our current heading?"

In [16]:
# Step-by-step statement classification, kept inline so the prompt and the
# intermediate model outputs can be inspected.
statement_template = """I am going to give you a statement, and you are going to classify it. The possible classifications are Remark, Question, and Command. Respond "Classification: X" where X is the most likely classification. The input statement is "{}"

### Response:Classification:"""
prompt = statement_template.format(command)
print(prompt)

# Scores for the token that would follow the prompt.
logits, probs = get_logits_and_probs(prompt)

# The first token is expected to be ' Question', ' Command' or ' Rem'.
classification = get_most_likely_token(probs)
classification

I am going to give you a statement, and you are going to classify it. The possible classifications are Remark, Question, and Command. Respond "Classification: X" where X is the most likely classification. The input statement is "What's our current heading?"

### Response:Classification:


  probs=F.softmax(torch.from_numpy(logits)).numpy()


b' Question'

In [17]:
# Route the current input: classify it, and for commands pick a handler.
classification = classify_statement(command)
# The classifiers hand back the detokenizer's result, which the recorded
# output shows as bytes (b' Question'). bytes never compare equal to str
# literals, so decode first; otherwise the command branch can never run.
if isinstance(classification, bytes):
    classification = classification.decode('utf-8', errors='replace')

if classification == ' Command':
    command_classification = classify_command(command)
    if isinstance(command_classification, bytes):
        command_classification = command_classification.decode('utf-8', errors='replace')

    # Keys are the first tokens the model is expected to emit for each class.
    command_handlers = {
        ' Un': handle_unknown_command,
        ' Speed': set_speed,
        ' Head': set_heading,
    }
    handler = command_handlers.get(command_classification)
    # An unexpected token is not dispatched; it is still printed below.
    if handler is not None:
        handler(command)
    print(command_classification)
else:
    # NOTE(review): questions also end up here; handle_question is defined in
    # this notebook but is not called from this cell -- confirm that is intended.
    print('Not a command')

Not a command


  probs=F.softmax(torch.from_numpy(logits)).numpy()
