## AWQ Mistral


In [None]:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
import torch
import os
from prompts import classifier_prompt

In [None]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
weights_dir = "./weights"
model_name_or_path = "TheBloke/Mistral-7B-v0.1-AWQ"

# Load model
model = AutoAWQForCausalLM.from_quantized(
    model_name_or_path,
    fuse_layers=True,
    trust_remote_code=False,
    safetensors=True,
    device=device,
    cache_dir=weights_dir,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, trust_remote_code=False, device=device, cache_dir=weights_dir
)

### Test model


In [None]:
user_query = "Move robot tcp left for 1000mm"
prompt_template = f"""{classifier_prompt.format(user_query=user_query)}"""

print("\n\n*** Generate:")

tokens = tokenizer(prompt_template, return_tensors="pt").input_ids.cuda()

# Generate output
generation_output = model.generate(
    tokens, do_sample=True, temperature=0.1, top_p=0.95, top_k=40, max_new_tokens=30
)

print("Output: ", tokenizer.decode(generation_output[0]))

### Test pipeline


In [None]:
# Inference should be possible with transformers pipeline as well in future
# But currently this is not yet supported by AutoAWQ (correct as of September 25th 2023)
from transformers import pipeline

print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
    device=device,
)

print(pipe(prompt_template)[0]["generated_text"])

## GPTQ Mistral


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from prompts import *

In [None]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
weights_dir = "./weights"
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    trust_remote_code=False,
    cache_dir=weights_dir,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, trust_remote_code=False, device=device, cache_dir=weights_dir
)

## JSON former


In [None]:
from jsonformer import Jsonformer
from action_models import *
import json

In [None]:
user_query_tests = [
    "Move robot tcp left for 1000mm",
    "Move robot sixth joint for 30 degrees left",
    "Give me robot joint info",
    "Tell me info about robot",
    "Rotate robot base for 45 and move TCP along x axis for 50 milimeters.",
    "Rotate joint 2 for 30 and joint 7 for 45 degrees then joint 2 40 degrees",
]

In [None]:
prompt_template = f"""{split_actions.format(user_query=user_query_tests[2])}"""
jsonformer = Jsonformer(model, tokenizer, action_list, prompt_template)
generated_data = jsonformer()

print(json.dumps(generated_data, indent=4))

In [None]:
prompt_template = f"""{classifier_prompt_1.format(user_query=user_query_tests[5])}"""
jsonformer = Jsonformer(model, tokenizer, generic_function, prompt_template)
generated_data = jsonformer()

print(json.dumps(generated_data, indent=4))

In [None]:
json_schema = {
    "type": "object",
    "properties": {
        "function_name": {"type": "array", "items": {"type": "string"}},
    },
}

prompt_template = f"""{classifier_prompt_1.format(user_query=user_query_tests[5])}"""
jsonformer = Jsonformer(model, tokenizer, json_schema, prompt_template)
generated_data = jsonformer()

print(generated_data)

### Test model


In [None]:
user_query = ("Rotate robot base for 45 and move TCP along x axis for 50 milimeters.",)

prompt_template = f"""{classifier_prompt_1.format(user_query=user_query)}"""

print("\n\n*** Generate:")

tokens = tokenizer(prompt_template, return_tensors="pt").input_ids.cuda()

# Generate output
generation_output = model.generate(
    tokens, do_sample=True, temperature=0.1, top_p=0.95, top_k=40, max_new_tokens=512
)

print("Output: ", tokenizer.decode(generation_output[0]))

### Evaluate model with jsonformer


In [1]:
from datasets import load_from_disk, Dataset
import json
import jsondiff
from function_caller import FunctionCaller
from tqdm.notebook import tqdm

In [2]:
dataset = load_from_disk("../DATA/dataset_2024-Jan-30_23-29-10")
test_set = dataset["test"]

In [8]:
test_set[4]["user_query"]

{'data': {'function_calling': '{{"functions": [{{"function_name": "", "inputs": []}}]}}',
  'user_query': 'I am verifying the answers generated by an automatic system to the following question: what is an sd memory card reader Suggested answer: There are three categories of card readers sorted by the type and quantity of the card slots: single card reader (e.g. 1x SD-only), multi card reader (e.g. 9-in-1) and series card reader (e.g. 4x SD only). Should I validate this answer?\n\n\n[1]. No\n[2]. Yes\nAnswer:'}}

In [3]:
fn_caller = FunctionCaller()

In [4]:
fn_caller.generate(test_set[4]["data"]["user_query"])



RuntimeError: CUDA error: unknown error
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [4]:
count = 0
for i in tqdm(range(test_set.num_rows)):
    user_query = test_set[i]["data"]["user_query"]
    expected_fn = test_set[i]["data"]["function_calling"]
    generated_fn = fn_caller.generate(user_query)
    # print(expected_fn)
    # print(generated_fn)
    if jsondiff.diff(expected_fn, generated_fn):
        count += 1
    # print(jsondiff.diff(expected_fn, generated_fn))
    # break/
print(count / test_set.num_rows * 100)

  0%|          | 0/119 [00:00<?, ?it/s]



RuntimeError: CUDA error: unknown error
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [27]:
res = jsondiff.diff(
    {"hej": 2, "hej1": {"hey": 2}},
    {"hej1": [{"hey": 1}], "hej": 2},
)
if res:
    print(res)

{'hej1': [{'hey': 1}]}


In [17]:
f = open("../DATA/formatted-queries_2024-Jan-30_23-29-10.json")

dataset = json.load(f)
f.close()

In [23]:
list(dataset[0][1])[0] == "functions"

True

In [25]:
def check_generation(generation: dict, expected:dict):
    if not list(generation.keys())[0] == "functions":
        return False
    generated_fn = generation["functions"]
    ex
    for generated_fn, expected_fn in zip(functions):
