ValueError: could not broadcast input array from shape (32000,) into shape (0,) in line 480 #657
Closed as not planned
Labels
bug (Something isn't working)
Description
Prerequisites
- I am running the latest code. Development is very rapid so there are no tagged versions as of now.
- I carefully followed the README.md.
- I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
- I reviewed the Discussions, and have a new bug or useful enhancement to share.
Expected Behavior
It should interact with the user in a chat loop indefinitely.
Current Behavior
After a few questions and answers, the program throws this error:
Traceback (most recent call last):
File "app.py", line 160, in <module>
init()
File "app.py", line 43, in init
process_user_input(input_txt)
File "app.py", line 64, in process_user_input
repeat_penalty=REPEAT_PENALTY,
File "app.py", line 104, in m_generate
m_eval(model, tokens, True)
File "app.py", line 157, in m_eval
__eval()
File "app.py", line 150, in __eval
model.eval(batch)
File "D:\Work\AI Freind\Venv\lib\site-packages\llama_cpp\llama.py", line 480, in eval
)[:] = llama_cpp.llama_get_logits(self.ctx)[: rows * cols]
ValueError: could not broadcast input array from shape (32000,) into shape (0,)
Environment and Context
I am using Llama With Persona for the chat system, running on a GTX 1660 Ti, 16 GB RAM, an i7-9750H CPU, and Windows 11.
Failure Information (for bugs)
The error seems to happen on line 480 of llama.py, and only after a few messages back and forth.
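For what it's worth, here is a minimal sketch (my own reconstruction, not code from llama-cpp-python) of how a broadcast target of shape (0,) can come about: if the library writes each batch's logits into a preallocated buffer with one row per context slot, then once the running token count has reached n_ctx the destination slice is empty, and assigning a 32000-element logits vector into it raises exactly this ValueError.

import numpy as np

# Hypothetical illustration of the assignment that fails in llama.py line 480.
# Assumption: a scores buffer with one row per context slot, n_vocab = 32000.
n_ctx, n_vocab = 8, 32000  # n_ctx kept tiny for the demo; the real one is N_CTX
scores = np.zeros((n_ctx, n_vocab), dtype=np.float32)
logits = np.ones(n_vocab, dtype=np.float32)  # stand-in for the llama_get_logits output

n_past = n_ctx      # context already full after enough messages
n_tokens = 1        # one more sampled token is evaluated
dst = scores[n_past + n_tokens - 1 : n_past + n_tokens, :].reshape(-1)
print(dst.shape)    # (0,) -- the slice starts past the end of the buffer
dst[:] = logits     # ValueError: could not broadcast input array from
                    # shape (32000,) into shape (0,)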
Steps to Reproduce
- git clone https://github.com/ngxson/llama-persona.git
- pip install -r requirements.txt
- Download a llama.cpp compatible model and place it in the project root directory.
- Copy config.example.py to config.py
- Copy my config code into your config file
- Copy my app.py code into your app.py file
- Run app.py with python app.py
- Go back and forth with some messages and the error should pop up.
Failure Logs
This is my app.py/main.py file:
import llama_cpp
import sys
import re
import signal
import imp
from alive_progress import alive_bar

try:
    imp.find_module("config")
    from config import *
except ImportError:
    print("Cannot find config.py")
    exit(1)

model = llama_cpp.Llama(
    model_path=MODEL_PATH,
    seed=SEED,
    n_threads=N_THREADS,
    last_n_tokens_size=N_LAST_TOKENS,
    n_ctx=N_CTX,
)

TOKEN_BOS = model.token_bos()
TOKEN_EOS = model.token_eos()

PROMPT_INIT = f""" {PERSONA_DESC}
Pretend that you are {PERSONA_NAME}. Below is an instruction that describes a task. Write a response that appropriately completes the request.""".encode()

is_received_stop_signal = False  # TODO: catching SIGINT signal


def init():
    global state_after_init_prompt
    print("")
    m_eval(model, m_tokenize(model, PROMPT_INIT, True), False, "Starting up...")
    try:
        while True:
            print("\n> ", end="", flush=True)
            input_txt = input("You: ")
            process_user_input(input_txt)
    except KeyboardInterrupt:
        pass


def process_user_input(text):
    global state_after_init_prompt, is_received_stop_signal
    is_received_stop_signal = False
    # generate response
    response_bytes = b""
    response_txt = ""
    input_tokens = m_tokenize(
        model, (f"\n\n### Instruction:\n\n{text}\n\n### Response:\n\n").encode()
    )
    for token in m_generate(
        model,
        input_tokens,
        top_k=TOP_K,
        top_p=TOP_P,
        temp=TEMP,
        repeat_penalty=REPEAT_PENALTY,
    ):
        if token == TOKEN_EOS:
            break
        should_stop = False
        response_added_bytes = model.detokenize([token])
        response_bytes += response_added_bytes
        response_txt = response_bytes.decode("utf-8", errors="ignore")
        if "###" in response_txt:
            response_txt = re.sub(r"\s+###", "", response_txt)
            sys.stdout.write("\033[K")  # Clear to the end of line
            print(response_txt.split("\n")[-1], end="", flush=True)
            should_stop = True
        print(response_added_bytes.decode(errors="ignore"), end="", flush=True)
        if should_stop:
            break
    # build context for next message
    input_ins_truncated = " ".join(text.split(" ")[:N_TOKENS_KEEP_INS])
    print("1")
    input_res_truncated = " ".join(response_txt.split(" ")[:N_TOKENS_KEEP_RES])
    print("2")
    input_history = f"\n\n### Instruction:\n\n{input_ins_truncated}\n\n### Response:\n\n{input_res_truncated}"
    print("3")
    history_tokens = m_tokenize(model, input_history.encode())
    print("4")
    print("\n\n", end="", flush=True)
    print("5")
    m_eval(model, history_tokens, False, "Build context...")
    print("6")


def m_generate(model: llama_cpp.Llama, tokens, top_k, top_p, temp, repeat_penalty):
    """Generate without self.reset()"""
    global is_received_stop_signal
    is_received_stop_signal = False
    try:
        while True:
            if is_received_stop_signal:
                yield TOKEN_EOS
            m_eval(model, tokens, True)
            token = model.sample(
                top_k=top_k,
                top_p=top_p,
                temp=temp,
                repeat_penalty=repeat_penalty,
            )
            tokens_or_none = yield token
            tokens = [token]
            if tokens_or_none is not None:
                tokens.extend(tokens_or_none)
    except KeyboardInterrupt:
        pass


def m_tokenize(model: llama_cpp.Llama, text: bytes, add_bos=False):
    assert model.ctx is not None
    n_ctx = llama_cpp.llama_n_ctx(model.ctx)
    tokens = (llama_cpp.llama_token * int(n_ctx))()
    n_tokens = llama_cpp.llama_tokenize(
        model.ctx,
        text,
        tokens,
        n_ctx,
        llama_cpp.c_bool(add_bos),
    )
    if int(n_tokens) < 0:
        raise RuntimeError(f'Failed to tokenize: text="{text}" n_tokens={n_tokens}')
    return list(tokens[:n_tokens])


def m_eval(model: llama_cpp.Llama, tokens, stop_on_signal=False, show_progress=False):
    global is_received_stop_signal

    def chunks(lst, n):
        return [lst[i : i + n] for i in range(0, len(lst), n)]

    batches = chunks(tokens, N_BATCH)

    def __eval(bar=None):
        global is_received_stop_signal
        for i, batch in enumerate(batches):
            if stop_on_signal and is_received_stop_signal:
                is_received_stop_signal = False
                return
            else:
                model.eval(batch)
                bar(len(batch)) if bar is not None else None

    if show_progress:
        with alive_bar(len(tokens), theme="classic", title=show_progress) as bar:
            __eval(bar)
    else:
        __eval()


init()

This is my config.py file:
import random
MODEL_PATH = "D:\\Work\\AI Freind\\Components\\Character AI\\llama-persona\\Models\\13b_Roleplay_(3min).bin"
N_THREADS = 12
TOP_K = 80
TOP_P = 1
TEMP = 0.4
REPEAT_PENALTY = 1.1
N_BATCH = 20
N_CTX = 2048 * 2 # 10000000 2048
N_LAST_TOKENS = 48
SEED = random.randint(1, 10000000000000000)
# persona; ideally in one paragraph (about 200-300 words)
PERSONA_NAME = "DanTDM"
PERSONA_DESC = "Your name is DanTDM"
# number of tokens to be kept for context history
N_TOKENS_KEEP_INS = 10000000
N_TOKENS_KEEP_RES = 20000000
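Not part of the original report, but a rough workaround sketch, under the assumption that the crash comes from the total number of evaluated tokens exceeding N_CTX (app.py never calls model.reset() and keeps appending the full history, and N_TOKENS_KEEP_INS / N_TOKENS_KEEP_RES are far larger than any context window). The counter and helper below are hypothetical names of mine, intended to sit next to m_eval in app.py; they are not part of llama-persona or llama-cpp-python.

# Hypothetical guard; my own names, meant to wrap the existing m_eval in app.py
tokens_evaluated = 0

def eval_within_ctx(model, tokens, *args, **kwargs):
    """Only evaluate as many tokens as still fit in the context window."""
    global tokens_evaluated
    remaining = N_CTX - tokens_evaluated
    if remaining <= 0:
        raise RuntimeError("Context window exhausted; reset the model or trim the history")
    tokens = tokens[-remaining:]   # keep the newest tokens that still fit
    tokens_evaluated += len(tokens)
    m_eval(model, tokens, *args, **kwargs)

Lowering N_TOKENS_KEEP_INS and N_TOKENS_KEEP_RES to values well below N_CTX would likewise keep the rebuilt history from growing without bound.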