
Commit

blacked
innightwolfsleep committed Sep 10, 2023
1 parent dac40a2 commit 59c3642
Showing 18 changed files with 972 additions and 634 deletions.
2 changes: 1 addition & 1 deletion characters/Answerer.json
@@ -1 +1 @@
{"char_name":"Answerer","char_persona":"Answerer is a being desired to answer your question.","char_greeting":"Hi!","world_scenario":"","example_dialogue":""}
{"char_name":"Answerer","char_persona":"Answerer is a AI coder who write a code for You.","char_greeting":"I am ready to help!","world_scenario":"","example_dialogue":""}
7 changes: 6 additions & 1 deletion configs/telegram_config.json
@@ -1,7 +1,12 @@
{
"bot_mode": "admin",
"generator_script": "generator_llama_cpp",
"model_path": "models\\llama-2-13b-chat.ggmlv3.q4_0.gguf",
"model_path": "models\\huginn-13b-v4.ggmlv3.Q4_K_M.gguf",
"model_path": "models\\llama-2-7b-chat.ggmlv3.q4_0.gguf",
"model_path": "models\\codellama-13b.Q4_K_M.gguf",
"model_path": "models\\codellama-13b-python.ggmlv3.Q3_K_M.gguf",
"model_path": "models\\mythomax-l2-kimiko-v2-13b.ggmlv3.Q4_K_M.gguf",
"model_path": "models\\mythomax-l2-kimiko-v2-13b.ggmlv3.Q3_K_M.gguf",
"characters_dir_path": "characters",
"default_char": "Example.yaml",
"presets_dir_path": "presets",
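Note on the config change above: standard JSON has no comments, so keeping several "model_path" entries side by side relies on Python's json parser, which silently keeps only the last duplicate key. A minimal sketch of that behavior (the snippet is illustrative and not part of the commit):

import json

# Duplicate keys are not a parse error for Python's json module;
# the last occurrence of "model_path" silently overrides the earlier ones.
raw = r"""
{
    "model_path": "models\\llama-2-7b-chat.ggmlv3.q4_0.gguf",
    "model_path": "models\\mythomax-l2-kimiko-v2-13b.ggmlv3.Q3_K_M.gguf"
}
"""
config = json.loads(raw)
print(config["model_path"])  # prints the last entry: models\mythomax-l2-kimiko-v2-13b.ggmlv3.Q3_K_M.gguf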
10 changes: 5 additions & 5 deletions configs/telegram_generator_params.json
@@ -5,21 +5,21 @@
"top_p": 0.1,
"top_k": 40,
"typical_p": 1,
"repetition_penalty": 1.18,
"encoder_repetition_penalty": 1,
"repetition_penalty": 1.25,
"encoder_repetition_penalty": 1.25,
"no_repeat_ngram_size": 0,
"min_length": 0,
"do_sample": true,
"n_gpu_layers": 0,
"n_gpu_layers": 20,
"penalty_alpha": 200,
"num_beams": 1,
"length_penalty": 1,
"early_stopping": false,
"add_bos_token": true,
"ban_eos_token": false,
"truncation_length": 2536,
"truncation_length": 2024,
"custom_stopping_strings": "",
"chat_prompt_size": 2536,
"chat_prompt_size": 2024,
"chat_generation_attempts": 1,
"stop_at_newline": false,
"skip_special_tokens": true,
1 change: 1 addition & 0 deletions configs/telegram_token.txt
@@ -0,0 +1 @@
6202948016:AAGALsQ98Jb_T4oCTqtDiz1TPWzpwq_fBkM
36 changes: 24 additions & 12 deletions generators/generator_langchain_llama_cpp.py
@@ -12,22 +12,30 @@

# Callbacks support token-wise streaming


class Generator:
model_change_allowed = False # if model changing allowed without stopping.
preset_change_allowed = True # if preset changing allowed.

def __init__(self, model_path, n_ctx=2048, seed=0, n_gpu_layers=0):
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
self.llm = LlamaCpp(model_path=model_path, n_ctx=n_ctx, callback_manager=callback_manager, verbose=True)

def get_answer(self,
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer,
turn_template='',
**kwargs):
self.llm = LlamaCpp(
model_path=model_path,
n_ctx=n_ctx,
callback_manager=callback_manager,
verbose=True,
)

def get_answer(
self,
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer,
turn_template="",
**kwargs
):
if "max_tokens" in generation_params:
llm.max_tokens = generation_params["max_tokens"]
if "temperature" in generation_params:
@@ -36,7 +44,9 @@ def get_answer(self,
llm.top_p = generation_params["top_p"]
if "top_k" in generation_params:
llm.top_k = generation_params["top_k"]
prompt_template = PromptTemplate(template="{prompt}", input_variables=["prompt"])
prompt_template = PromptTemplate(
template="{prompt}", input_variables=["prompt"]
)
llm.stop = stopping_strings
llm_chain = LLMChain(prompt=prompt_template, llm=self.llm)
answer = llm_chain.run(prompt)
@@ -47,7 +57,9 @@ def tokens_count(self, text: str):
length = len(splitter.split_text(text))
return length

def get_model_list(self, ):
def get_model_list(
self,
):
bins = []
for i in os.listdir("models"):
if i.endswith(".bin"):
48 changes: 30 additions & 18 deletions generators/generator_llama_cpp.py
@@ -1,28 +1,31 @@
from llama_cpp import Llama
from llama_cpp import Llama as Llama_class
import os


class Generator:
# Place where path to LLM file stored
llm: Llama = None
llm: Llama_class = None
model_change_allowed = False # if model changing allowed without stopping.
preset_change_allowed = True # if preset changing allowed.

def __init__(self, model_path: str, n_ctx=4096, seed=0, n_gpu_layers=0):
self.n_ctx = n_ctx
self.seed = seed
self.n_gpu_layers = n_gpu_layers
print(n_gpu_layers)
self.llm = Llama(model_path=model_path, n_ctx=n_ctx, seed=seed, n_gpu_layers=n_gpu_layers)

def get_answer(self,
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer: str,
turn_template='',
**kwargs):
self.llm = Llama_class(
model_path=model_path, n_ctx=n_ctx, seed=seed, n_gpu_layers=n_gpu_layers
)

def get_answer(
self,
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer: str,
turn_template="",
**kwargs
):
# Preparing, add stopping_strings
answer = default_answer

@@ -35,7 +38,8 @@ def get_answer(self,
repeat_penalty=generation_params["repetition_penalty"],
stop=stopping_strings,
max_tokens=generation_params["max_new_tokens"],
echo=True)
echo=True,
)
answer = answer["choices"][0]["text"].replace(prompt, "")
except Exception as exception:
print("generator_wrapper get answer error ", exception)
@@ -46,11 +50,19 @@ def tokens_count(self, text: str):

def get_model_list(self):
bins = []
for i in os.listdir("../models"):
if i.endswith(".bin"):
for i in os.listdir("models"):
if i.endswith(".bin") or i.endswith(".gguf"):
bins.append(i)
return bins

def load_model(self, model_file: str):
with open("models\\" + model_file, "r") as model:
self.llm: Llama = Llama(model_path=model.read(), n_ctx=self.n_ctx, seed=self.seed)
self.llm: Llama_class = None
Llama_class = None
from llama_cpp import Llama as Llama_class

self.llm = Llama_class(
model_path="models\\" + model_file,
n_ctx=self.n_ctx,
seed=self.seed,
n_gpu_layers=self.n_gpu_layers,
)
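For orientation, a hypothetical usage sketch of the reformatted Generator class above. The model file name, the prompt, and the assumption that the bot feeds it the preset from configs/telegram_generator_params.json are placeholders, not facts taken from this commit:

import json

from generators.generator_llama_cpp import Generator

# Assumes the working directory is the repository root and that the preset file
# supplies every key get_answer() expects (temperature, repetition_penalty, max_new_tokens, ...).
with open("configs/telegram_generator_params.json") as f:
    generation_params = json.load(f)

# The model file name is a placeholder; point it at a real .gguf/.bin file under models/.
gen = Generator(
    model_path="models\\llama-2-7b-chat.ggmlv3.q4_0.gguf",
    n_ctx=2048,
    seed=0,
    n_gpu_layers=20,
)
answer = gen.get_answer(
    prompt="USER: Hello!\nASSISTANT:",
    generation_params=generation_params,
    eos_token=None,
    stopping_strings=["USER:"],
    default_answer="(no answer)",
)
print(answer)
print(gen.get_model_list())  # lists *.bin and *.gguf files under models/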
126 changes: 73 additions & 53 deletions generators/generator_text_generator_webui.py
@@ -11,61 +11,82 @@ class Generator:
model_change_allowed = False # if model changing allowed without stopping.
preset_change_allowed = True # if preset changing allowed.

def __init__(self, model_path='', n_ctx=2048, n_gpu_layers=0):
def __init__(self, model_path="", n_ctx=2048, n_gpu_layers=0):
pass

@staticmethod
def get_answer(
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer,
turn_template='',
**kwargs):
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer,
turn_template="",
**kwargs
):
generation_params.update({"turn_template": turn_template})
generation_params.update({"stream": False,
'max_new_tokens': int(generation_params.get('max_new_tokens',
generation_params.get('max_length',
200))),
'do_sample': bool(generation_params.get('do_sample', True)),
'temperature': float(generation_params.get('temperature', 0.5)),
'top_p': float(generation_params.get('top_p', 1)),
'typical_p': float(
generation_params.get('typical_p', generation_params.get('typical', 1))),
'epsilon_cutoff': float(generation_params.get('epsilon_cutoff', 0)),
'eta_cutoff': float(generation_params.get('eta_cutoff', 0)),
'tfs': float(generation_params.get('tfs', 1)),
'top_a': float(generation_params.get('top_a', 0)),
'repetition_penalty': float(generation_params.get('repetition_penalty',
generation_params.get('rep_pen',
1.1))),
'repetition_penalty_range': int(generation_params.get('repetition_penalty_range', 0)),
'encoder_repetition_penalty': float(
generation_params.get('encoder_repetition_penalty', 1.0)),
'top_k': int(generation_params.get('top_k', 0)),
'min_length': int(generation_params.get('min_length', 0)),
'no_repeat_ngram_size': int(generation_params.get('no_repeat_ngram_size', 0)),
'num_beams': int(generation_params.get('num_beams', 1)),
'penalty_alpha': float(generation_params.get('penalty_alpha', 0)),
'length_penalty': float(generation_params.get('length_penalty', 1)),
'early_stopping': bool(generation_params.get('early_stopping', False)),
'mirostat_mode': int(generation_params.get('mirostat_mode', 0)),
'mirostat_tau': float(generation_params.get('mirostat_tau', 5)),
'mirostat_eta': float(generation_params.get('mirostat_eta', 0.1)),
'seed': int(generation_params.get('seed', -1)),
'add_bos_token': bool(generation_params.get('add_bos_token', True)),
'truncation_length': int(generation_params.get('truncation_length',
generation_params.get(
'max_context_length', 2048))),
'ban_eos_token': bool(generation_params.get('ban_eos_token', False)),
'skip_special_tokens': bool(generation_params.get('skip_special_tokens', True)),
'custom_stopping_strings': '', # leave this blank
'stopping_strings': generation_params.get('stopping_strings', []),
})
generator = generate_reply(question=prompt,
state=generation_params,
stopping_strings=stopping_strings)
generation_params.update(
{
"stream": False,
"max_new_tokens": int(
generation_params.get(
"max_new_tokens", generation_params.get("max_length", 200)
)
),
"do_sample": bool(generation_params.get("do_sample", True)),
"temperature": float(generation_params.get("temperature", 0.5)),
"top_p": float(generation_params.get("top_p", 1)),
"typical_p": float(
generation_params.get(
"typical_p", generation_params.get("typical", 1)
)
),
"epsilon_cutoff": float(generation_params.get("epsilon_cutoff", 0)),
"eta_cutoff": float(generation_params.get("eta_cutoff", 0)),
"tfs": float(generation_params.get("tfs", 1)),
"top_a": float(generation_params.get("top_a", 0)),
"repetition_penalty": float(
generation_params.get(
"repetition_penalty", generation_params.get("rep_pen", 1.1)
)
),
"repetition_penalty_range": int(
generation_params.get("repetition_penalty_range", 0)
),
"encoder_repetition_penalty": float(
generation_params.get("encoder_repetition_penalty", 1.0)
),
"top_k": int(generation_params.get("top_k", 0)),
"min_length": int(generation_params.get("min_length", 0)),
"no_repeat_ngram_size": int(
generation_params.get("no_repeat_ngram_size", 0)
),
"num_beams": int(generation_params.get("num_beams", 1)),
"penalty_alpha": float(generation_params.get("penalty_alpha", 0)),
"length_penalty": float(generation_params.get("length_penalty", 1)),
"early_stopping": bool(generation_params.get("early_stopping", False)),
"mirostat_mode": int(generation_params.get("mirostat_mode", 0)),
"mirostat_tau": float(generation_params.get("mirostat_tau", 5)),
"mirostat_eta": float(generation_params.get("mirostat_eta", 0.1)),
"seed": int(generation_params.get("seed", -1)),
"add_bos_token": bool(generation_params.get("add_bos_token", True)),
"truncation_length": int(
generation_params.get(
"truncation_length",
generation_params.get("max_context_length", 2048),
)
),
"ban_eos_token": bool(generation_params.get("ban_eos_token", False)),
"skip_special_tokens": bool(
generation_params.get("skip_special_tokens", True)
),
"custom_stopping_strings": "", # leave this blank
"stopping_strings": generation_params.get("stopping_strings", []),
}
)
generator = generate_reply(
question=prompt, state=generation_params, stopping_strings=stopping_strings
)
# This is "bad" implementation of getting answer, should be reworked
answer = default_answer
for a in generator:
@@ -88,9 +109,8 @@ def get_model_list():
def load_model(model_file: str):
server.unload_model()
server.model_name = model_file
if model_file != '':
shared.model, shared.tokenizer = server.load_model(
shared.model_name)
if model_file != "":
shared.model, shared.tokenizer = server.load_model(shared.model_name)
while server.load_model is None:
time.sleep(1)
return True
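The long block of nested .get() calls above resolves each sampler setting from its new key name, falling back first to a legacy alias and then to a hard-coded default. A small illustration with made-up values:

# Only the legacy key is present, so the fallback chain picks it up.
generation_params = {"max_length": 300}
max_new_tokens = int(
    generation_params.get("max_new_tokens", generation_params.get("max_length", 200))
)
print(max_new_tokens)  # 300: falls back from max_new_tokens to max_length, then to the default 200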
41 changes: 24 additions & 17 deletions generators/generator_text_generator_webui_api.py
@@ -6,34 +6,41 @@ class Generator:
model_change_allowed = False # if model changing allowed without stopping.
preset_change_allowed = False # if preset changing allowed.

def __init__(self, model_path=f'http://localhost:5000/api/v1/chat', n_ctx=2048, seed=0, n_gpu_layers=0):
def __init__(
self,
model_path=f"http://localhost:5000/api/v1/chat",
n_ctx=2048,
seed=0,
n_gpu_layers=0,
):
self.n_ctx = n_ctx
if model_path.startswith('http'):
if model_path.startswith("http"):
self.URI = model_path
else:
self.URI = f'http://localhost:5000/api/v1/chat'
self.URI = f"http://localhost:5000/api/v1/chat"

def get_answer(
self,
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer,
turn_template='',
**kwargs):
self,
prompt,
generation_params,
eos_token,
stopping_strings,
default_answer,
turn_template="",
**kwargs,
):
request = {
'user_input': prompt,
'eos_token': eos_token,
'stopping_strings': stopping_strings,
'turn_template': turn_template,
"user_input": prompt,
"eos_token": eos_token,
"stopping_strings": stopping_strings,
"turn_template": turn_template,
}
response = requests.post(self.URI, json=request)

if response.status_code == 200:
result = response.json()['results'][0]['history']
result = response.json()["results"][0]["history"]
print(json.dumps(result, indent=4))
return result['visible'][-1][1]
return result["visible"][-1][1]
else:
return default_answer
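For reference, the request/response shape handled above can be exercised directly against a running text-generation-webui API. This standalone sketch mirrors the payload keys and response parsing in get_answer; the host, prompt, and stopping strings are placeholders:

import requests

request = {
    "user_input": "Hello!",
    "eos_token": None,
    "stopping_strings": ["\nUser:"],
    "turn_template": "",
}
response = requests.post("http://localhost:5000/api/v1/chat", json=request)
if response.status_code == 200:
    history = response.json()["results"][0]["history"]
    print(history["visible"][-1][1])  # the latest bot reply
else:
    print("request failed:", response.status_code)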

