TTS update and double click to load story #474

Merged
merged 19 commits on Jan 23, 2024
Commits
0adedd1
Merge branch 'Model_Plugins' of https://github.com/ebolam/KoboldAI in…
ebolam Oct 5, 2023
4d793bf
Merge branch 'henk717:united' into Model_Plugins
ebolam Oct 6, 2023
88d4dc8
Enhancements to auto-memory test. Seems to be more coherent.
ebolam Oct 6, 2023
334eec6
Fix for text streaming not scrolling properly.
ebolam Oct 13, 2023
cbbcc62
Fix for exllama (v1 and v2) showing 2x status (0-200%) on generation
ebolam Oct 13, 2023
0688ba4
Merge branch 'Model_Plugins' of https://github.com/ebolam/KoboldAI in…
ebolam Oct 13, 2023
d5dd24a
Added setting saving for exllama and exllamav2
ebolam Oct 13, 2023
79e951b
Fix for slider bars in model load not setting correctly
ebolam Oct 13, 2023
c1a9659
Prevent model loading status from going over 100%
ebolam Oct 13, 2023
21e6d84
Add checkpoint tracking for loading
one-some Oct 13, 2023
8cb956f
Merge pull request #387 from one-some/checkpoint-progress
ebolam Oct 13, 2023
2319d1d
Model load progress bar fixed for multi-checkpoint models
ebolam Oct 13, 2023
7545754
Allow clear on images to actually clear past images.
ebolam Oct 14, 2023
86b60af
Missed a pip install for subfolder
ebolam Oct 14, 2023
782f314
merge caused two packages to be on one line
ebolam Oct 14, 2023
f06069c
Fix for tortoise tts install script
ebolam Oct 14, 2023
e946da7
Removed tortoise tts install scripts
ebolam Oct 14, 2023
80b386b
Merge commit 'refs/pull/388/head' of https://github.com/ebolam/Kobold…
ebolam Nov 3, 2023
647a82e
Merge branch 'united' into Model_Plugins
ebolam Jan 6, 2024
81 changes: 50 additions & 31 deletions aiserver.py
@@ -1474,7 +1474,7 @@ def general_startup(override_args=None):
parser.add_argument("--cacheonly", action='store_true', help="Does not save the model to the models folder when it has been downloaded in the cache")
parser.add_argument("--customsettings", help="Preloads arguements from json file. You only need to provide the location of the json file. Use customsettings.json template file. It can be renamed if you wish so that you can store multiple configurations. Leave any settings you want as default as null. Any values you wish to set need to be in double quotation marks")
parser.add_argument("--no_ui", action='store_true', default=False, help="Disables the GUI and Socket.IO server while leaving the API server running.")
parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6")
parser.add_argument("--summarizer_model", action='store', default="pszemraj/led-large-book-summary", help="Huggingface model to use for summarization. Defaults to pszemraj/led-large-book-summary")
parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation")
parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)")
parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)")
@@ -7583,8 +7583,11 @@ def text2img_api(prompt, art_guide="") -> Image.Image:
@socketio.on("clear_generated_image")
@logger.catch
def UI2_clear_generated_image(data):
koboldai_vars.picture = ""
koboldai_vars.picture_prompt = ""
if 'action_id' in data and data['action_id'] is not None:
koboldai_vars.actions.clear_picture(data['action_id'])
else:
koboldai_vars.picture = ""
koboldai_vars.picture_prompt = ""

#==================================================================#
# Retrieve previous images
@@ -7597,7 +7600,9 @@ def UI_2_get_story_image(data):
print(filename)
if filename is not None:
with open(filename, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
return {'img': base64.b64encode(image_file.read()).decode("utf-8"), 'action_id': action_id}
else:
return {'img': None, 'action_id': action_id}

#@logger.catch
def get_items_locations_from_text(text):
@@ -7648,16 +7653,19 @@ def get_items_locations_from_text(text):
#==================================================================#
def summarize(text, max_length=100, min_length=30, unload=True):
from transformers import pipeline as summary_pipeline
from transformers import AutoConfig
start_time = time.time()
if koboldai_vars.summarizer is None:
if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
else:
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
koboldai_vars.summary_tokenizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
koboldai_vars.summarizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")

#Try GPU accel
if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
@@ -7671,9 +7679,27 @@ def summarize(text, max_length=100, min_length=30, unload=True):
#Actual summarization
start_time = time.time()
#make sure text is less than 1024 tokens, otherwise we'll crash
if len(koboldai_vars.summary_tokenizer.encode(text)) > 1000:
text = koboldai_vars.summary_tokenizer.decode(koboldai_vars.summary_tokenizer.encode(text)[:1000])
output = tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024
logger.info("Using max summary tokens of {}".format(max_tokens))
if len(koboldai_vars.summary_tokenizer.encode(text)) > max_tokens:
text_list = koboldai_vars.actions.sentence_re.findall(text)
i=0
while i <= len(text_list)-2:
if len(koboldai_vars.summary_tokenizer.encode(text_list[i] + text_list[i+1])) < max_tokens:
text_list[i] = text_list[i] + text_list[i+1]
del text_list[i+1]
else:
i+=1


else:
text_list = [text]

output = []
logger.info("Summarizing with {} chunks of length {}".format(len(text_list), [len(koboldai_vars.summary_tokenizer.encode(x)) for x in text_list]))
for text in text_list:
output.append(tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text'])
output = " ".join(output)
logger.debug("Time to summarize: {}".format(time.time()-start_time))
#move model back to CPU to save precious vram
torch.cuda.empty_cache()
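
For reference, the chunking logic added to summarize() is easier to read outside the diff. Below is a minimal sketch of the same idea — split the text into sentences, greedily merge adjacent sentences while the pair still fits under the encoder limit, then summarize each chunk and join the partial summaries. The names tokenizer, summarize_chunk, and sentence_re are illustrative stand-ins, not the actual KoboldAI objects or regex.

import re

# Hypothetical sentence splitter; the real pattern lives on koboldai_vars.actions.sentence_re.
sentence_re = re.compile(r".*?[.!?]\s+", re.S)

def chunk_and_summarize(text, tokenizer, summarize_chunk, max_tokens=1024,
                        max_length=100, min_length=30):
    # Nothing to split if the whole text already fits the encoder window.
    if len(tokenizer.encode(text)) <= max_tokens:
        chunks = [text]
    else:
        chunks = sentence_re.findall(text)
        i = 0
        # Greedily merge neighbouring sentences while the merged chunk still fits.
        while i <= len(chunks) - 2:
            if len(tokenizer.encode(chunks[i] + chunks[i + 1])) < max_tokens:
                chunks[i] = chunks[i] + chunks[i + 1]
                del chunks[i + 1]
            else:
                i += 1
    # Summarize each chunk independently and stitch the results together.
    return " ".join(
        summarize_chunk(chunk, max_length=max_length, min_length=min_length)
        for chunk in chunks
    )
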
@@ -7693,40 +7719,33 @@ def summarize(text, max_length=100, min_length=30, unload=True):
@socketio.on("refresh_auto_memory")
@logger.catch
def UI_2_refresh_auto_memory(data):
max_output_length=500
from transformers import AutoConfig
koboldai_vars.auto_memory = "Generating..."
if koboldai_vars.summary_tokenizer is None:
if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
else:
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
#first, let's get all of our game text and split it into sentences
sentences = [x[0] for x in koboldai_vars.actions.to_sentences()]
sentences_lengths = [len(koboldai_vars.summary_tokenizer.encode(x)) for x in sentences]
koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")
max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024

#first, let's get all of our game text
sentences = "".join([x[0] for x in koboldai_vars.actions.to_sentences()])

pass_number = 1
while len(koboldai_vars.summary_tokenizer.encode("".join(sentences))) > 1000:
#Now let's split them into 1000 token chunks
summary_chunks = [""]
summary_chunk_lengths = [0]
for i in range(len(sentences)):
if summary_chunk_lengths[-1] + sentences_lengths[i] <= 1000:
summary_chunks[-1] += sentences[i]
summary_chunk_lengths[-1] += sentences_lengths[i]
else:
summary_chunks.append(sentences[i])
summary_chunk_lengths.append(sentences_lengths[i])
new_sentences = []
i=0
for summary_chunk in summary_chunks:
logger.debug("summarizing chunk {}".format(i))
new_sentences.extend(re.split("(?<=[.!?])\s+", summarize(summary_chunk, unload=False)))
i+=1
while len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_tokens:
new_sentences = summarize(sentences, unload=False, max_length=max_output_length)
logger.debug("Pass {}:\nSummarized to {} sentencees from {}".format(pass_number, len(new_sentences), len(sentences)))
sentences = new_sentences
koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, "\n".join(sentences))
koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, sentences)
pass_number+=1
logger.debug("OK, doing final summarization")
output = summarize(" ".join(sentences))
if len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_output_length:
output = summarize(sentences, max_length=max_output_length)
else:
output = sentences
koboldai_vars.auto_memory += "\n\n Final Result:\n" + output
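
For reference, the refresh_auto_memory loop above repeatedly compresses the story until it fits the summarizer's encoder window, then runs one final pass. A rough sketch under the same assumptions (a tokenizer with encode(), and a summarize callable that accepts max_length, like the function patched earlier in this file); names here are illustrative only:

def build_auto_memory(text, tokenizer, summarize, max_tokens=1024, max_output_length=500):
    # Keep summarizing the running text until it fits the encoder window,
    # recording each intermediate pass the way auto_memory does.
    passes = []
    pass_number = 1
    while len(tokenizer.encode(text)) > max_tokens:
        text = summarize(text, max_length=max_output_length)
        passes.append("Pass {}:\n{}".format(pass_number, text))
        pass_number += 1
    # Final condensation only if the remaining text is still longer than the target.
    if len(tokenizer.encode(text)) > max_output_length:
        final = summarize(text, max_length=max_output_length)
    else:
        final = text
    return "\n\n".join(passes + ["Final Result:\n" + final])
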


37 changes: 28 additions & 9 deletions koboldai_settings.py
@@ -687,8 +687,9 @@ def send_to_ui(self):

class model_settings(settings):
local_only_variables = ['apikey', 'default_preset']
no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns',
'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns',
'loaded_layers', 'total_layers', 'loaded_checkpoints', 'total_checkpoints',
'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
settings_name = "model"
@@ -705,6 +706,8 @@ def __init__(self, socketio, koboldai_vars):
self.generated_tkns = 0 # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
self.loaded_layers = 0 # Used in UI 2 to show model loading progress
self.total_layers = 0 # Same as above
self.loaded_checkpoints = 0
self.total_checkpoints = 1
self.total_download_chunks = 0 # tracks how much of the model has downloaded for the UI 2
self.downloaded_chunks = 0 #as above
self._tqdm = tqdm.tqdm(total=self.genamt, file=self.ignore_tqdm()) # tqdm agent for generating tokens. This will allow us to calculate the remaining time
@@ -829,13 +832,22 @@ def __setattr__(self, name, value):
#Setup TQDM for model loading
elif name == "loaded_layers" and '_tqdm' in self.__dict__:
if value == 0:
self._tqdm.reset(total=self.total_layers)
self._tqdm.reset(total=self.total_layers if self.total_checkpoints == 1 else 1000)
self.tqdm_progress = 0
else:
self._tqdm.update(1)
self.tqdm_progress = int(float(self.loaded_layers)/float(self.total_layers)*100)
if self.total_checkpoints == 1:
self._tqdm.update(1)
elif self.total_layers != 0 and self.total_checkpoints != 0:
proper_progress = (self.loaded_checkpoints + value/self.total_layers)/self.total_checkpoints*1000
self._tqdm.update(proper_progress - self._tqdm.n)

self.tqdm_progress = int(float(self._tqdm.n)/float(self._tqdm.total)*100)

if self._tqdm.format_dict['rate'] is not None:
self.tqdm_rem_time = str(datetime.timedelta(seconds=int(float(self.total_layers-self.loaded_layers)/self._tqdm.format_dict['rate'])))
elapsed = self._tqdm.format_dict["elapsed"]
rate = self._tqdm.format_dict["rate"]
remaining = (self._tqdm.total - self._tqdm.n) / rate if rate and self._tqdm.total else 0
self.tqdm_rem_time = str(datetime.timedelta(seconds=remaining))
#Setup TQDM for model downloading
elif name == "total_download_chunks" and '_tqdm' in self.__dict__:
self._tqdm.reset(total=value)
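
For reference, the progress change above folds both dimensions of loading — which checkpoint shard is being read and how many of its layers are done — into a single bar, instead of letting the per-layer count wrap past 100% once per shard. A small illustrative helper (the function name and the 0–1000 scale are assumptions matching the tqdm reset above):

def overall_progress(loaded_checkpoints, loaded_layers, total_layers, total_checkpoints, scale=1000):
    # Each checkpoint owns an equal slice of the bar; layer progress advances within that slice.
    if total_layers == 0 or total_checkpoints == 0:
        return 0
    if total_checkpoints == 1:
        return int(loaded_layers / total_layers * scale)
    fraction = (loaded_checkpoints + loaded_layers / total_layers) / total_checkpoints
    return int(fraction * scale)

# Example: second of four shards, halfway through its layers -> 375 of 1000.
print(overall_progress(loaded_checkpoints=1, loaded_layers=16, total_layers=32, total_checkpoints=4))
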
@@ -1247,11 +1259,11 @@ def __setattr__(self, name, value):
class system_settings(settings):
local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
'sp', '_horde_pid', 'inference_config', 'image_pipeline',
'sp', '_horde_pid', 'inference_config', 'image_pipeline', 'summary_model_config',
'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'colab_arg']
no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'summary_model_config',
'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab'
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab', 'quiet',
'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'colab_arg']
settings_name = "system"
@@ -1334,6 +1346,7 @@ def __init__(self, socketio, koboldai_var):
self.image_pipeline = None
self.summarizer = None
self.summary_tokenizer = None
self.summary_model_config = {}
self.keep_img_gen_in_memory = False
self.cookies = {} #cookies for colab since colab's URL changes, cookies are lost
self.experimental_features = False
@@ -2231,6 +2244,12 @@ def set_picture(self, action_id, filename, prompt):
self.actions[action_id]['picture_filename'] = filename
self.actions[action_id]['picture_prompt'] = prompt

def clear_picture(self, action_id):
action_id = int(action_id)
if action_id in self.actions:
del self.actions[action_id]['picture_filename']
del self.actions[action_id]['picture_prompt']

def get_picture(self, action_id):
if action_id == -1:
if self.story_settings.prompt_picture_filename == "":
35 changes: 30 additions & 5 deletions modeling/inference_models/exllama/class.py
@@ -340,7 +340,8 @@ def _raw_generate(

self._post_token_gen(self.generator.sequence)

utils.koboldai_vars.generated_tkns += 1
#This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
#utils.koboldai_vars.generated_tkns += 1

# Apply stoppers
do_stop = False
@@ -374,6 +375,12 @@ def _get_tokenizer(self, location: str):
return tokenizer

def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
saved_data = {'layers': [], 'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
temp = json.load(f)
for key in temp:
saved_data[key] = temp[key]
requested_parameters = []
gpu_count = torch.cuda.device_count()
layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
@@ -400,7 +407,7 @@ def get_requested_parameters(self, model_name, model_path, menu_path, parameters
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)], "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": [layer_count if i == 0 else 0],
"default": saved_data['layers'][i] if len(saved_data['layers']) > i else layer_count if i==0 else 0,
"tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
"menu_path": "Layers",
"extra_classes": "",
@@ -415,7 +422,7 @@ def get_requested_parameters(self, model_name, model_path, menu_path, parameters
"min": 2048,
"max": 16384,
"step": 512,
"default": 2048,
"default": saved_data['max_ctx'],
"tooltip": "The maximum context size the model supports",
"menu_path": "Configuration",
"extra_classes": "",
@@ -430,7 +437,7 @@ def get_requested_parameters(self, model_name, model_path, menu_path, parameters
"min": 1,
"max": 8,
"step": 0.25,
"default": 1,
"default": saved_data['compress_emb'],
"tooltip": "If the model requires compressed embeddings, set them here",
"menu_path": "Configuration",
"extra_classes": "",
@@ -445,7 +452,7 @@ def get_requested_parameters(self, model_name, model_path, menu_path, parameters
"min": 1,
"max": 32,
"step": 0.25,
"default": 1,
"default": saved_data['ntk_alpha'],
"tooltip": "NTK alpha value",
"menu_path": "Configuration",
"extra_classes": "",
@@ -490,3 +497,21 @@ def set_input_parameters(self, parameters):

self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
self.path = parameters['path'] if 'path' in parameters else None

def _save_settings(self):
with open(
"settings/{}.exllama.model_backend.settings".format(
self.model_name.replace("/", "_")
),
"w",
) as f:
json.dump(
{
"layers": self.layers if "layers" in vars(self) else [],
"max_ctx": self.model_config.max_seq_len,
"compress_emb": self.model_config.compress_pos_emb,
"ntk_alpha": self.model_config.alpha_value
},
f,
indent="",
)
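
For reference, the new _save_settings pairs with the saved_data lookup in get_requested_parameters: the chosen layer split, context size, embedding compression, and NTK alpha are written to settings/<model>.exllama.model_backend.settings and read back to pre-fill the loader UI next time. A standalone sketch of that round trip (helper names are illustrative, not part of the codebase):

import json
import os

SETTINGS_PATH = "settings/{}.exllama.model_backend.settings"

def load_backend_settings(model_name, defaults):
    # Merge any previously saved values over the defaults used by the loader UI.
    path = SETTINGS_PATH.format(model_name.replace("/", "_"))
    data = dict(defaults)
    if os.path.exists(path):
        with open(path, "r") as f:
            data.update(json.load(f))
    return data

def save_backend_settings(model_name, layers, max_ctx, compress_emb, ntk_alpha):
    # Persist the values the user picked so the next load pre-fills them.
    path = SETTINGS_PATH.format(model_name.replace("/", "_"))
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        json.dump({"layers": layers, "max_ctx": max_ctx,
                   "compress_emb": compress_emb, "ntk_alpha": ntk_alpha}, f, indent="")

# Example usage with the same defaults as get_requested_parameters above.
settings = load_backend_settings("my/model", {"layers": [], "max_ctx": 2048, "compress_emb": 1, "ntk_alpha": 1})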