From 565ffff30bbaf3d3317973d85ea608924d2dcd12 Mon Sep 17 00:00:00 2001 From: Aqsz <32328461+Qing25@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:11:35 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=20load=5Fchats=E9=80=BB?= =?UTF-8?q?=E8=BE=91=EF=BC=8C=E5=A2=9E=E5=8A=A0=20warning=20=E6=8F=90?= =?UTF-8?q?=E5=8D=87=E6=9C=AA=E5=AE=8C=E6=88=90=E7=9A=84=E6=95=B0=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chattool/checkpoint.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/chattool/checkpoint.py b/chattool/checkpoint.py index d93c34c..234bc08 100644 --- a/chattool/checkpoint.py +++ b/chattool/checkpoint.py @@ -23,18 +23,24 @@ def load_chats( checkpoint:str): if len(txts) == 1 and txts[0] == '': return [] # get the chatlogs logs = [json.loads(txt) for txt in txts] - chat_size, chatlogs = 1, [None] - for log in logs: - idx = log['index'] - if idx >= chat_size: # extend chatlogs - chatlogs.extend([None] * (idx - chat_size + 1)) - chat_size = idx + 1 - chatlogs[idx] = log['chat_log'] + # mapping from index to chat object + idx2chatlog = { + log['index']: Chat(log['chat_log']) + for log in logs + } + num_unfinished, chat_objects = 0, [] # count the unfinished num + index, max_index = 0, max(idx2chatlog.keys()) + while index <= max_index: + chat_object = idx2chatlog.get(index) + if chat_object is None: + num_unfinished += 1 + chat_objects.append(chat_object) + index += 1 # check if there are missing chatlogs - if None in chatlogs: - warnings.warn(f"checkpoint file {checkpoint} has unfinished chats") + if num_unfinished > 0: + warnings.warn(f"checkpoint file {checkpoint} has {num_unfinished} unfinished chats of {index} in total.") # return Chat class - return [Chat(chat_log) if chat_log is not None else None for chat_log in chatlogs] + return chat_objects def process_chats( data:List[Any] , data2chat:Callable[[Any], Chat] From a2b4a13179721e018c77cbc3bcacbf1e51ced226 Mon Sep 17 00:00:00 2001 From: Aqsz <32328461+Qing25@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:23:05 +0800 Subject: [PATCH 2/3] Update checkpoint.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更少的行数 --- chattool/checkpoint.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/chattool/checkpoint.py b/chattool/checkpoint.py index 234bc08..1a1bddd 100644 --- a/chattool/checkpoint.py +++ b/chattool/checkpoint.py @@ -24,21 +24,13 @@ def load_chats( checkpoint:str): # get the chatlogs logs = [json.loads(txt) for txt in txts] # mapping from index to chat object - idx2chatlog = { - log['index']: Chat(log['chat_log']) - for log in logs - } - num_unfinished, chat_objects = 0, [] # count the unfinished num - index, max_index = 0, max(idx2chatlog.keys()) - while index <= max_index: - chat_object = idx2chatlog.get(index) - if chat_object is None: - num_unfinished += 1 - chat_objects.append(chat_object) - index += 1 + idx2chatlog = { log['index']: Chat(log['chat_log']) for log in logs } + max_index = max(idx2chatlog.keys()) + chat_objects = [ idx2chatlog.get(index, None) for index in range(max_index+1)] + num_unfinished = chat_objects.count(None) # check if there are missing chatlogs if num_unfinished > 0: - warnings.warn(f"checkpoint file {checkpoint} has {num_unfinished} unfinished chats of {index} in total.") + warnings.warn(f"checkpoint file {checkpoint} has {num_unfinished} unfinished chats of {max_index + 1} in total.") # return Chat class return chat_objects @@ -75,4 +67,4 @@ def process_chats( data:List[Any] chat = data2chat(data[i]) chat.save(checkpoint, mode='a', index=i) chats[i] = chat - return chats \ No newline at end of file + return chats From cc2bbf7c3ccfecabb5e895e4c3a2e84bdaf0c668 Mon Sep 17 00:00:00 2001 From: rex <1073853456@qq.com> Date: Tue, 18 Jun 2024 16:46:08 +0800 Subject: [PATCH 3/3] update patch version --- chattool/__init__.py | 2 +- chattool/checkpoint.py | 7 ++++--- setup.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/chattool/__init__.py b/chattool/__init__.py index ab56492..29e94bb 100644 --- a/chattool/__init__.py +++ b/chattool/__init__.py @@ -2,7 +2,7 @@ __author__ = """Rex Wang""" __email__ = '1073853456@qq.com' -__version__ = '3.3.1' +__version__ = '3.3.2' import os, sys, requests, json from .chattype import Chat, Resp diff --git a/chattool/checkpoint.py b/chattool/checkpoint.py index 1a1bddd..274326f 100644 --- a/chattool/checkpoint.py +++ b/chattool/checkpoint.py @@ -1,7 +1,8 @@ -import json, warnings, os +import json, os from typing import List, Dict, Union, Callable, Any from .chattype import Chat import tqdm +from loguru import logger def load_chats( checkpoint:str): """Load chats from a checkpoint file @@ -30,7 +31,7 @@ def load_chats( checkpoint:str): num_unfinished = chat_objects.count(None) # check if there are missing chatlogs if num_unfinished > 0: - warnings.warn(f"checkpoint file {checkpoint} has {num_unfinished} unfinished chats of {max_index + 1} in total.") + logger.warning(f"checkpoint file {checkpoint} has {num_unfinished}/{max_index+1} unfinished chats") # return Chat class return chat_objects @@ -57,7 +58,7 @@ def process_chats( data:List[Any] ## load chats from the checkpoint file chats = load_chats(checkpoint) if len(chats) > len(data): - warnings.warn(f"checkpoint file {checkpoint} has more chats than the data to be processed") + logger.warning(f"checkpoint file {checkpoint} has more chats than the data to be processed") return chats[:len(data)] chats.extend([None] * (len(data) - len(chats))) ## process chats diff --git a/setup.py b/setup.py index 3ed4b55..5a62168 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ with open('README.md') as readme_file: readme = readme_file.read() -VERSION = '3.3.1' +VERSION = '3.3.2' requirements = [ 'Click>=7.0', 'requests>=2.20', "responses>=0.23", 'aiohttp>=3.8',