diff --git a/README.md b/README.md
index fc09a18..8e407df 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,12 @@
 This GitHub repository contains the implementation of a telegram bot, designed to facilitate seamless interaction with GPT-3.5 and GPT-4, state-of-the-art language models by OpenAI.
 
 🔥 **GPT-4 Turbo support (with vision)**
+🔥 **Custom OpenAI API compatible endpoints support**
 🔥 **DALL-E 3 Image generation support**
 
 🔑 **Key Features**
 
-1. **Model Support**: gpt-3.5-turbo, gpt-4-turbo, gpt-4, gpt-4-turbo-preview, gpt-4-vision-preview.
+1. **Model Support**: all OpenAI models are supported out of the box. You can also add custom OpenAI API compatible endpoints in `app/llm_models.py`.
 2. **Image Generation**: You can ask bot to generate images using DALL-E 3 model, use bot just like official chatgpt app.
 3. **Dynamic Dialog Management**: The bot automatically manages the context of the conversation, eliminating the need for the user to manually reset the context using the /reset command. You still can reset dialog manually if needed.
 4. **Automatic Context Summarization**: In case the context size exceeds the model's maximum limit, the bot automatically summarizes the context to ensure the continuity of the conversation.
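Adding such an endpoint boils down to registering one more `LLModel` entry in the `get_models()` dictionary of `app/llm_models.py` (introduced later in this diff). A minimal sketch modeled on the llama3/Ollama example from this change — the model name, the `MY_ENDPOINT_*` settings and the token budgets are placeholders you would adapt to your own endpoint:

```python
# app/llm_models.py, inside get_models() -- hypothetical entry for a self-hosted endpoint
if settings.MY_ENDPOINT_BASE_URL:  # assumed setting, analogous to OLLAMA_BASE_URL below
    models['my-model'] = LLModel(
        model_name='my-model',                    # the name your endpoint expects in the `model` field
        api_key=settings.MY_ENDPOINT_API_KEY,     # assumed setting, analogous to OLLAMA_API_KEY
        base_url=settings.MY_ENDPOINT_BASE_URL,   # any OpenAI API compatible /v1 URL
        context_configuration=LLMContextConfiguration(
            long_term_memory_tokens=512,
            short_term_memory_tokens=2048,
            summary_length=512,
            hard_max_context_size=13 * 1024,
        ),
        # model_price and capabilities are optional: the price defaults to zero and the
        # capabilities default to no function calling and no image processing
    )
```

Once registered, the model also shows up in the admin-only `all_models` choice added to the settings menu further down.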
diff --git a/app/bot/batched_input_handler.py b/app/bot/batched_input_handler.py
index 1625cf3..5fd042d 100644
--- a/app/bot/batched_input_handler.py
+++ b/app/bot/batched_input_handler.py
@@ -11,6 +11,7 @@
 import settings
 from app.bot.message_processor import MessageProcessor
 from app.bot.utils import TypingWorker, message_is_forward, get_username, Timer, generate_document_id
+from app.llm_models import get_model_by_name
 from app.openai_helpers.whisper import get_audio_speech_to_text
 from app.storage.db import User, MessageType
 from app.storage.user_role import check_access_conditions
@@ -112,10 +113,18 @@ async def process_batch(self, messages_batch: List[types.Message], user: User):
 
         for message in messages_batch:
             if message.audio:
                 await self.handle_voice(message, user, message_processor)
-            if message.voice:
+            elif message.voice:
                 await self.handle_voice(message, user, message_processor)
-            if message.document:
+            elif message.document:
                 await self.handle_document(message, user, message_processor)
+            elif message.photo:
+                # handle the image like a regular message, but only after a capability check
+                llm_model = get_model_by_name(user.current_model)
+                if llm_model.capabilities.image_processing:
+                    await self.handle_message(message, user, message_processor)
+                else:
+                    # TODO: an exception is a bad way to handle this, need to find a better way
+                    raise ValueError(f'Image processing is not supported by {llm_model.model_name} model.')
             else:
                 await self.handle_message(message, user, message_processor)
diff --git a/app/bot/message_processor.py b/app/bot/message_processor.py
index 702d7d1..7f02ccd 100644
--- a/app/bot/message_processor.py
+++ b/app/bot/message_processor.py
@@ -10,6 +10,7 @@
 from app.bot.utils import send_telegram_message, detect_and_extract_code, edit_telegram_message
 from app.context.context_manager import build_context_manager
 from app.context.dialog_manager import DialogUtils
+from app.llm_models import get_model_by_name
 from app.openai_helpers.chatgpt import ChatGPT
 from app.openai_helpers.count_tokens import calculate_image_tokens
 from app.storage.db import DB, User, MessageType
@@ -48,11 +49,12 @@ async def add_message_as_context(self, message_id: int = None, message: Message
     @staticmethod
     async def prepare_user_message(message: Message):
-        content = []
-        if message.text:
-            content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text))
-        if message.photo:
+        if message.photo:
+            content = []
+
+            if message.text:
+                content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text))
+
             # largest photo
             photo = message.photo[-1]
             file_id = photo.file_id
@@ -63,12 +65,19 @@ async def prepare_user_message(message: Message):
             file_url = urljoin(f'{settings.IMAGE_PROXY_URL}:{settings.IMAGE_PROXY_PORT}', f'{file_id}_{tokens}.jpg')
             content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_IMAGE_URL, file_url))
-        return DialogUtils.prepare_user_message(content)
+            return DialogUtils.prepare_user_message(content)
+        elif message.text:
+            return DialogUtils.prepare_user_message(message.text)
+        else:
+            raise ValueError("prepare_user_message called with empty message")
 
     async def process(self, is_cancelled):
         context_manager = await self.context_manager()
-        function_storage = await context_manager.get_function_storage()
+        llm_model = get_model_by_name(self.user.current_model)
+        function_storage = None
+        if llm_model.capabilities.tool_calling or llm_model.capabilities.function_calling:
+            function_storage = await context_manager.get_function_storage()
 
         system_prompt = await context_manager.get_system_prompt()
         chat_gpt_manager = ChatGptManager(ChatGPT(self.user.current_model, system_prompt, function_storage), self.db)
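For context, `DialogUtils.construct_message_content_part` assembles the multi-part content layout that vision-capable chat models accept. Assuming the two constants map to the standard OpenAI content-part types, the user message built above for a photo with the caption "What is this?" would look roughly like this sketch (the exact dict layout depends on `DialogUtils`; host, port and token count in the proxied URL are placeholders):

```python
# hypothetical output of prepare_user_message() for a captioned photo
user_message = {
    'role': 'user',
    'content': [
        {'type': 'text', 'text': 'What is this?'},
        {'type': 'image_url', 'image_url': {'url': 'https://image-proxy.example.com:8321/<file_id>_765.jpg'}},
    ],
}
```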
diff --git a/app/bot/settings_menu.py b/app/bot/settings_menu.py
index 8ebfa05..e2be2ae 100644
--- a/app/bot/settings_menu.py
+++ b/app/bot/settings_menu.py
@@ -2,6 +2,7 @@
 
 from aiogram import Bot, types, Dispatcher
 
+from app.llm_models import get_models
 from app.storage.db import User, DB
 from app.storage.user_role import check_access_conditions, UserRole
 
@@ -16,6 +17,8 @@
     'gpt-4': 'GPT-4'
 }
 
+ALL_MODELS_OPTIONS = list(get_models().keys())
+
 TTS_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
 
 SETTINGS_PREFIX = 'settings'
@@ -99,6 +102,7 @@ def __init__(self, bot: Bot, dispatcher: Dispatcher, db: DB):
         self.settings = {
             'current_model': VisibleOptionsSetting('current_model', GPT_MODELS_OPTIONS),
             'current_model_preview': VisibleOptionsSetting('current_model', OLD_MODELS_OPTIONS),
+            'all_models': ChoiceSetting('Model', 'current_model', ALL_MODELS_OPTIONS),
             'gpt_mode': ChoiceSetting('GPT mode', 'gpt_mode', list(settings.gpt_mode.keys())),
             'use_functions': OnOffSetting('Use functions', 'use_functions'),
             'image_generation': OnOffSetting('Image generation', 'image_generation'),
@@ -106,13 +110,14 @@ def __init__(self, bot: Bot, dispatcher: Dispatcher, db: DB):
             'tts-voice': ChoiceSetting('TTS voice', 'tts_voice', TTS_VOICES),
             'voice_as_prompt': OnOffSetting('Voice as prompt', 'voice_as_prompt'),
             'function_call_verbose': OnOffSetting('Verbose function calls', 'function_call_verbose'),
-            # 'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'),
+            'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'),
             # 'auto_summarize': OnOffSetting('Auto summarize', 'auto_summarize'),
             # 'forward_as_prompt': OnOffSetting('Forward as prompt', 'forward_as_prompt'),
         }
         self.minimum_required_roles = {
             'current_model': settings.USER_ROLE_CHOOSE_MODEL,
             'current_model_preview': UserRole.ADMIN,
+            'all_models': UserRole.ADMIN,
             'image_generation': settings.USER_ROLE_IMAGE_GENERATION,
             'tts-voice': settings.USER_ROLE_TTS,
             'streaming_answers': settings.USER_ROLE_STREAMING_ANSWERS,
diff --git a/app/bot/telegram_bot.py b/app/bot/telegram_bot.py
index a9f4932..edcce9e 100644
--- a/app/bot/telegram_bot.py
+++ b/app/bot/telegram_bot.py
@@ -18,7 +18,6 @@
     calculate_image_generation_usage_price, calculate_tts_usage_price)
 from app.storage.db import DBFactory, User
 from app.storage.user_role import check_access_conditions, UserRole
-from app.openai_helpers.chatgpt import GptModel
 
 from aiogram import types, Bot, Dispatcher
 from aiogram.utils import executor
diff --git a/app/context/context_manager.py b/app/context/context_manager.py
index 493d82c..281ba27 100644
--- a/app/context/context_manager.py
+++ b/app/context/context_manager.py
@@ -1,4 +1,3 @@
-import dataclasses
 from typing import List, Optional
 
 from aiogram import types
@@ -6,79 +5,12 @@
 import settings
 from app.context.dialog_manager import DialogManager
 from app.context.function_manager import FunctionManager
+from app.llm_models import get_model_by_name
 from app.openai_helpers.chatgpt import DialogMessage
 from app.openai_helpers.function_storage import FunctionStorage
 from app.storage.db import DB, User, MessageType
 
 
-@dataclasses.dataclass
-class ContextConfiguration:
-    model_name: str
-
-    # long term memory is based on embedding context search
-    long_term_memory_tokens: int
-    # short term memory is used for storing last messages
-    short_term_memory_tokens: int
-    # length of summary to be generated when context is too long
-    summary_length: int
-    # hard limit for context size, when this limit is reached, processing is being stopped,
-    # summarization also cannot be done
-    hard_max_context_size: int
-
-    @staticmethod
-    def get_config(model: str):
-        if model == 'gpt-3.5-turbo':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=2560,
-                summary_length=512,
-                hard_max_context_size=5*1024,
-            )
-        elif model == 'gpt-3.5-turbo-16k':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=1024,
-                short_term_memory_tokens=4096,
-                summary_length=1024,
-                hard_max_context_size=17*1024,
-            )
-        elif model == 'gpt-4':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=2048,
-                summary_length=1024,
-                hard_max_context_size=9*1024,
-            )
-        elif model == 'gpt-4-turbo-preview':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=5120,
-                summary_length=2048,
-                hard_max_context_size=13*1024,
-            )
-        elif model == 'gpt-4-vision-preview':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=5120,
-                summary_length=2048,
-                hard_max_context_size=13*1024,
-            )
-        elif model == 'gpt-4-turbo':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=5120,
-                summary_length=2048,
-                hard_max_context_size=13 * 1024,
-            )
-        else:
-            raise ValueError(f'Unknown model name: {model}')
-
-
 class ContextManager:
     def __init__(self, db: DB, user: User, message: types.Message):
         self.db = db
@@ -88,7 +20,8 @@ def __init__(self, db: DB, user: User, message: types.Message):
         self.function_manager = None
 
     async def process_dialog(self):
-        context_configuration = ContextConfiguration.get_config(self.user.current_model)
+        llm_model = get_model_by_name(self.user.current_model)
+        context_configuration = llm_model.context_configuration
         self.dialog_manager = DialogManager(self.db, self.user, context_configuration)
         await self.dialog_manager.process_dialog(self.message)
diff --git a/app/llm_models.py b/app/llm_models.py
new file mode 100644
index 0000000..be1978c
--- /dev/null
+++ b/app/llm_models.py
@@ -0,0 +1,192 @@
+import dataclasses
+from decimal import Decimal
+from functools import lru_cache
+
+import settings
+
+
+@dataclasses.dataclass
+class LLMPrice:
+    # price per 1000 tokens
+    input_tokens_price: Decimal
+    output_tokens_price: Decimal
+
+
+@dataclasses.dataclass
+class LLMContextConfiguration:
+    # long term memory is based on embedding context search
+    long_term_memory_tokens: int
+    # short term memory is used for storing last messages
+    short_term_memory_tokens: int
+    # length of summary to be generated when context is too long
+    summary_length: int
+    # hard limit for context size, when this limit is reached, processing is being stopped,
+    # summarization also cannot be done
+    hard_max_context_size: int
+
+
+@dataclasses.dataclass
+class LLMCapabilities:
+    function_calling: bool = False
+    tool_calling: bool = False
+    image_processing: bool = False
+
+
+class LLModel:
+    GPT_35_TURBO = 'gpt-3.5-turbo'
+    GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k'
+    GPT_4 = 'gpt-4'
+    GPT_4_TURBO = 'gpt-4-turbo'
+    GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview'
+    GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview'
+    LLAMA3 = 'llama3'
+
+    def __init__(self, *, model_name: str, api_key, context_configuration, model_price=None, base_url=None,
+                 capabilities=None):
+        if model_price is None:
+            model_price = LLMPrice(input_tokens_price=Decimal('0'), output_tokens_price=Decimal('0'))
+
+        if capabilities is None:
+            capabilities = LLMCapabilities()
+
+        self.model_name = model_name
+        self.api_key = api_key
+        self.context_configuration = context_configuration
+        self.model_price = model_price
+        self.base_url = base_url
+        self.capabilities = capabilities
+
+
+@lru_cache
+def get_models():
+    models = {}
+
+    if settings.OPENAI_TOKEN:
+        models.update({
+            LLModel.GPT_35_TURBO: LLModel(
+                model_name=LLModel.GPT_35_TURBO,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=2560,
+                    summary_length=512,
+                    hard_max_context_size=5*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.0005'),
+                    output_tokens_price=Decimal('0.0015'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_35_TURBO_16K: LLModel(
+                model_name=LLModel.GPT_35_TURBO_16K,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=1024,
+                    short_term_memory_tokens=4096,
+                    summary_length=1024,
+                    hard_max_context_size=17*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.003'),
+                    output_tokens_price=Decimal('0.004'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_4: LLModel(
+                model_name=LLModel.GPT_4,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=2048,
+                    summary_length=1024,
+                    hard_max_context_size=9*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.03'),
+                    output_tokens_price=Decimal('0.06'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_4_TURBO: LLModel(
+                model_name=LLModel.GPT_4_TURBO,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=5120,
+                    summary_length=2048,
+                    hard_max_context_size=13*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.01'),
+                    output_tokens_price=Decimal('0.03'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                    image_processing=True,
+                ),
+            ),
+            LLModel.GPT_4_TURBO_PREVIEW: LLModel(
+                model_name=LLModel.GPT_4_TURBO_PREVIEW,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=5120,
+                    summary_length=2048,
+                    hard_max_context_size=13*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.01'),
+                    output_tokens_price=Decimal('0.03'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_4_VISION_PREVIEW: LLModel(
+                model_name=LLModel.GPT_4_VISION_PREVIEW,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=5120,
+                    summary_length=2048,
+                    hard_max_context_size=13*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.01'),
+                    output_tokens_price=Decimal('0.03'),
+                ),
+                capabilities=LLMCapabilities(
+                    image_processing=True,
+                ),
+            ),
+        })
+
+    # example of using a llama3 model served by ollama
+    if settings.OLLAMA_BASE_URL:
+        models[LLModel.LLAMA3] = LLModel(
+            model_name=LLModel.LLAMA3,
+            api_key=settings.OLLAMA_API_KEY,
+            context_configuration=LLMContextConfiguration(
+                long_term_memory_tokens=512,
+                short_term_memory_tokens=2048,
+                summary_length=512,
+                hard_max_context_size=13*1024,
+            ),
+            base_url=settings.OLLAMA_BASE_URL,
+        )
+
+    return models
+
+
+def get_model_by_name(model_name: str):
+    model = get_models().get(model_name)
+    if not model:
+        raise ValueError(f"Unknown model: {model_name}")
+    return model
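As a quick usage sketch (not part of the diff), the registry can be queried anywhere in the bot to see which models are configured, what they can do and what they cost. The model name below assumes `OPENAI_TOKEN` is set; the figures are just the per-1000-token prices defined above:

```python
from app.llm_models import get_model_by_name, get_models

# which models are available depends on the tokens/URLs filled in settings.py
print(list(get_models().keys()))

model = get_model_by_name('gpt-4-turbo')
assert model.capabilities.image_processing   # photos are accepted for this model
assert model.capabilities.function_calling

# cost of a hypothetical exchange: 1200 prompt tokens, 300 completion tokens
price = (model.model_price.input_tokens_price * 1200
         + model.model_price.output_tokens_price * 300) / 1000
print(price)  # 0.021
```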
diff --git a/app/openai_helpers/chatgpt.py b/app/openai_helpers/chatgpt.py
index 47f8ce5..e412434 100644
--- a/app/openai_helpers/chatgpt.py
+++ b/app/openai_helpers/chatgpt.py
@@ -4,25 +4,13 @@
 import settings
 from app.bot.utils import merge_dicts
+from app.llm_models import LLModel, get_model_by_name
 from app.openai_helpers.count_tokens import count_messages_tokens, count_tokens_from_functions, count_string_tokens
 from app.openai_helpers.function_storage import FunctionStorage
 
 import pydantic
 
-from app.openai_helpers.utils import OpenAIAsync
-
-
-class GptModel:
-    GPT_35_TURBO = 'gpt-3.5-turbo'
-    GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k'
-    GPT_4 = 'gpt-4'
-    GPT_4_TURBO = 'gpt-4-turbo'
-    GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview'
-    GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview'
-
-
-GPT_MODELS = {GptModel.GPT_35_TURBO, GptModel.GPT_35_TURBO_16K, GptModel.GPT_4, GptModel.GPT_4_TURBO,
-              GptModel.GPT_4_TURBO_PREVIEW, GptModel.GPT_4_VISION_PREVIEW}
+from app.openai_helpers.llm_client import OpenAILLMClient
 
 
 class FunctionCall(pydantic.BaseModel):
@@ -88,20 +76,18 @@ def openai_message(self):
 
 class ChatGPT:
     def __init__(self, model, system_prompt: str, function_storage: FunctionStorage = None):
         self.function_storage = function_storage
-        if model not in GPT_MODELS:
-            raise ValueError(f"Unknown model: {model}")
-        self.model = model
+        self.llm_model = get_model_by_name(model)
         self.system_prompt = system_prompt
 
     async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogMessage, CompletionUsage):
         additional_fields = {}
         if self.function_storage is not None:
             additional_fields.update({
-                'functions': self.function_storage.get_openai_prompt(),
+                'functions': self.function_storage.get_functions_info(),
                 'function_call': 'auto',
             })
 
-        if self.model == GptModel.GPT_4_VISION_PREVIEW:
+        if self.llm_model.model_name == LLModel.GPT_4_VISION_PREVIEW:
             # TODO: for some reason the default is 16 tokens for this model
             additional_fields['max_tokens'] = 4096
@@ -112,13 +98,13 @@ async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogM
             del additional_fields['functions']
 
         messages = self.create_context(messages_to_send, self.system_prompt)
-        resp = await OpenAIAsync.instance().chat.completions.create(
-            model=self.model,
+        resp = await OpenAILLMClient.get_client(self.llm_model.model_name).chat.completions.create(
+            model=self.llm_model.model_name,
             messages=messages,
             temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE,
             **additional_fields,
         )
-        completion_usage = CompletionUsage(model=self.model, **dict(resp.usage))
+        completion_usage = CompletionUsage(model=self.llm_model.model_name, **dict(resp.usage))
         message = resp.choices[0].message
         response = DialogMessage(**dict(message))
         return response, completion_usage
@@ -127,17 +113,18 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i
         prompt_tokens = 0
         additional_fields = {}
 
-        system_prompt_addition = None
         if self.function_storage is not None:
-            system_prompt_addition = self.function_storage.get_system_prompt_addition()
-            functions = self.function_storage.get_openai_prompt()
-            prompt_tokens += count_tokens_from_functions(functions, self.model)
-            additional_fields.update({
-                'functions': self.function_storage.get_openai_prompt(),
-                'function_call': 'auto',
-            })
-
-        if self.model == GptModel.GPT_4_VISION_PREVIEW:
+            if self.llm_model.capabilities.function_calling:
+                functions = self.function_storage.get_functions_info()
+                prompt_tokens += count_tokens_from_functions(functions, self.llm_model.model_name)
+                additional_fields.update({
+                    'functions': self.function_storage.get_functions_info(),
+                    'function_call': 'auto',
+                })
+            elif self.llm_model.capabilities.tool_calling:
+                raise NotImplementedError('Tool calling support is not implemented yet')
+
+        if self.llm_model.model_name == LLModel.GPT_4_VISION_PREVIEW:
             # TODO: for some reason the default is 16 tokens for this model
             additional_fields['max_tokens'] = 4096
@@ -148,15 +135,15 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i
             del additional_fields['functions']
 
         messages = self.create_context(messages_to_send, self.system_prompt)
-        resp_generator = await OpenAIAsync.instance().chat.completions.create(
-            model=self.model,
+        resp_generator = await OpenAILLMClient.get_client(self.llm_model.model_name).chat.completions.create(
+            model=self.llm_model.model_name,
             messages=messages,
             temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE,
             stream=True,
             **additional_fields,
         )
 
-        prompt_tokens += count_messages_tokens(messages, self.model)
+        prompt_tokens += count_messages_tokens(messages, self.llm_model.model_name)
         result_dict = {}
         async for resp_part in resp_generator:
             delta = resp_part.choices[0].delta
@@ -168,19 +155,19 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i
                     continue
                 result_dict = merge_dicts(result_dict, dict(delta))
                 dialog_message = DialogMessage(**result_dict)
-                completion_tokens = count_messages_tokens([result_dict], model=self.model)
+                completion_tokens = count_messages_tokens([result_dict], model=self.llm_model.model_name)
             elif delta.function_call is not None:
                 result_dict = merge_dicts(result_dict, dict(delta.function_call))
                 dialog_message = DialogMessage(function_call=result_dict)
                 # TODO: find a more accurate way to calculate completion length for function calls
-                completion_tokens = count_string_tokens(json.dumps(result_dict), model=self.model)
+                completion_tokens = count_string_tokens(json.dumps(result_dict), model=self.llm_model.model_name)
             else:
                 continue
 
             # for some reason openai doesn't return this field in streaming mode
             dialog_message.role = 'assistant'
 
             completion_usage = CompletionUsage(
-                model=self.model,
+                model=self.llm_model.model_name,
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
                 total_tokens=prompt_tokens + completion_tokens,
@@ -207,7 +194,7 @@ async def summarize_messages(messages: List[DialogMessage], model: str, summary_
         "role": "user",
         "content": f"Summarize this conversation in {summary_max_length} characters or less. Divide different themes explicitly with new lines. Return only text of summary, nothing else.",
     }]
-    resp = await OpenAIAsync.instance().chat.completions.create(
+    resp = await OpenAILLMClient.get_client(model).chat.completions.create(
         model=model,
         messages=prompt_messages,
         temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE,
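Since `ChatGPT.__init__` now resolves the model through the registry instead of the removed hard-coded `GPT_MODELS` set, any key returned by `get_models()` — including a custom endpoint — can be passed in directly. A hedged usage sketch (the `llama3` name only exists when `OLLAMA_BASE_URL` is configured):

```python
from app.openai_helpers.chatgpt import ChatGPT

# any model name known to get_models() works here
chat = ChatGPT('llama3', system_prompt='You are a helpful assistant.')
# inside an async context:
# response, usage = await chat.send_messages(dialog_messages)  # dialog_messages: List[DialogMessage]
```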
diff --git a/app/openai_helpers/count_tokens.py b/app/openai_helpers/count_tokens.py
index 40cebac..3e495e0 100644
--- a/app/openai_helpers/count_tokens.py
+++ b/app/openai_helpers/count_tokens.py
@@ -21,7 +21,8 @@ def count_string_tokens(string: str, model="gpt-3.5-turbo") -> int:
     elif "gpt-4" in model:
         model = "gpt-4"
     else:
-        raise ValueError(f"Unknown model: {model}")
+        # TODO: add a method to calculate tokens for arbitrary models
+        model = "gpt-4"
     encoding = tiktoken.encoding_for_model(model)
     return len(encoding.encode(str(string)))
 
@@ -43,7 +44,11 @@ def count_messages_tokens(messages: List[dict], model="gpt-3.5-turbo") -> int:
         tokens_per_message = 3
         tokens_per_name = 1
 
-    encoding = tiktoken.encoding_for_model(model)
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        # TODO: add a method to calculate tokens for arbitrary models
+        encoding = tiktoken.encoding_for_model("gpt-4")
 
     num_tokens = 0
     for message in messages:
@@ -84,7 +89,8 @@ def count_tokens_from_functions(functions, model="gpt-3.5-turbo"):
     elif "gpt-4" in model:
         model = "gpt-4"
     else:
-        raise ValueError(f"Unknown model: {model}")
+        # TODO: add a method to calculate tokens for arbitrary models
+        model = "gpt-4"
     encoding = tiktoken.encoding_for_model(model)
 
     num_tokens = 0
diff --git a/app/openai_helpers/function_storage.py b/app/openai_helpers/function_storage.py
index 9d12a3b..4380c12 100644
--- a/app/openai_helpers/function_storage.py
+++ b/app/openai_helpers/function_storage.py
@@ -21,7 +21,7 @@ def extract_function_info(function) -> Dict[str, Any]:
             "parameters": function.get_params_schema(),
         }
 
-    def get_openai_prompt(self):
+    def get_functions_info(self):
         functions = []
         for function in self.functions.values():
             function_info = function['info']
@@ -29,6 +29,17 @@ def get_openai_prompt(self):
 
         return functions
 
+    def get_tools_info(self):
+        tools = []
+        for function in self.functions.values():
+            function_info = {
+                "type": "function",
+                "function": function['info'],
+            }
+            tools.append(function_info)
+
+        return tools
+
     def get_system_prompt_addition(self) -> str:
         result = []
         for function in self.functions.values():
diff --git a/app/openai_helpers/llm_client.py b/app/openai_helpers/llm_client.py
new file mode 100644
index 0000000..c528871
--- /dev/null
+++ b/app/openai_helpers/llm_client.py
@@ -0,0 +1,19 @@
+import openai
+
+from app.llm_models import get_model_by_name
+
+
+class OpenAILLMClient:
+    _model_clients = {}
+
+    @classmethod
+    def get_client(cls, model_name: str):
+        if model_name not in cls._model_clients:
+            llm_model = get_model_by_name(model_name)
+            params = {
+                'api_key': llm_model.api_key,
+            }
+            if llm_model.base_url:
+                params['base_url'] = llm_model.base_url
+            cls._model_clients[model_name] = openai.AsyncOpenAI(**params)
+        return cls._model_clients[model_name]
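`OpenAILLMClient` keeps one `AsyncOpenAI` client per configured model, so official OpenAI models and custom endpoints can coexist in one process. A small usage sketch (it assumes `gpt-4-turbo` is configured, i.e. `OPENAI_TOKEN` is set):

```python
import asyncio

from app.openai_helpers.llm_client import OpenAILLMClient


async def main():
    # returns an AsyncOpenAI client configured with the model's api_key and base_url
    client = OpenAILLMClient.get_client('gpt-4-turbo')
    resp = await client.chat.completions.create(
        model='gpt-4-turbo',
        messages=[{'role': 'user', 'content': 'Say hi in one word.'}],
    )
    print(resp.choices[0].message.content)

    # asking for the same model again reuses the cached client instance
    assert OpenAILLMClient.get_client('gpt-4-turbo') is client


asyncio.run(main())
```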
diff --git a/app/openai_helpers/utils.py b/app/openai_helpers/utils.py
index 91e9334..f822451 100644
--- a/app/openai_helpers/utils.py
+++ b/app/openai_helpers/utils.py
@@ -1,16 +1,7 @@
 from decimal import Decimal
 
 import openai
-
-COMPLETION_PRICE = {
-    'gpt-3.5-turbo': (Decimal('0.0005'), Decimal('0.0015')),
-    'gpt-3.5-turbo-16k': (Decimal('0.003'), Decimal('0.004')),
-    'gpt-4': (Decimal('0.03'), Decimal('0.06')),
-    'gpt-4-1106-preview': (Decimal('0.01'), Decimal('0.03')),
-    'gpt-4-vision-preview': (Decimal('0.01'), Decimal('0.03')),
-    'gpt-4-turbo-preview': (Decimal('0.01'), Decimal('0.03')),
-    'gpt-4-turbo': (Decimal('0.01'), Decimal('0.03')),
-}
+from app.llm_models import get_model_by_name
 
 WHISPER_PRICE = Decimal('0.006')
@@ -29,10 +20,11 @@ def calculate_completion_usage_price(prompt_tokens: int, completion_tokens: int, model: str) -> Decimal:
-    price = COMPLETION_PRICE.get(model)
+    llm_model = get_model_by_name(model)
+    price = llm_model.model_price
     if not price:
         raise ValueError(f"Unknown model: {model}")
-    prompt_price, completion_price = price
+    prompt_price, completion_price = price.input_tokens_price, price.output_tokens_price
     return prompt_price * prompt_tokens / 1000 + completion_price * completion_tokens / 1000
 
@@ -54,14 +46,25 @@ def calculate_image_generation_usage_price(model, resolution, num_images):
 
 class OpenAIAsync:
     _key = None
+    _base_url = None
     _instance = None
 
     @classmethod
-    def init(cls, api_key):
+    def init(cls, api_key, base_url=None):
         cls._key = api_key
+        cls._base_url = base_url
 
     @classmethod
     def instance(cls):
+        params = {}
+        if cls._base_url:
+            params['base_url'] = cls._base_url
+
+        if cls._key is None:
+            raise ValueError("OpenAIAsync is not initialized")
+
+        params['api_key'] = cls._key
+
         if cls._instance is None:
-            cls._instance = openai.AsyncOpenAI(api_key=cls._key)
+            cls._instance = openai.AsyncOpenAI(**params)
         return cls._instance
diff --git a/main.py b/main.py
index b731c5c..0bddc3f 100644
--- a/main.py
+++ b/main.py
@@ -13,6 +13,7 @@
 
 if __name__ == '__main__':
+    # still needed for the whisper and tts capabilities
     OpenAIAsync.init(settings.OPENAI_TOKEN)
     telegram_bot = TelegramBot(bot, dp)
     telegram_bot.run()
diff --git a/settings.py b/settings.py
index 17a8e0c..be9a88d 100644
--- a/settings.py
+++ b/settings.py
@@ -17,11 +17,14 @@
                   'and assist users to the best of your abilities. Listen carefully to what they say, ask questions, '
                   'and help in any way you can. Avoid giving advices, your ultimate goal is to help the user to find the right solution by himself. '
                   'Ask only one question a time.',
-    }
+    },
+    'ai dungeon': {  # feel free to delete this example or add modes of your own
+        'system': 'You are the AI Dungeon game. Your task is to entertain the user with role play. The user creates a setup and you play the role of the world and the characters in it.',
+    },
 }
 
 # Mandatory settings
-OPENAI_TOKEN = 'YOUR_TOKEN'
+OPENAI_TOKEN = ''
 TELEGRAM_BOT_TOKEN = 'YOUR_TOKEN'
 # Image proxy settings
 # This proxy is used to send images to openai for GPT-4-Vision
@@ -69,6 +72,9 @@
 IMAGE_PROXY_BIND_HOST = '0.0.0.0'
 IMAGE_PROXY_BIND_PORT = 8321
 
+OLLAMA_BASE_URL = ''
+OLLAMA_API_KEY = 'ollama'
+
 # Vectara RAG settings
 # this feature is highly experimental and not recommended to be used in it's current state
 # currently it even doesn't have instructions on how to setup, use it only if you feel experimentalish
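To enable the bundled llama3 example, the two new settings above are all that is needed — point them at any OpenAI API compatible server. Example values for a local Ollama instance (the URL is Ollama's standard OpenAI-compatible endpoint; Ollama ignores the key, but the OpenAI client requires a non-empty value):

```python
# settings.py -- example values for a local Ollama server
OLLAMA_BASE_URL = 'http://localhost:11434/v1'
OLLAMA_API_KEY = 'ollama'
```

With these set, `get_models()` registers `llama3`, and an admin can switch to it through the new `all_models` option in the settings menu.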