Skip to content

Commit

Permalink
Merge pull request #30 from ijwfly/experimental/ollama-test
Browse files Browse the repository at this point in the history
Simplification of adding new models
  • Loading branch information
ijwfly committed May 2, 2024
2 parents 5d6ddba + 63c9643 commit 014ad37
Show file tree
Hide file tree
Showing 14 changed files with 321 additions and 140 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
This GitHub repository contains the implementation of a telegram bot, designed to facilitate seamless interaction with GPT-3.5 and GPT-4, state-of-the-art language models by OpenAI.

🔥 **GPT-4 Turbo support (with vision)**
🔥 **Custom OpenAI API compatible endpoints support**
🔥 **DALL-E 3 Image generation support**

🔑 **Key Features**

1. **Model Support**: gpt-3.5-turbo, gpt-4-turbo, gpt-4, gpt-4-turbo-preview, gpt-4-vision-preview.
1. **Model Support**: all OpenAI models are supported out of the box. You can also add OpenAI-API-compatible endpoints by registering them in `app/llm_models.py`
2. **Image Generation**: You can ask the bot to generate images using the DALL-E 3 model — use the bot just like the official ChatGPT app.
3. **Dynamic Dialog Management**: The bot automatically manages the context of the conversation, eliminating the need for the user to manually reset the context using the /reset command. You still can reset dialog manually if needed.
4. **Automatic Context Summarization**: In case the context size exceeds the model's maximum limit, the bot automatically summarizes the context to ensure the continuity of the conversation.
Expand Down
13 changes: 11 additions & 2 deletions app/bot/batched_input_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import settings
from app.bot.message_processor import MessageProcessor
from app.bot.utils import TypingWorker, message_is_forward, get_username, Timer, generate_document_id
from app.llm_models import get_model_by_name
from app.openai_helpers.whisper import get_audio_speech_to_text
from app.storage.db import User, MessageType
from app.storage.user_role import check_access_conditions
Expand Down Expand Up @@ -112,10 +113,18 @@ async def process_batch(self, messages_batch: List[types.Message], user: User):
for message in messages_batch:
if message.audio:
await self.handle_voice(message, user, message_processor)
if message.voice:
elif message.voice:
await self.handle_voice(message, user, message_processor)
if message.document:
elif message.document:
await self.handle_document(message, user, message_processor)
elif message.photo:
# handling image just like message but with some additional checks
llm_model = get_model_by_name(user.current_model)
if llm_model.capabilities.image_processing:
await self.handle_message(message, user, message_processor)
else:
# TODO: exception is a bad way to handle this, need to find a better way
raise ValueError(f'Image processing is not supported by {llm_model.model_name} model.')
else:
await self.handle_message(message, user, message_processor)

Expand Down
21 changes: 15 additions & 6 deletions app/bot/message_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from app.bot.utils import send_telegram_message, detect_and_extract_code, edit_telegram_message
from app.context.context_manager import build_context_manager
from app.context.dialog_manager import DialogUtils
from app.llm_models import get_model_by_name
from app.openai_helpers.chatgpt import ChatGPT
from app.openai_helpers.count_tokens import calculate_image_tokens
from app.storage.db import DB, User, MessageType
Expand Down Expand Up @@ -48,11 +49,12 @@ async def add_message_as_context(self, message_id: int = None, message: Message

@staticmethod
async def prepare_user_message(message: Message):
content = []
if message.text:
content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text))

if message.photo:
content = []

if message.text:
content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text))

# largest photo
photo = message.photo[-1]
file_id = photo.file_id
Expand All @@ -63,12 +65,19 @@ async def prepare_user_message(message: Message):
file_url = urljoin(f'{settings.IMAGE_PROXY_URL}:{settings.IMAGE_PROXY_PORT}', f'{file_id}_{tokens}.jpg')
content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_IMAGE_URL, file_url))

return DialogUtils.prepare_user_message(content)
return DialogUtils.prepare_user_message(content)
elif message.text:
return DialogUtils.prepare_user_message(message.text)
else:
ValueError("prepare_user_message called with empty message")

async def process(self, is_cancelled):
context_manager = await self.context_manager()

function_storage = await context_manager.get_function_storage()
llm_model = get_model_by_name(self.user.current_model)
function_storage = None
if llm_model.capabilities.tool_calling or llm_model.capabilities.function_calling:
function_storage = await context_manager.get_function_storage()
system_prompt = await context_manager.get_system_prompt()
chat_gpt_manager = ChatGptManager(ChatGPT(self.user.current_model, system_prompt, function_storage), self.db)

Expand Down
7 changes: 6 additions & 1 deletion app/bot/settings_menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from aiogram import Bot, types, Dispatcher

from app.llm_models import get_models
from app.storage.db import User, DB
from app.storage.user_role import check_access_conditions, UserRole

Expand All @@ -16,6 +17,8 @@
'gpt-4': 'GPT-4'
}

ALL_MODELS_OPTIONS = list(get_models().keys())

TTS_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']

SETTINGS_PREFIX = 'settings'
Expand Down Expand Up @@ -99,20 +102,22 @@ def __init__(self, bot: Bot, dispatcher: Dispatcher, db: DB):
self.settings = {
'current_model': VisibleOptionsSetting('current_model', GPT_MODELS_OPTIONS),
'current_model_preview': VisibleOptionsSetting('current_model', OLD_MODELS_OPTIONS),
'all_models': ChoiceSetting('Model', 'current_model', ALL_MODELS_OPTIONS),
'gpt_mode': ChoiceSetting('GPT mode', 'gpt_mode', list(settings.gpt_mode.keys())),
'use_functions': OnOffSetting('Use functions', 'use_functions'),
'image_generation': OnOffSetting('Image generation', 'image_generation'),
'system_prompt_settings_enabled': OnOffSetting('User info saving', 'system_prompt_settings_enabled'),
'tts-voice': ChoiceSetting('TTS voice', 'tts_voice', TTS_VOICES),
'voice_as_prompt': OnOffSetting('Voice as prompt', 'voice_as_prompt'),
'function_call_verbose': OnOffSetting('Verbose function calls', 'function_call_verbose'),
# 'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'),
'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'),
# 'auto_summarize': OnOffSetting('Auto summarize', 'auto_summarize'),
# 'forward_as_prompt': OnOffSetting('Forward as prompt', 'forward_as_prompt'),
}
self.minimum_required_roles = {
'current_model': settings.USER_ROLE_CHOOSE_MODEL,
'current_model_preview': UserRole.ADMIN,
'all_models': UserRole.ADMIN,
'image_generation': settings.USER_ROLE_IMAGE_GENERATION,
'tts-voice': settings.USER_ROLE_TTS,
'streaming_answers': settings.USER_ROLE_STREAMING_ANSWERS,
Expand Down
1 change: 0 additions & 1 deletion app/bot/telegram_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
calculate_image_generation_usage_price, calculate_tts_usage_price)
from app.storage.db import DBFactory, User
from app.storage.user_role import check_access_conditions, UserRole
from app.openai_helpers.chatgpt import GptModel

from aiogram import types, Bot, Dispatcher
from aiogram.utils import executor
Expand Down
73 changes: 3 additions & 70 deletions app/context/context_manager.py
Original file line number Diff line number Diff line change
@@ -1,84 +1,16 @@
import dataclasses
from typing import List, Optional

from aiogram import types

import settings
from app.context.dialog_manager import DialogManager
from app.context.function_manager import FunctionManager
from app.llm_models import get_model_by_name
from app.openai_helpers.chatgpt import DialogMessage
from app.openai_helpers.function_storage import FunctionStorage
from app.storage.db import DB, User, MessageType


@dataclasses.dataclass
class ContextConfiguration:
    """Per-model token budgets consumed by the context manager."""

    model_name: str

    # Long-term memory is based on embedding context search.
    long_term_memory_tokens: int
    # Short-term memory is used for storing the last messages.
    short_term_memory_tokens: int
    # Length of the summary to be generated when the context is too long.
    summary_length: int
    # Hard limit for context size; when this limit is reached, processing is
    # stopped, and summarization also cannot be done.
    hard_max_context_size: int

    # Known models' budgets, keyed by model name.  Each entry is
    # (long_term_memory_tokens, short_term_memory_tokens,
    #  summary_length, hard_max_context_size).
    # No annotation on purpose: this must stay a class attribute, not a field.
    _BUDGETS = {
        'gpt-3.5-turbo': (512, 2560, 512, 5 * 1024),
        'gpt-3.5-turbo-16k': (1024, 4096, 1024, 17 * 1024),
        'gpt-4': (512, 2048, 1024, 9 * 1024),
        'gpt-4-turbo-preview': (512, 5120, 2048, 13 * 1024),
        'gpt-4-vision-preview': (512, 5120, 2048, 13 * 1024),
        'gpt-4-turbo': (512, 5120, 2048, 13 * 1024),
    }

    @staticmethod
    def get_config(model: str):
        """Return a fresh ContextConfiguration for *model*.

        Raises:
            ValueError: if *model* has no known token budget.
        """
        budget = ContextConfiguration._BUDGETS.get(model)
        if budget is None:
            raise ValueError(f'Unknown model name: {model}')
        long_term, short_term, summary, hard_max = budget
        # Build a new instance per call so callers never share mutable state.
        return ContextConfiguration(
            model_name=model,
            long_term_memory_tokens=long_term,
            short_term_memory_tokens=short_term,
            summary_length=summary,
            hard_max_context_size=hard_max,
        )


class ContextManager:
def __init__(self, db: DB, user: User, message: types.Message):
self.db = db
Expand All @@ -88,7 +20,8 @@ def __init__(self, db: DB, user: User, message: types.Message):
self.function_manager = None

async def process_dialog(self):
context_configuration = ContextConfiguration.get_config(self.user.current_model)
llm_model = get_model_by_name(self.user.current_model)
context_configuration = llm_model.context_configuration
self.dialog_manager = DialogManager(self.db, self.user, context_configuration)
await self.dialog_manager.process_dialog(self.message)

Expand Down

0 comments on commit 014ad37

Please sign in to comment.