From c417ba31cb05ec255569d9ce3b41257087515c10 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Fri, 19 Apr 2024 16:56:48 +0300 Subject: [PATCH 01/10] llama3 ollama experimental test --- app/context/context_manager.py | 8 ++++++++ app/openai_helpers/chatgpt.py | 20 +++++++++++++------- app/openai_helpers/utils.py | 14 ++++++++++++-- main.py | 2 +- settings.py | 4 ++++ 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/app/context/context_manager.py b/app/context/context_manager.py index 493d82c..7c279b2 100644 --- a/app/context/context_manager.py +++ b/app/context/context_manager.py @@ -75,6 +75,14 @@ def get_config(model: str): summary_length=2048, hard_max_context_size=13 * 1024, ) + elif model == 'llama3': + return ContextConfiguration( + model_name=model, + long_term_memory_tokens=512, + short_term_memory_tokens=2048, + summary_length=512, + hard_max_context_size=13 * 1024, + ) else: raise ValueError(f'Unknown model name: {model}') diff --git a/app/openai_helpers/chatgpt.py b/app/openai_helpers/chatgpt.py index 47f8ce5..c428bdf 100644 --- a/app/openai_helpers/chatgpt.py +++ b/app/openai_helpers/chatgpt.py @@ -2,6 +2,8 @@ from contextlib import suppress from typing import List, Any, Optional, Callable, Union +from openai import BadRequestError + import settings from app.bot.utils import merge_dicts from app.openai_helpers.count_tokens import count_messages_tokens, count_tokens_from_functions, count_string_tokens @@ -19,6 +21,7 @@ class GptModel: GPT_4_TURBO = 'gpt-4-turbo' GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview' GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview' + LLAMA3 = 'llama3' GPT_MODELS = {GptModel.GPT_35_TURBO, GptModel.GPT_35_TURBO_16K, GptModel.GPT_4, GptModel.GPT_4_TURBO, @@ -148,13 +151,16 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i del additional_fields['functions'] messages = self.create_context(messages_to_send, self.system_prompt) - resp_generator = await OpenAIAsync.instance().chat.completions.create( - model=self.model, - messages=messages, - temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, - stream=True, - **additional_fields, - ) + try: + resp_generator = await OpenAIAsync.instance().chat.completions.create( + model=self.model, + messages=messages, + temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, + stream=True, + **additional_fields, + ) + except BadRequestError as e: + print(e) prompt_tokens += count_messages_tokens(messages, self.model) result_dict = {} diff --git a/app/openai_helpers/utils.py b/app/openai_helpers/utils.py index 91e9334..415b0ba 100644 --- a/app/openai_helpers/utils.py +++ b/app/openai_helpers/utils.py @@ -10,6 +10,7 @@ 'gpt-4-vision-preview': (Decimal('0.01'), Decimal('0.03')), 'gpt-4-turbo-preview': (Decimal('0.01'), Decimal('0.03')), 'gpt-4-turbo': (Decimal('0.01'), Decimal('0.03')), + 'llama3': (Decimal('0'), Decimal('0')), } WHISPER_PRICE = Decimal('0.006') @@ -54,14 +55,23 @@ def calculate_image_generation_usage_price(model, resolution, num_images): class OpenAIAsync: _key = None + _base_url = None _instance = None @classmethod - def init(cls, api_key): + def init(cls, api_key, base_url=None): cls._key = api_key + cls._base_url = base_url @classmethod def instance(cls): + params = {} + if cls._base_url: + params['base_url'] = cls._base_url + + if cls._key is None: + raise ValueError("OpenAIAsync is not initialized") + if cls._instance is None: - cls._instance = openai.AsyncOpenAI(api_key=cls._key) + cls._instance = openai.AsyncOpenAI(**params) return cls._instance diff 
--git a/main.py b/main.py index b731c5c..7314119 100644 --- a/main.py +++ b/main.py @@ -13,6 +13,6 @@ if __name__ == '__main__': - OpenAIAsync.init(settings.OPENAI_TOKEN) + OpenAIAsync.init(settings.OLLAMA_API_KEY, settings.OLLAMA_BASE_URL) telegram_bot = TelegramBot(bot, dp) telegram_bot.run() diff --git a/settings.py b/settings.py index 17a8e0c..8d00fc9 100644 --- a/settings.py +++ b/settings.py @@ -69,6 +69,10 @@ IMAGE_PROXY_BIND_HOST = '0.0.0.0' IMAGE_PROXY_BIND_PORT = 8321 +OLLAMA_BASE_URL = '' +OLLAMA_MODEL = 'llama3' +OLLAMA_API_KEY = 'ollama' + # Vectara RAG settings # this feature is highly experimental and not recommended to be used in it's current state # currently it even doesn't have instructions on how to setup, use it only if you feel experimenalish From 93faa146dcd0acd8d00bd1e6f7739c3c1dd405f0 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Tue, 23 Apr 2024 23:05:06 +0300 Subject: [PATCH 02/10] Fix ollama api --- app/bot/message_processor.py | 12 ++++++++---- app/openai_helpers/chatgpt.py | 2 +- app/openai_helpers/utils.py | 2 ++ settings.py | 5 ++++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/app/bot/message_processor.py b/app/bot/message_processor.py index 702d7d1..3275e1c 100644 --- a/app/bot/message_processor.py +++ b/app/bot/message_processor.py @@ -48,11 +48,13 @@ async def add_message_as_context(self, message_id: int = None, message: Message @staticmethod async def prepare_user_message(message: Message): - content = [] - if message.text: - content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text)) if message.photo: + content = [] + + if message.text: + content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text)) + # largest photo photo = message.photo[-1] file_id = photo.file_id @@ -63,7 +65,9 @@ async def prepare_user_message(message: Message): file_url = urljoin(f'{settings.IMAGE_PROXY_URL}:{settings.IMAGE_PROXY_PORT}', f'{file_id}_{tokens}.jpg') content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_IMAGE_URL, file_url)) - return DialogUtils.prepare_user_message(content) + return DialogUtils.prepare_user_message(content) + else: + return DialogUtils.prepare_user_message(message.text) async def process(self, is_cancelled): context_manager = await self.context_manager() diff --git a/app/openai_helpers/chatgpt.py b/app/openai_helpers/chatgpt.py index c428bdf..e580279 100644 --- a/app/openai_helpers/chatgpt.py +++ b/app/openai_helpers/chatgpt.py @@ -25,7 +25,7 @@ class GptModel: GPT_MODELS = {GptModel.GPT_35_TURBO, GptModel.GPT_35_TURBO_16K, GptModel.GPT_4, GptModel.GPT_4_TURBO, - GptModel.GPT_4_TURBO_PREVIEW, GptModel.GPT_4_VISION_PREVIEW} + GptModel.GPT_4_TURBO_PREVIEW, GptModel.GPT_4_VISION_PREVIEW, GptModel.LLAMA3} class FunctionCall(pydantic.BaseModel): diff --git a/app/openai_helpers/utils.py b/app/openai_helpers/utils.py index 415b0ba..34d04e3 100644 --- a/app/openai_helpers/utils.py +++ b/app/openai_helpers/utils.py @@ -72,6 +72,8 @@ def instance(cls): if cls._key is None: raise ValueError("OpenAIAsync is not initialized") + params['api_key'] = cls._key + if cls._instance is None: cls._instance = openai.AsyncOpenAI(**params) return cls._instance diff --git a/settings.py b/settings.py index 8d00fc9..5887656 100644 --- a/settings.py +++ b/settings.py @@ -17,7 +17,10 @@ 'and assist users to the best of your abilities. Listen carefully to what they say, ask questions, ' 'and help in any way you can. 
Avoid giving advices, your ultimate goal is to help the user to find the right solution by himself. ' 'Ask only one question a time.', - } + }, + 'ai dungeon': { # free to be deleted, also you can add new ones + 'system': 'You are the AI Dungeon game. Your task is to entertain user with role play. User creates a setup and you play role of the world and characters in it.', + }, } # Mandatory settings From b713fe07052c6746b592c4555e634bfb77f9f8be Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Wed, 24 Apr 2024 02:01:31 +0300 Subject: [PATCH 03/10] Refactoring of adding new models with their own base_urls Streaming answers setting comeback --- app/bot/settings_menu.py | 7 +- app/bot/telegram_bot.py | 1 - app/context/context_manager.py | 84 ++--------------- app/llm_models.py | 154 +++++++++++++++++++++++++++++++ app/openai_helpers/chatgpt.py | 48 +++------- app/openai_helpers/llm_client.py | 21 +++++ app/openai_helpers/utils.py | 18 ++-- main.py | 3 +- settings.py | 3 +- 9 files changed, 210 insertions(+), 129 deletions(-) create mode 100644 app/llm_models.py create mode 100644 app/openai_helpers/llm_client.py diff --git a/app/bot/settings_menu.py b/app/bot/settings_menu.py index 8ebfa05..e2be2ae 100644 --- a/app/bot/settings_menu.py +++ b/app/bot/settings_menu.py @@ -2,6 +2,7 @@ from aiogram import Bot, types, Dispatcher +from app.llm_models import get_models from app.storage.db import User, DB from app.storage.user_role import check_access_conditions, UserRole @@ -16,6 +17,8 @@ 'gpt-4': 'GPT-4' } +ALL_MODELS_OPTIONS = list(get_models().keys()) + TTS_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'] SETTINGS_PREFIX = 'settings' @@ -99,6 +102,7 @@ def __init__(self, bot: Bot, dispatcher: Dispatcher, db: DB): self.settings = { 'current_model': VisibleOptionsSetting('current_model', GPT_MODELS_OPTIONS), 'current_model_preview': VisibleOptionsSetting('current_model', OLD_MODELS_OPTIONS), + 'all_models': ChoiceSetting('Model', 'current_model', ALL_MODELS_OPTIONS), 'gpt_mode': ChoiceSetting('GPT mode', 'gpt_mode', list(settings.gpt_mode.keys())), 'use_functions': OnOffSetting('Use functions', 'use_functions'), 'image_generation': OnOffSetting('Image generation', 'image_generation'), @@ -106,13 +110,14 @@ def __init__(self, bot: Bot, dispatcher: Dispatcher, db: DB): 'tts-voice': ChoiceSetting('TTS voice', 'tts_voice', TTS_VOICES), 'voice_as_prompt': OnOffSetting('Voice as prompt', 'voice_as_prompt'), 'function_call_verbose': OnOffSetting('Verbose function calls', 'function_call_verbose'), - # 'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'), + 'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'), # 'auto_summarize': OnOffSetting('Auto summarize', 'auto_summarize'), # 'forward_as_prompt': OnOffSetting('Forward as prompt', 'forward_as_prompt'), } self.minimum_required_roles = { 'current_model': settings.USER_ROLE_CHOOSE_MODEL, 'current_model_preview': UserRole.ADMIN, + 'all_models': UserRole.ADMIN, 'image_generation': settings.USER_ROLE_IMAGE_GENERATION, 'tts-voice': settings.USER_ROLE_TTS, 'streaming_answers': settings.USER_ROLE_STREAMING_ANSWERS, diff --git a/app/bot/telegram_bot.py b/app/bot/telegram_bot.py index a9f4932..edcce9e 100644 --- a/app/bot/telegram_bot.py +++ b/app/bot/telegram_bot.py @@ -18,7 +18,6 @@ calculate_image_generation_usage_price, calculate_tts_usage_price) from app.storage.db import DBFactory, User from app.storage.user_role import check_access_conditions, UserRole -from app.openai_helpers.chatgpt import 
GptModel from aiogram import types, Bot, Dispatcher from aiogram.utils import executor diff --git a/app/context/context_manager.py b/app/context/context_manager.py index 7c279b2..ed39768 100644 --- a/app/context/context_manager.py +++ b/app/context/context_manager.py @@ -1,4 +1,3 @@ -import dataclasses from typing import List, Optional from aiogram import types @@ -6,87 +5,12 @@ import settings from app.context.dialog_manager import DialogManager from app.context.function_manager import FunctionManager +from app.llm_models import get_models from app.openai_helpers.chatgpt import DialogMessage from app.openai_helpers.function_storage import FunctionStorage from app.storage.db import DB, User, MessageType -@dataclasses.dataclass -class ContextConfiguration: - model_name: str - - # long term memory is based on embedding context search - long_term_memory_tokens: int - # short term memory is used for storing last messages - short_term_memory_tokens: int - # length of summary to be generated when context is too long - summary_length: int - # hard limit for context size, when this limit is reached, processing is being stopped, - # summarization also cannot be done - hard_max_context_size: int - - @staticmethod - def get_config(model: str): - if model == 'gpt-3.5-turbo': - return ContextConfiguration( - model_name=model, - long_term_memory_tokens=512, - short_term_memory_tokens=2560, - summary_length=512, - hard_max_context_size=5*1024, - ) - elif model == 'gpt-3.5-turbo-16k': - return ContextConfiguration( - model_name=model, - long_term_memory_tokens=1024, - short_term_memory_tokens=4096, - summary_length=1024, - hard_max_context_size=17*1024, - ) - elif model == 'gpt-4': - return ContextConfiguration( - model_name=model, - long_term_memory_tokens=512, - short_term_memory_tokens=2048, - summary_length=1024, - hard_max_context_size=9*1024, - ) - elif model == 'gpt-4-turbo-preview': - return ContextConfiguration( - model_name=model, - long_term_memory_tokens=512, - short_term_memory_tokens=5120, - summary_length=2048, - hard_max_context_size=13*1024, - ) - elif model == 'gpt-4-vision-preview': - return ContextConfiguration( - model_name=model, - long_term_memory_tokens=512, - short_term_memory_tokens=5120, - summary_length=2048, - hard_max_context_size=13*1024, - ) - elif model == 'gpt-4-turbo': - return ContextConfiguration( - model_name=model, - long_term_memory_tokens=512, - short_term_memory_tokens=5120, - summary_length=2048, - hard_max_context_size=13 * 1024, - ) - elif model == 'llama3': - return ContextConfiguration( - model_name=model, - long_term_memory_tokens=512, - short_term_memory_tokens=2048, - summary_length=512, - hard_max_context_size=13 * 1024, - ) - else: - raise ValueError(f'Unknown model name: {model}') - - class ContextManager: def __init__(self, db: DB, user: User, message: types.Message): self.db = db @@ -96,7 +20,11 @@ def __init__(self, db: DB, user: User, message: types.Message): self.function_manager = None async def process_dialog(self): - context_configuration = ContextConfiguration.get_config(self.user.current_model) + models = get_models() + llm_model = models.get(self.user.current_model) + if not llm_model: + raise ValueError(f"Unknown model: {self.user.current_model}") + context_configuration = llm_model.context_configuration self.dialog_manager = DialogManager(self.db, self.user, context_configuration) await self.dialog_manager.process_dialog(self.message) diff --git a/app/llm_models.py b/app/llm_models.py new file mode 100644 index 0000000..c2be6e4 --- 
/dev/null +++ b/app/llm_models.py @@ -0,0 +1,154 @@ +import dataclasses +from decimal import Decimal + +import settings + + +@dataclasses.dataclass +class LLMModelPrice: + # price per 1000 tokens + input_tokens_price: Decimal + output_tokens_price: Decimal + + +@dataclasses.dataclass +class LLMModelContextConfiguration: + # long term memory is based on embedding context search + long_term_memory_tokens: int + # short term memory is used for storing last messages + short_term_memory_tokens: int + # length of summary to be generated when context is too long + summary_length: int + # hard limit for context size, when this limit is reached, processing is being stopped, + # summarization also cannot be done + hard_max_context_size: int + + +class LLMModel: + def __init__(self, model_name: str, api_key, context_configuration, model_price=None, base_url=None): + if model_price is None: + model_price = LLMModelPrice(input_tokens_price=Decimal('0'), output_tokens_price=Decimal('0')) + + self.model_name = model_name + self.api_key = api_key + self.context_configuration = context_configuration + self.model_price = model_price + self.base_url = base_url + + +class LLMModels: + GPT_35_TURBO = 'gpt-3.5-turbo' + GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k' + GPT_4 = 'gpt-4' + GPT_4_TURBO = 'gpt-4-turbo' + GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview' + GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview' + LLAMA3 = 'llama3' + + +def get_models(): + models = {} + openai_models = { + LLMModels.GPT_35_TURBO: LLMModel( + model_name=LLMModels.GPT_35_TURBO, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=2560, + summary_length=512, + hard_max_context_size=5*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.0005'), + output_tokens_price=Decimal('0.0015'), + ), + ), + LLMModels.GPT_35_TURBO_16K: LLMModel( + model_name=LLMModels.GPT_35_TURBO_16K, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=1024, + short_term_memory_tokens=4096, + summary_length=1024, + hard_max_context_size=17*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.003'), + output_tokens_price=Decimal('0.004'), + ), + ), + LLMModels.GPT_4: LLMModel( + model_name=LLMModels.GPT_4, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=2048, + summary_length=1024, + hard_max_context_size=9*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.03'), + output_tokens_price=Decimal('0.06'), + ), + ), + LLMModels.GPT_4_TURBO: LLMModel( + model_name=LLMModels.GPT_4_TURBO, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=5120, + summary_length=2048, + hard_max_context_size=13*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.01'), + output_tokens_price=Decimal('0.03'), + ), + ), + LLMModels.GPT_4_TURBO_PREVIEW: LLMModel( + model_name=LLMModels.GPT_4_TURBO_PREVIEW, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=5120, + summary_length=2048, + hard_max_context_size=13*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.01'), + output_tokens_price=Decimal('0.03'), + ), + ), + LLMModels.GPT_4_VISION_PREVIEW: LLMModel( + 
model_name=LLMModels.GPT_4_VISION_PREVIEW, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=5120, + summary_length=2048, + hard_max_context_size=13*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.01'), + output_tokens_price=Decimal('0.03'), + ), + ), + } + + if settings.OPENAI_TOKEN: + models.update(openai_models) + + if settings.OLLAMA_BASE_URL: + models[LLMModels.LLAMA3] = LLMModel( + model_name=LLMModels.LLAMA3, + api_key=settings.OLLAMA_API_KEY, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=2048, + summary_length=512, + hard_max_context_size=13*1024, + ), + base_url=settings.OLLAMA_BASE_URL, + ) + + return models diff --git a/app/openai_helpers/chatgpt.py b/app/openai_helpers/chatgpt.py index e580279..b26f4b8 100644 --- a/app/openai_helpers/chatgpt.py +++ b/app/openai_helpers/chatgpt.py @@ -2,30 +2,15 @@ from contextlib import suppress from typing import List, Any, Optional, Callable, Union -from openai import BadRequestError - import settings from app.bot.utils import merge_dicts +from app.llm_models import get_models, LLMModels from app.openai_helpers.count_tokens import count_messages_tokens, count_tokens_from_functions, count_string_tokens from app.openai_helpers.function_storage import FunctionStorage import pydantic -from app.openai_helpers.utils import OpenAIAsync - - -class GptModel: - GPT_35_TURBO = 'gpt-3.5-turbo' - GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k' - GPT_4 = 'gpt-4' - GPT_4_TURBO = 'gpt-4-turbo' - GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview' - GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview' - LLAMA3 = 'llama3' - - -GPT_MODELS = {GptModel.GPT_35_TURBO, GptModel.GPT_35_TURBO_16K, GptModel.GPT_4, GptModel.GPT_4_TURBO, - GptModel.GPT_4_TURBO_PREVIEW, GptModel.GPT_4_VISION_PREVIEW, GptModel.LLAMA3} +from app.openai_helpers.llm_client import OpenAILLMClient class FunctionCall(pydantic.BaseModel): @@ -91,7 +76,7 @@ def openai_message(self): class ChatGPT: def __init__(self, model, system_prompt: str, function_storage: FunctionStorage = None): self.function_storage = function_storage - if model not in GPT_MODELS: + if model not in get_models(): raise ValueError(f"Unknown model: {model}") self.model = model self.system_prompt = system_prompt @@ -104,7 +89,7 @@ async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogM 'function_call': 'auto', }) - if self.model == GptModel.GPT_4_VISION_PREVIEW: + if self.model == LLMModels.GPT_4_VISION_PREVIEW: # TODO: somewhy by default it's 16 tokens for this model additional_fields['max_tokens'] = 4096 @@ -115,7 +100,7 @@ async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogM del additional_fields['functions'] messages = self.create_context(messages_to_send, self.system_prompt) - resp = await OpenAIAsync.instance().chat.completions.create( + resp = await OpenAILLMClient.get_client(self.model).chat.completions.create( model=self.model, messages=messages, temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, @@ -130,9 +115,7 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i prompt_tokens = 0 additional_fields = {} - system_prompt_addition = None if self.function_storage is not None: - system_prompt_addition = self.function_storage.get_system_prompt_addition() functions = self.function_storage.get_openai_prompt() prompt_tokens += count_tokens_from_functions(functions, self.model) 
additional_fields.update({ @@ -140,7 +123,7 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i 'function_call': 'auto', }) - if self.model == GptModel.GPT_4_VISION_PREVIEW: + if self.model == LLMModels.GPT_4_VISION_PREVIEW: # TODO: somewhy by default it's 16 tokens for this model additional_fields['max_tokens'] = 4096 @@ -151,16 +134,13 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i del additional_fields['functions'] messages = self.create_context(messages_to_send, self.system_prompt) - try: - resp_generator = await OpenAIAsync.instance().chat.completions.create( - model=self.model, - messages=messages, - temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, - stream=True, - **additional_fields, - ) - except BadRequestError as e: - print(e) + resp_generator = await OpenAILLMClient.get_client(self.model).chat.completions.create( + model=self.model, + messages=messages, + temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, + stream=True, + **additional_fields, + ) prompt_tokens += count_messages_tokens(messages, self.model) result_dict = {} @@ -213,7 +193,7 @@ async def summarize_messages(messages: List[DialogMessage], model: str, summary_ "role": "user", "content": f"Summarize this conversation in {summary_max_length} characters or less. Divide different themes explicitly with new lines. Return only text of summary, nothing else.", }] - resp = await OpenAIAsync.instance().chat.completions.create( + resp = await OpenAILLMClient.get_client(model).chat.completions.create( model=model, messages=prompt_messages, temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, diff --git a/app/openai_helpers/llm_client.py b/app/openai_helpers/llm_client.py new file mode 100644 index 0000000..de16ccb --- /dev/null +++ b/app/openai_helpers/llm_client.py @@ -0,0 +1,21 @@ +import openai + +from app.llm_models import get_models + + +class OpenAILLMClient: + _model_clients = {} + + @classmethod + def get_client(cls, model_name: str): + if model_name not in cls._model_clients: + llm_model = get_models().get(model_name) + if not llm_model: + raise ValueError(f"Unknown model: {model_name}") + params = { + 'api_key': llm_model.api_key, + } + if llm_model.base_url: + params['base_url'] = llm_model.base_url + cls._model_clients[model_name] = openai.AsyncOpenAI(**params) + return cls._model_clients[model_name] diff --git a/app/openai_helpers/utils.py b/app/openai_helpers/utils.py index 34d04e3..f49eb1c 100644 --- a/app/openai_helpers/utils.py +++ b/app/openai_helpers/utils.py @@ -1,17 +1,7 @@ from decimal import Decimal import openai - -COMPLETION_PRICE = { - 'gpt-3.5-turbo': (Decimal('0.0005'), Decimal('0.0015')), - 'gpt-3.5-turbo-16k': (Decimal('0.003'), Decimal('0.004')), - 'gpt-4': (Decimal('0.03'), Decimal('0.06')), - 'gpt-4-1106-preview': (Decimal('0.01'), Decimal('0.03')), - 'gpt-4-vision-preview': (Decimal('0.01'), Decimal('0.03')), - 'gpt-4-turbo-preview': (Decimal('0.01'), Decimal('0.03')), - 'gpt-4-turbo': (Decimal('0.01'), Decimal('0.03')), - 'llama3': (Decimal('0'), Decimal('0')), -} +from app.llm_models import get_models WHISPER_PRICE = Decimal('0.006') @@ -30,7 +20,11 @@ def calculate_completion_usage_price(prompt_tokens: int, completion_tokens: int, model: str) -> Decimal: - price = COMPLETION_PRICE.get(model) + llm_model = get_models().get(model) + if not llm_model: + raise ValueError(f"Unknown model: {model}") + + price = llm_model.model_price if not price: raise ValueError(f"Unknown model: {model}") prompt_price, 
completion_price = price diff --git a/main.py b/main.py index 7314119..0bddc3f 100644 --- a/main.py +++ b/main.py @@ -13,6 +13,7 @@ if __name__ == '__main__': - OpenAIAsync.init(settings.OLLAMA_API_KEY, settings.OLLAMA_BASE_URL) + # needed for whisper and tts capabilities + OpenAIAsync.init(settings.OPENAI_TOKEN) telegram_bot = TelegramBot(bot, dp) telegram_bot.run() diff --git a/settings.py b/settings.py index 5887656..be9a88d 100644 --- a/settings.py +++ b/settings.py @@ -24,7 +24,7 @@ } # Mandatory settings -OPENAI_TOKEN = 'YOUR_TOKEN' +OPENAI_TOKEN = '' TELEGRAM_BOT_TOKEN = 'YOUR_TOKEN' # Image proxy settings # This proxy is used to send images to openai for GPT-4-Vision @@ -73,7 +73,6 @@ IMAGE_PROXY_BIND_PORT = 8321 OLLAMA_BASE_URL = '' -OLLAMA_MODEL = 'llama3' OLLAMA_API_KEY = 'ollama' # Vectara RAG settings From c53f0ab8c4521ae4eb9541a44b0e90db15d19130 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Thu, 25 Apr 2024 12:29:35 +0300 Subject: [PATCH 04/10] Fix count_tokens for custom models --- app/openai_helpers/count_tokens.py | 12 +++++++++--- app/openai_helpers/utils.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/app/openai_helpers/count_tokens.py b/app/openai_helpers/count_tokens.py index 40cebac..3e495e0 100644 --- a/app/openai_helpers/count_tokens.py +++ b/app/openai_helpers/count_tokens.py @@ -21,7 +21,8 @@ def count_string_tokens(string: str, model="gpt-3.5-turbo") -> int: elif "gpt-4" in model: model = "gpt-4" else: - raise ValueError(f"Unknown model: {model}") + # TODO: add method to calculate tokens for different models + model = "gpt-4" encoding = tiktoken.encoding_for_model(model) return len(encoding.encode(str(string))) @@ -43,7 +44,11 @@ def count_messages_tokens(messages: List[dict], model="gpt-3.5-turbo") -> int: tokens_per_message = 3 tokens_per_name = 1 - encoding = tiktoken.encoding_for_model(model) + try: + encoding = tiktoken.encoding_for_model(model) + except: + # TODO: add method to calculate tokens for different models + encoding = tiktoken.encoding_for_model("gpt-4") num_tokens = 0 for message in messages: @@ -84,7 +89,8 @@ def count_tokens_from_functions(functions, model="gpt-3.5-turbo"): elif "gpt-4" in model: model = "gpt-4" else: - raise ValueError(f"Unknown model: {model}") + # TODO: add method to calculate tokens for different models + model = "gpt-4" encoding = tiktoken.encoding_for_model(model) num_tokens = 0 diff --git a/app/openai_helpers/utils.py b/app/openai_helpers/utils.py index f49eb1c..507bdc0 100644 --- a/app/openai_helpers/utils.py +++ b/app/openai_helpers/utils.py @@ -27,7 +27,7 @@ def calculate_completion_usage_price(prompt_tokens: int, completion_tokens: int, price = llm_model.model_price if not price: raise ValueError(f"Unknown model: {model}") - prompt_price, completion_price = price + prompt_price, completion_price = price.input_tokens_price, price.output_tokens_price return prompt_price * prompt_tokens / 1000 + completion_price * completion_tokens / 1000 From 936b138b407882f486330de01c36de3b6240c185 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Sun, 28 Apr 2024 21:54:32 +0300 Subject: [PATCH 05/10] Fix stange bugs --- app/bot/batched_input_handler.py | 4 ++-- app/bot/message_processor.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/app/bot/batched_input_handler.py b/app/bot/batched_input_handler.py index 1625cf3..73d4deb 100644 --- a/app/bot/batched_input_handler.py +++ b/app/bot/batched_input_handler.py @@ -112,9 +112,9 @@ async def process_batch(self, messages_batch: 
List[types.Message], user: User): for message in messages_batch: if message.audio: await self.handle_voice(message, user, message_processor) - if message.voice: + elif message.voice: await self.handle_voice(message, user, message_processor) - if message.document: + elif message.document: await self.handle_document(message, user, message_processor) else: await self.handle_message(message, user, message_processor) diff --git a/app/bot/message_processor.py b/app/bot/message_processor.py index 3275e1c..4ed5b4e 100644 --- a/app/bot/message_processor.py +++ b/app/bot/message_processor.py @@ -48,7 +48,6 @@ async def add_message_as_context(self, message_id: int = None, message: Message @staticmethod async def prepare_user_message(message: Message): - if message.photo: content = [] @@ -66,8 +65,10 @@ async def prepare_user_message(message: Message): content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_IMAGE_URL, file_url)) return DialogUtils.prepare_user_message(content) - else: + elif message.text: return DialogUtils.prepare_user_message(message.text) + else: + raise ValueError("prepare_user_message called with empty message") async def process(self, is_cancelled): context_manager = await self.context_manager() From 8c3eb4ae1f075f423ea128b1147c1b4eaad4b703 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Mon, 29 Apr 2024 00:30:37 +0300 Subject: [PATCH 06/10] Refactor llmmodel usage Start refactoring FunctionStorage for tools api support Add LLMCapabilities class, getting ready to integrate --- app/context/context_manager.py | 7 ++--- app/llm_models.py | 23 +++++++++++++++- app/openai_helpers/chatgpt.py | 36 ++++++++++++-------------- app/openai_helpers/function_storage.py | 13 +++++++++- app/openai_helpers/llm_client.py | 6 ++--- app/openai_helpers/utils.py | 7 ++--- 6 files changed, 57 insertions(+), 35 deletions(-) diff --git a/app/context/context_manager.py b/app/context/context_manager.py index ed39768..281ba27 100644 --- a/app/context/context_manager.py +++ b/app/context/context_manager.py @@ -5,7 +5,7 @@ import settings from app.context.dialog_manager import DialogManager from app.context.function_manager import FunctionManager -from app.llm_models import get_models +from app.llm_models import get_model_by_name from app.openai_helpers.chatgpt import DialogMessage from app.openai_helpers.function_storage import FunctionStorage from app.storage.db import DB, User, MessageType @@ -20,10 +20,7 @@ def __init__(self, db: DB, user: User, message: types.Message): self.function_manager = None async def process_dialog(self): - models = get_models() - llm_model = models.get(self.user.current_model) - if not llm_model: - raise ValueError(f"Unknown model: {self.user.current_model}") + llm_model = get_model_by_name(self.user.current_model) context_configuration = llm_model.context_configuration self.dialog_manager = DialogManager(self.db, self.user, context_configuration) await self.dialog_manager.process_dialog(self.message) diff --git a/app/llm_models.py b/app/llm_models.py index c2be6e4..57fb03a 100644 --- a/app/llm_models.py +++ b/app/llm_models.py @@ -1,5 +1,6 @@ import dataclasses from decimal import Decimal +from functools import lru_cache import settings @@ -24,16 +25,28 @@ class LLMModelContextConfiguration: hard_max_context_size: int +@dataclasses.dataclass +class LLMCapabilities: + function_calling: bool = False + tool_calling: bool = False + image_processing: bool = False + + class LLMModel: - def __init__(self, model_name: str, api_key, context_configuration, 
model_price=None, base_url=None): + def __init__(self, *, model_name: str, api_key, context_configuration, model_price=None, base_url=None, + capabilities=None): if model_price is None: model_price = LLMModelPrice(input_tokens_price=Decimal('0'), output_tokens_price=Decimal('0')) + if capabilities is None: + capabilities = LLMCapabilities() + self.model_name = model_name self.api_key = api_key self.context_configuration = context_configuration self.model_price = model_price self.base_url = base_url + self.capabilities = capabilities class LLMModels: @@ -46,6 +59,7 @@ class LLMModels: LLAMA3 = 'llama3' +@lru_cache def get_models(): models = {} openai_models = { @@ -152,3 +166,10 @@ def get_models(): ) return models + + +def get_model_by_name(model_name: str): + model = get_models().get(model_name) + if not model: + raise ValueError(f"Unknown model: {model_name}") + return model diff --git a/app/openai_helpers/chatgpt.py b/app/openai_helpers/chatgpt.py index b26f4b8..dc8e81f 100644 --- a/app/openai_helpers/chatgpt.py +++ b/app/openai_helpers/chatgpt.py @@ -4,7 +4,7 @@ import settings from app.bot.utils import merge_dicts -from app.llm_models import get_models, LLMModels +from app.llm_models import LLMModels, get_model_by_name from app.openai_helpers.count_tokens import count_messages_tokens, count_tokens_from_functions, count_string_tokens from app.openai_helpers.function_storage import FunctionStorage @@ -76,20 +76,18 @@ def openai_message(self): class ChatGPT: def __init__(self, model, system_prompt: str, function_storage: FunctionStorage = None): self.function_storage = function_storage - if model not in get_models(): - raise ValueError(f"Unknown model: {model}") - self.model = model + self.llm_model = get_model_by_name(model) self.system_prompt = system_prompt async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogMessage, CompletionUsage): additional_fields = {} if self.function_storage is not None: additional_fields.update({ - 'functions': self.function_storage.get_openai_prompt(), + 'functions': self.function_storage.get_functions_info(), 'function_call': 'auto', }) - if self.model == LLMModels.GPT_4_VISION_PREVIEW: + if self.llm_model.model_name == LLMModels.GPT_4_VISION_PREVIEW: # TODO: somewhy by default it's 16 tokens for this model additional_fields['max_tokens'] = 4096 @@ -100,13 +98,13 @@ async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogM del additional_fields['functions'] messages = self.create_context(messages_to_send, self.system_prompt) - resp = await OpenAILLMClient.get_client(self.model).chat.completions.create( - model=self.model, + resp = await OpenAILLMClient.get_client(self.llm_model.model_name).chat.completions.create( + model=self.llm_model.model_name, messages=messages, temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, **additional_fields, ) - completion_usage = CompletionUsage(model=self.model, **dict(resp.usage)) + completion_usage = CompletionUsage(model=self.llm_model.model_name, **dict(resp.usage)) message = resp.choices[0].message response = DialogMessage(**dict(message)) return response, completion_usage @@ -116,14 +114,14 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i additional_fields = {} if self.function_storage is not None: - functions = self.function_storage.get_openai_prompt() - prompt_tokens += count_tokens_from_functions(functions, self.model) + functions = self.function_storage.get_functions_info() + prompt_tokens += 
count_tokens_from_functions(functions, self.llm_model.model_name) additional_fields.update({ - 'functions': self.function_storage.get_openai_prompt(), + 'functions': self.function_storage.get_functions_info(), 'function_call': 'auto', }) - if self.model == LLMModels.GPT_4_VISION_PREVIEW: + if self.llm_model.model_name == LLMModels.GPT_4_VISION_PREVIEW: # TODO: somewhy by default it's 16 tokens for this model additional_fields['max_tokens'] = 4096 @@ -134,15 +132,15 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i del additional_fields['functions'] messages = self.create_context(messages_to_send, self.system_prompt) - resp_generator = await OpenAILLMClient.get_client(self.model).chat.completions.create( - model=self.model, + resp_generator = await OpenAILLMClient.get_client(self.llm_model.model_name).chat.completions.create( + model=self.llm_model.model_name, messages=messages, temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE, stream=True, **additional_fields, ) - prompt_tokens += count_messages_tokens(messages, self.model) + prompt_tokens += count_messages_tokens(messages, self.llm_model.model_name) result_dict = {} async for resp_part in resp_generator: delta = resp_part.choices[0].delta @@ -154,19 +152,19 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i continue result_dict = merge_dicts(result_dict, dict(delta)) dialog_message = DialogMessage(**result_dict) - completion_tokens = count_messages_tokens([result_dict], model=self.model) + completion_tokens = count_messages_tokens([result_dict], model=self.llm_model.model_name) elif delta.function_call is not None: result_dict = merge_dicts(result_dict, dict(delta.function_call)) dialog_message = DialogMessage(function_call=result_dict) # TODO: find mode accurate way to calculate completion length for function calls - completion_tokens = count_string_tokens(json.dumps(result_dict), model=self.model) + completion_tokens = count_string_tokens(json.dumps(result_dict), model=self.llm_model.model_name) else: continue # openai doesn't return this field in streaming mode somewhy dialog_message.role = 'assistant' completion_usage = CompletionUsage( - model=self.model, + model=self.llm_model.model_name, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=prompt_tokens + completion_tokens, diff --git a/app/openai_helpers/function_storage.py b/app/openai_helpers/function_storage.py index 9d12a3b..4380c12 100644 --- a/app/openai_helpers/function_storage.py +++ b/app/openai_helpers/function_storage.py @@ -21,7 +21,7 @@ def extract_function_info(function) -> Dict[str, Any]: "parameters": function.get_params_schema(), } - def get_openai_prompt(self): + def get_functions_info(self): functions = [] for function in self.functions.values(): function_info = function['info'] @@ -29,6 +29,17 @@ def get_openai_prompt(self): return functions + def get_tools_info(self): + tools = [] + for function in self.functions.values(): + function_info = { + "type": "function", + "function": function['info'], + } + tools.append(function_info) + + return tools + def get_system_prompt_addition(self) -> str: result = [] for function in self.functions.values(): diff --git a/app/openai_helpers/llm_client.py b/app/openai_helpers/llm_client.py index de16ccb..c528871 100644 --- a/app/openai_helpers/llm_client.py +++ b/app/openai_helpers/llm_client.py @@ -1,6 +1,6 @@ import openai -from app.llm_models import get_models +from app.llm_models import get_model_by_name class 
OpenAILLMClient: @@ -9,9 +9,7 @@ class OpenAILLMClient: @classmethod def get_client(cls, model_name: str): if model_name not in cls._model_clients: - llm_model = get_models().get(model_name) - if not llm_model: - raise ValueError(f"Unknown model: {model_name}") + llm_model = get_model_by_name(model_name) params = { 'api_key': llm_model.api_key, } diff --git a/app/openai_helpers/utils.py b/app/openai_helpers/utils.py index 507bdc0..f822451 100644 --- a/app/openai_helpers/utils.py +++ b/app/openai_helpers/utils.py @@ -1,7 +1,7 @@ from decimal import Decimal import openai -from app.llm_models import get_models +from app.llm_models import get_model_by_name WHISPER_PRICE = Decimal('0.006') @@ -20,10 +20,7 @@ def calculate_completion_usage_price(prompt_tokens: int, completion_tokens: int, model: str) -> Decimal: - llm_model = get_models().get(model) - if not llm_model: - raise ValueError(f"Unknown model: {model}") - + llm_model = get_model_by_name(model) price = llm_model.model_price if not price: raise ValueError(f"Unknown model: {model}") From ede71ea9fccbd0d69d77a27d9c780f263915a50d Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Mon, 29 Apr 2024 00:53:54 +0300 Subject: [PATCH 07/10] Add handling image_processing capability llm_models refactoring --- app/bot/batched_input_handler.py | 9 ++ app/llm_models.py | 191 +++++++++++++++++-------------- app/openai_helpers/chatgpt.py | 23 ++-- 3 files changed, 127 insertions(+), 96 deletions(-) diff --git a/app/bot/batched_input_handler.py b/app/bot/batched_input_handler.py index 73d4deb..5fd042d 100644 --- a/app/bot/batched_input_handler.py +++ b/app/bot/batched_input_handler.py @@ -11,6 +11,7 @@ import settings from app.bot.message_processor import MessageProcessor from app.bot.utils import TypingWorker, message_is_forward, get_username, Timer, generate_document_id +from app.llm_models import get_model_by_name from app.openai_helpers.whisper import get_audio_speech_to_text from app.storage.db import User, MessageType from app.storage.user_role import check_access_conditions @@ -116,6 +117,14 @@ async def process_batch(self, messages_batch: List[types.Message], user: User): await self.handle_voice(message, user, message_processor) elif message.document: await self.handle_document(message, user, message_processor) + elif message.photo: + # handling image just like message but with some additional checks + llm_model = get_model_by_name(user.current_model) + if llm_model.capabilities.image_processing: + await self.handle_message(message, user, message_processor) + else: + # TODO: exception is a bad way to handle this, need to find a better way + raise ValueError(f'Image processing is not supported by {llm_model.model_name} model.') else: await self.handle_message(message, user, message_processor) diff --git a/app/llm_models.py b/app/llm_models.py index 57fb03a..7c02c26 100644 --- a/app/llm_models.py +++ b/app/llm_models.py @@ -49,7 +49,7 @@ def __init__(self, *, model_name: str, api_key, context_configuration, model_pri self.capabilities = capabilities -class LLMModels: +class LLModel: GPT_35_TURBO = 'gpt-3.5-turbo' GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k' GPT_4 = 'gpt-4' @@ -62,99 +62,118 @@ class LLMModels: @lru_cache def get_models(): models = {} - openai_models = { - LLMModels.GPT_35_TURBO: LLMModel( - model_name=LLMModels.GPT_35_TURBO, - api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( - long_term_memory_tokens=512, - short_term_memory_tokens=2560, - summary_length=512, - hard_max_context_size=5*1024, - ), - 
model_price=LLMModelPrice( - input_tokens_price=Decimal('0.0005'), - output_tokens_price=Decimal('0.0015'), - ), - ), - LLMModels.GPT_35_TURBO_16K: LLMModel( - model_name=LLMModels.GPT_35_TURBO_16K, - api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( - long_term_memory_tokens=1024, - short_term_memory_tokens=4096, - summary_length=1024, - hard_max_context_size=17*1024, - ), - model_price=LLMModelPrice( - input_tokens_price=Decimal('0.003'), - output_tokens_price=Decimal('0.004'), - ), - ), - LLMModels.GPT_4: LLMModel( - model_name=LLMModels.GPT_4, - api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( - long_term_memory_tokens=512, - short_term_memory_tokens=2048, - summary_length=1024, - hard_max_context_size=9*1024, - ), - model_price=LLMModelPrice( - input_tokens_price=Decimal('0.03'), - output_tokens_price=Decimal('0.06'), - ), - ), - LLMModels.GPT_4_TURBO: LLMModel( - model_name=LLMModels.GPT_4_TURBO, - api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( - long_term_memory_tokens=512, - short_term_memory_tokens=5120, - summary_length=2048, - hard_max_context_size=13*1024, + + if settings.OPENAI_TOKEN: + models.update({ + LLModel.GPT_35_TURBO: LLMModel( + model_name=LLModel.GPT_35_TURBO, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=2560, + summary_length=512, + hard_max_context_size=5*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.0005'), + output_tokens_price=Decimal('0.0015'), + ), + capabilities=LLMCapabilities( + function_calling=True, + ), ), - model_price=LLMModelPrice( - input_tokens_price=Decimal('0.01'), - output_tokens_price=Decimal('0.03'), + LLModel.GPT_35_TURBO_16K: LLMModel( + model_name=LLModel.GPT_35_TURBO_16K, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=1024, + short_term_memory_tokens=4096, + summary_length=1024, + hard_max_context_size=17*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.003'), + output_tokens_price=Decimal('0.004'), + ), + capabilities=LLMCapabilities( + function_calling=True, + ), ), - ), - LLMModels.GPT_4_TURBO_PREVIEW: LLMModel( - model_name=LLMModels.GPT_4_TURBO_PREVIEW, - api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( - long_term_memory_tokens=512, - short_term_memory_tokens=5120, - summary_length=2048, - hard_max_context_size=13*1024, + LLModel.GPT_4: LLMModel( + model_name=LLModel.GPT_4, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=2048, + summary_length=1024, + hard_max_context_size=9*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.03'), + output_tokens_price=Decimal('0.06'), + ), + capabilities=LLMCapabilities( + function_calling=True, + ), ), - model_price=LLMModelPrice( - input_tokens_price=Decimal('0.01'), - output_tokens_price=Decimal('0.03'), + LLModel.GPT_4_TURBO: LLMModel( + model_name=LLModel.GPT_4_TURBO, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=5120, + summary_length=2048, + hard_max_context_size=13*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.01'), + output_tokens_price=Decimal('0.03'), + ), + capabilities=LLMCapabilities( + 
function_calling=True, + image_processing=True, + ), ), - ), - LLMModels.GPT_4_VISION_PREVIEW: LLMModel( - model_name=LLMModels.GPT_4_VISION_PREVIEW, - api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( - long_term_memory_tokens=512, - short_term_memory_tokens=5120, - summary_length=2048, - hard_max_context_size=13*1024, + LLModel.GPT_4_TURBO_PREVIEW: LLMModel( + model_name=LLModel.GPT_4_TURBO_PREVIEW, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=5120, + summary_length=2048, + hard_max_context_size=13*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.01'), + output_tokens_price=Decimal('0.03'), + ), + capabilities=LLMCapabilities( + function_calling=True, + ), ), - model_price=LLMModelPrice( - input_tokens_price=Decimal('0.01'), - output_tokens_price=Decimal('0.03'), + LLModel.GPT_4_VISION_PREVIEW: LLMModel( + model_name=LLModel.GPT_4_VISION_PREVIEW, + api_key=settings.OPENAI_TOKEN, + context_configuration=LLMModelContextConfiguration( + long_term_memory_tokens=512, + short_term_memory_tokens=5120, + summary_length=2048, + hard_max_context_size=13*1024, + ), + model_price=LLMModelPrice( + input_tokens_price=Decimal('0.01'), + output_tokens_price=Decimal('0.03'), + ), + capabilities=LLMCapabilities( + image_processing=True, + ), ), - ), - } - - if settings.OPENAI_TOKEN: - models.update(openai_models) + }) + # example of using llama3 model in ollama if settings.OLLAMA_BASE_URL: - models[LLMModels.LLAMA3] = LLMModel( - model_name=LLMModels.LLAMA3, + models[LLModel.LLAMA3] = LLMModel( + model_name=LLModel.LLAMA3, api_key=settings.OLLAMA_API_KEY, context_configuration=LLMModelContextConfiguration( long_term_memory_tokens=512, diff --git a/app/openai_helpers/chatgpt.py b/app/openai_helpers/chatgpt.py index dc8e81f..e412434 100644 --- a/app/openai_helpers/chatgpt.py +++ b/app/openai_helpers/chatgpt.py @@ -4,7 +4,7 @@ import settings from app.bot.utils import merge_dicts -from app.llm_models import LLMModels, get_model_by_name +from app.llm_models import LLModel, get_model_by_name from app.openai_helpers.count_tokens import count_messages_tokens, count_tokens_from_functions, count_string_tokens from app.openai_helpers.function_storage import FunctionStorage @@ -87,7 +87,7 @@ async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogM 'function_call': 'auto', }) - if self.llm_model.model_name == LLMModels.GPT_4_VISION_PREVIEW: + if self.llm_model.model_name == LLModel.GPT_4_VISION_PREVIEW: # TODO: somewhy by default it's 16 tokens for this model additional_fields['max_tokens'] = 4096 @@ -114,14 +114,17 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i additional_fields = {} if self.function_storage is not None: - functions = self.function_storage.get_functions_info() - prompt_tokens += count_tokens_from_functions(functions, self.llm_model.model_name) - additional_fields.update({ - 'functions': self.function_storage.get_functions_info(), - 'function_call': 'auto', - }) - - if self.llm_model.model_name == LLMModels.GPT_4_VISION_PREVIEW: + if self.llm_model.capabilities.function_calling: + functions = self.function_storage.get_functions_info() + prompt_tokens += count_tokens_from_functions(functions, self.llm_model.model_name) + additional_fields.update({ + 'functions': self.function_storage.get_functions_info(), + 'function_call': 'auto', + }) + elif self.llm_model.capabilities.tool_calling: + 
raise NotImplementedError('Tool calling support is not implemented yet') + + if self.llm_model.model_name == LLModel.GPT_4_VISION_PREVIEW: # TODO: somewhy by default it's 16 tokens for this model additional_fields['max_tokens'] = 4096 From e08b9e6e23558e02b7c4978169d47191e9d35dd7 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Mon, 29 Apr 2024 00:59:36 +0300 Subject: [PATCH 08/10] Update readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fc09a18..8e407df 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ This GitHub repository contains the implementation of a telegram bot, designed to facilitate seamless interaction with GPT-3.5 and GPT-4, state-of-the-art language models by OpenAI. 🔥 **GPT-4 Turbo support (with vision)** +🔥 **Custom OpenAI API compatible endpoints support** 🔥 **DALL-E 3 Image generation support** 🔑 **Key Features** -1. **Model Support**: gpt-3.5-turbo, gpt-4-turbo, gpt-4, gpt-4-turbo-preview, gpt-4-vision-preview. +1. **Model Support**: all OpenAI models are supported out of the box. You can also add OpenAI API compatible endpoints by adding them to `app/llm_models.py`. 2. **Image Generation**: You can ask bot to generate images using DALL-E 3 model, use bot just like official chatgpt app. 3. **Dynamic Dialog Management**: The bot automatically manages the context of the conversation, eliminating the need for the user to manually reset the context using the /reset command. You still can reset dialog manually if needed. 4. **Automatic Context Summarization**: In case the context size exceeds the model's maximum limit, the bot automatically summarizes the context to ensure the continuity of the conversation. From a0a089a4edef85933576305d97d1eacf2581a844 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Mon, 29 Apr 2024 01:02:55 +0300 Subject: [PATCH 09/10] Refactor LLM configuration naming --- app/llm_models.py | 66 +++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/app/llm_models.py b/app/llm_models.py index 7c02c26..be1978c 100644 --- a/app/llm_models.py +++ b/app/llm_models.py @@ -6,14 +6,14 @@ @dataclasses.dataclass -class LLMModelPrice: +class LLMPrice: # price per 1000 tokens input_tokens_price: Decimal output_tokens_price: Decimal @dataclasses.dataclass -class LLMModelContextConfiguration: +class LLMContextConfiguration: # long term memory is based on embedding context search long_term_memory_tokens: int # short term memory is used for storing last messages @@ -32,11 +32,19 @@ class LLMCapabilities: image_processing: bool = False -class LLMModel: +class LLModel: + GPT_35_TURBO = 'gpt-3.5-turbo' + GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k' + GPT_4 = 'gpt-4' + GPT_4_TURBO = 
'gpt-4-turbo' - GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview' - GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview' - LLAMA3 = 'llama3' - - @lru_cache def get_models(): models = {} if settings.OPENAI_TOKEN: models.update({ - LLModel.GPT_35_TURBO: LLMModel( + LLModel.GPT_35_TURBO: LLModel( model_name=LLModel.GPT_35_TURBO, api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( + context_configuration=LLMContextConfiguration( long_term_memory_tokens=512, short_term_memory_tokens=2560, summary_length=512, hard_max_context_size=5*1024, ), - model_price=LLMModelPrice( + model_price=LLMPrice( input_tokens_price=Decimal('0.0005'), output_tokens_price=Decimal('0.0015'), ), @@ -82,16 +80,16 @@ def get_models(): function_calling=True, ), ), - LLModel.GPT_35_TURBO_16K: LLMModel( + LLModel.GPT_35_TURBO_16K: LLModel( model_name=LLModel.GPT_35_TURBO_16K, api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( + context_configuration=LLMContextConfiguration( long_term_memory_tokens=1024, short_term_memory_tokens=4096, summary_length=1024, hard_max_context_size=17*1024, ), - model_price=LLMModelPrice( + model_price=LLMPrice( input_tokens_price=Decimal('0.003'), output_tokens_price=Decimal('0.004'), ), @@ -99,16 +97,16 @@ def get_models(): function_calling=True, ), ), - LLModel.GPT_4: LLMModel( + LLModel.GPT_4: LLModel( model_name=LLModel.GPT_4, api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( + context_configuration=LLMContextConfiguration( long_term_memory_tokens=512, short_term_memory_tokens=2048, summary_length=1024, hard_max_context_size=9*1024, ), - model_price=LLMModelPrice( + model_price=LLMPrice( input_tokens_price=Decimal('0.03'), output_tokens_price=Decimal('0.06'), ), @@ -116,16 +114,16 @@ def get_models(): function_calling=True, ), ), - LLModel.GPT_4_TURBO: LLMModel( + LLModel.GPT_4_TURBO: LLModel( model_name=LLModel.GPT_4_TURBO, api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( + context_configuration=LLMContextConfiguration( long_term_memory_tokens=512, short_term_memory_tokens=5120, summary_length=2048, hard_max_context_size=13*1024, ), - model_price=LLMModelPrice( + model_price=LLMPrice( input_tokens_price=Decimal('0.01'), output_tokens_price=Decimal('0.03'), ), @@ -134,16 +132,16 @@ def get_models(): image_processing=True, ), ), - LLModel.GPT_4_TURBO_PREVIEW: LLMModel( + LLModel.GPT_4_TURBO_PREVIEW: LLModel( model_name=LLModel.GPT_4_TURBO_PREVIEW, api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( + context_configuration=LLMContextConfiguration( long_term_memory_tokens=512, short_term_memory_tokens=5120, summary_length=2048, hard_max_context_size=13*1024, ), - model_price=LLMModelPrice( + model_price=LLMPrice( input_tokens_price=Decimal('0.01'), output_tokens_price=Decimal('0.03'), ), @@ -151,16 +149,16 @@ def get_models(): function_calling=True, ), ), - LLModel.GPT_4_VISION_PREVIEW: LLMModel( + LLModel.GPT_4_VISION_PREVIEW: LLModel( model_name=LLModel.GPT_4_VISION_PREVIEW, api_key=settings.OPENAI_TOKEN, - context_configuration=LLMModelContextConfiguration( + context_configuration=LLMContextConfiguration( long_term_memory_tokens=512, short_term_memory_tokens=5120, summary_length=2048, hard_max_context_size=13*1024, ), - model_price=LLMModelPrice( + model_price=LLMPrice( input_tokens_price=Decimal('0.01'), output_tokens_price=Decimal('0.03'), ), @@ -172,10 +170,10 @@ def get_models(): # example of using llama3 model in ollama if 
settings.OLLAMA_BASE_URL: - models[LLModel.LLAMA3] = LLMModel( + models[LLModel.LLAMA3] = LLModel( model_name=LLModel.LLAMA3, api_key=settings.OLLAMA_API_KEY, - context_configuration=LLMModelContextConfiguration( + context_configuration=LLMContextConfiguration( long_term_memory_tokens=512, short_term_memory_tokens=2048, summary_length=512, From 63c9643c56a22c65b586ccd165fd1601de8ecc10 Mon Sep 17 00:00:00 2001 From: Maxim Nechaev Date: Mon, 29 Apr 2024 01:39:51 +0300 Subject: [PATCH 10/10] Remove function_storage from processing pipeline earlier if model doesn't support tool calls or function calls --- app/bot/message_processor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/bot/message_processor.py b/app/bot/message_processor.py index 4ed5b4e..7f02ccd 100644 --- a/app/bot/message_processor.py +++ b/app/bot/message_processor.py @@ -10,6 +10,7 @@ from app.bot.utils import send_telegram_message, detect_and_extract_code, edit_telegram_message from app.context.context_manager import build_context_manager from app.context.dialog_manager import DialogUtils +from app.llm_models import get_model_by_name from app.openai_helpers.chatgpt import ChatGPT from app.openai_helpers.count_tokens import calculate_image_tokens from app.storage.db import DB, User, MessageType @@ -73,7 +74,10 @@ async def prepare_user_message(message: Message): async def process(self, is_cancelled): context_manager = await self.context_manager() - function_storage = await context_manager.get_function_storage() + llm_model = get_model_by_name(self.user.current_model) + function_storage = None + if llm_model.capabilities.tool_calling or llm_model.capabilities.function_calling: + function_storage = await context_manager.get_function_storage() system_prompt = await context_manager.get_system_prompt() chat_gpt_manager = ChatGptManager(ChatGPT(self.user.current_model, system_prompt, function_storage), self.db)
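The README change in PATCH 08 points at app/llm_models.py as the place to register additional OpenAI API compatible endpoints, and PATCH 09 shows the final LLModel constructor together with the llama3/Ollama example. The sketch below illustrates what such a registration could look like; the model name 'my-model' and the settings constants MY_MODEL_BASE_URL and MY_MODEL_API_KEY are hypothetical placeholders that do not exist in the repository, and the context sizes simply mirror the llama3 example.

    # Sketch only: a hypothetical extra entry inside get_models() in app/llm_models.py,
    # placed next to the existing llama3 block. The MY_MODEL_* settings are assumed,
    # not part of settings.py.
    if settings.MY_MODEL_BASE_URL:
        models['my-model'] = LLModel(
            model_name='my-model',
            api_key=settings.MY_MODEL_API_KEY,
            context_configuration=LLMContextConfiguration(
                long_term_memory_tokens=512,
                short_term_memory_tokens=2048,
                summary_length=512,
                hard_max_context_size=13*1024,
            ),
            base_url=settings.MY_MODEL_BASE_URL,
            # no function calling or image processing assumed for this endpoint
            capabilities=LLMCapabilities(),
        )

A model registered this way is picked up automatically by the admin-only 'all_models' setting added in PATCH 03, since that menu is built from list(get_models().keys()).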