diff --git a/README.md b/README.md
index fc09a18..8e407df 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,12 @@
 This GitHub repository contains the implementation of a telegram bot, designed to facilitate seamless interaction with GPT-3.5 and GPT-4, state-of-the-art language models by OpenAI.
 
 🔥 **GPT-4 Turbo support (with vision)**
+🔥 **Custom OpenAI API compatible endpoints support**
 🔥 **DALL-E 3 Image generation support**
 
 🔑 **Key Features**
 
-1. **Model Support**: gpt-3.5-turbo, gpt-4-turbo, gpt-4, gpt-4-turbo-preview, gpt-4-vision-preview.
+1. **Model Support**: all OpenAI models are supported out of the box. You can also add custom OpenAI API compatible endpoints in `app/llm_models.py`.
 2. **Image Generation**: You can ask bot to generate images using DALL-E 3 model, use bot just like official chatgpt app.
 3. **Dynamic Dialog Management**: The bot automatically manages the context of the conversation, eliminating the need for the user to manually reset the context using the /reset command. You still can reset dialog manually if needed.
 4. **Automatic Context Summarization**: In case the context size exceeds the model's maximum limit, the bot automatically summarizes the context to ensure the continuity of the conversation.
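Adding such an endpoint boils down to registering one more `LLModel` entry in the `get_models()` dictionary of `app/llm_models.py` (introduced later in this diff). A minimal sketch modeled on the llama3/Ollama example from this change — the model name, the `MY_ENDPOINT_*` settings and the token budgets are placeholders you would adapt to your own endpoint:

```python
# app/llm_models.py, inside get_models() -- hypothetical entry for a self-hosted endpoint
if settings.MY_ENDPOINT_BASE_URL:  # assumed setting, analogous to OLLAMA_BASE_URL below
    models['my-model'] = LLModel(
        model_name='my-model',                    # the name your endpoint expects in the `model` field
        api_key=settings.MY_ENDPOINT_API_KEY,     # assumed setting, analogous to OLLAMA_API_KEY
        base_url=settings.MY_ENDPOINT_BASE_URL,   # any OpenAI API compatible /v1 URL
        context_configuration=LLMContextConfiguration(
            long_term_memory_tokens=512,
            short_term_memory_tokens=2048,
            summary_length=512,
            hard_max_context_size=13 * 1024,
        ),
        # model_price and capabilities are optional: the price defaults to zero and the
        # capabilities default to no function calling and no image processing
    )
```

Once registered, the model also shows up in the admin-only `all_models` choice added to the settings menu further down.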
diff --git a/app/bot/batched_input_handler.py b/app/bot/batched_input_handler.py
index 1625cf3..5fd042d 100644
--- a/app/bot/batched_input_handler.py
+++ b/app/bot/batched_input_handler.py
@@ -11,6 +11,7 @@
 import settings
 from app.bot.message_processor import MessageProcessor
 from app.bot.utils import TypingWorker, message_is_forward, get_username, Timer, generate_document_id
+from app.llm_models import get_model_by_name
 from app.openai_helpers.whisper import get_audio_speech_to_text
 from app.storage.db import User, MessageType
 from app.storage.user_role import check_access_conditions
@@ -112,10 +113,18 @@ async def process_batch(self, messages_batch: List[types.Message], user: User):
 
         for message in messages_batch:
             if message.audio:
                 await self.handle_voice(message, user, message_processor)
-            if message.voice:
+            elif message.voice:
                 await self.handle_voice(message, user, message_processor)
-            if message.document:
+            elif message.document:
                 await self.handle_document(message, user, message_processor)
+            elif message.photo:
+                # handle the image like a regular message, but only after a capability check
+                llm_model = get_model_by_name(user.current_model)
+                if llm_model.capabilities.image_processing:
+                    await self.handle_message(message, user, message_processor)
+                else:
+                    # TODO: an exception is a bad way to handle this, need to find a better way
+                    raise ValueError(f'Image processing is not supported by {llm_model.model_name} model.')
             else:
                 await self.handle_message(message, user, message_processor)
diff --git a/app/bot/message_processor.py b/app/bot/message_processor.py
index 702d7d1..7f02ccd 100644
--- a/app/bot/message_processor.py
+++ b/app/bot/message_processor.py
@@ -10,6 +10,7 @@
 from app.bot.utils import send_telegram_message, detect_and_extract_code, edit_telegram_message
 from app.context.context_manager import build_context_manager
 from app.context.dialog_manager import DialogUtils
+from app.llm_models import get_model_by_name
 from app.openai_helpers.chatgpt import ChatGPT
 from app.openai_helpers.count_tokens import calculate_image_tokens
 from app.storage.db import DB, User, MessageType
@@ -48,11 +49,12 @@ async def add_message_as_context(self, message_id: int = None, message: Message
     @staticmethod
     async def prepare_user_message(message: Message):
-        content = []
-        if message.text:
-            content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text))
-        if message.photo:
+        if message.photo:
+            content = []
+
+            if message.text:
+                content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_TEXT, message.text))
+
             # largest photo
             photo = message.photo[-1]
             file_id = photo.file_id
@@ -63,12 +65,19 @@ async def prepare_user_message(message: Message):
             file_url = urljoin(f'{settings.IMAGE_PROXY_URL}:{settings.IMAGE_PROXY_PORT}', f'{file_id}_{tokens}.jpg')
             content.append(DialogUtils.construct_message_content_part(DialogUtils.CONTENT_IMAGE_URL, file_url))
-        return DialogUtils.prepare_user_message(content)
+            return DialogUtils.prepare_user_message(content)
+        elif message.text:
+            return DialogUtils.prepare_user_message(message.text)
+        else:
+            raise ValueError("prepare_user_message called with empty message")
 
     async def process(self, is_cancelled):
         context_manager = await self.context_manager()
-        function_storage = await context_manager.get_function_storage()
+        llm_model = get_model_by_name(self.user.current_model)
+        function_storage = None
+        if llm_model.capabilities.tool_calling or llm_model.capabilities.function_calling:
+            function_storage = await context_manager.get_function_storage()
 
         system_prompt = await context_manager.get_system_prompt()
         chat_gpt_manager = ChatGptManager(ChatGPT(self.user.current_model, system_prompt, function_storage), self.db)
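For context, `DialogUtils.construct_message_content_part` assembles the multi-part content layout that vision-capable chat models accept. Assuming the two constants map to the standard OpenAI content-part types, the user message built above for a photo with the caption "What is this?" would look roughly like this sketch (the exact dict layout depends on `DialogUtils`; host, port and token count in the proxied URL are placeholders):

```python
# hypothetical output of prepare_user_message() for a captioned photo
user_message = {
    'role': 'user',
    'content': [
        {'type': 'text', 'text': 'What is this?'},
        {'type': 'image_url', 'image_url': {'url': 'https://image-proxy.example.com:8321/<file_id>_765.jpg'}},
    ],
}
```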
diff --git a/app/bot/settings_menu.py b/app/bot/settings_menu.py
index 8ebfa05..e2be2ae 100644
--- a/app/bot/settings_menu.py
+++ b/app/bot/settings_menu.py
@@ -2,6 +2,7 @@
 
 from aiogram import Bot, types, Dispatcher
 
+from app.llm_models import get_models
 from app.storage.db import User, DB
 from app.storage.user_role import check_access_conditions, UserRole
 
@@ -16,6 +17,8 @@
     'gpt-4': 'GPT-4'
 }
 
+ALL_MODELS_OPTIONS = list(get_models().keys())
+
 TTS_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
 
 SETTINGS_PREFIX = 'settings'
@@ -99,6 +102,7 @@ def __init__(self, bot: Bot, dispatcher: Dispatcher, db: DB):
         self.settings = {
             'current_model': VisibleOptionsSetting('current_model', GPT_MODELS_OPTIONS),
             'current_model_preview': VisibleOptionsSetting('current_model', OLD_MODELS_OPTIONS),
+            'all_models': ChoiceSetting('Model', 'current_model', ALL_MODELS_OPTIONS),
             'gpt_mode': ChoiceSetting('GPT mode', 'gpt_mode', list(settings.gpt_mode.keys())),
             'use_functions': OnOffSetting('Use functions', 'use_functions'),
             'image_generation': OnOffSetting('Image generation', 'image_generation'),
@@ -106,13 +110,14 @@ def __init__(self, bot: Bot, dispatcher: Dispatcher, db: DB):
             'tts-voice': ChoiceSetting('TTS voice', 'tts_voice', TTS_VOICES),
             'voice_as_prompt': OnOffSetting('Voice as prompt', 'voice_as_prompt'),
             'function_call_verbose': OnOffSetting('Verbose function calls', 'function_call_verbose'),
-            # 'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'),
+            'streaming_answers': OnOffSetting('Streaming answers', 'streaming_answers'),
             # 'auto_summarize': OnOffSetting('Auto summarize', 'auto_summarize'),
             # 'forward_as_prompt': OnOffSetting('Forward as prompt', 'forward_as_prompt'),
         }
         self.minimum_required_roles = {
             'current_model': settings.USER_ROLE_CHOOSE_MODEL,
             'current_model_preview': UserRole.ADMIN,
+            'all_models': UserRole.ADMIN,
             'image_generation': settings.USER_ROLE_IMAGE_GENERATION,
             'tts-voice': settings.USER_ROLE_TTS,
             'streaming_answers': settings.USER_ROLE_STREAMING_ANSWERS,
diff --git a/app/bot/telegram_bot.py b/app/bot/telegram_bot.py
index a9f4932..edcce9e 100644
--- a/app/bot/telegram_bot.py
+++ b/app/bot/telegram_bot.py
@@ -18,7 +18,6 @@
     calculate_image_generation_usage_price, calculate_tts_usage_price)
 from app.storage.db import DBFactory, User
 from app.storage.user_role import check_access_conditions, UserRole
-from app.openai_helpers.chatgpt import GptModel
 
 from aiogram import types, Bot, Dispatcher
 from aiogram.utils import executor
diff --git a/app/context/context_manager.py b/app/context/context_manager.py
index 493d82c..281ba27 100644
--- a/app/context/context_manager.py
+++ b/app/context/context_manager.py
@@ -1,4 +1,3 @@
-import dataclasses
 from typing import List, Optional
 
 from aiogram import types
@@ -6,79 +5,12 @@
 import settings
 from app.context.dialog_manager import DialogManager
 from app.context.function_manager import FunctionManager
+from app.llm_models import get_model_by_name
 from app.openai_helpers.chatgpt import DialogMessage
 from app.openai_helpers.function_storage import FunctionStorage
 from app.storage.db import DB, User, MessageType
 
 
-@dataclasses.dataclass
-class ContextConfiguration:
-    model_name: str
-
-    # long term memory is based on embedding context search
-    long_term_memory_tokens: int
-    # short term memory is used for storing last messages
-    short_term_memory_tokens: int
-    # length of summary to be generated when context is too long
-    summary_length: int
-    # hard limit for context size, when this limit is reached, processing is being stopped,
-    # summarization also cannot be done
-    hard_max_context_size: int
-
-    @staticmethod
-    def get_config(model: str):
-        if model == 'gpt-3.5-turbo':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=2560,
-                summary_length=512,
-                hard_max_context_size=5*1024,
-            )
-        elif model == 'gpt-3.5-turbo-16k':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=1024,
-                short_term_memory_tokens=4096,
-                summary_length=1024,
-                hard_max_context_size=17*1024,
-            )
-        elif model == 'gpt-4':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=2048,
-                summary_length=1024,
-                hard_max_context_size=9*1024,
-            )
-        elif model == 'gpt-4-turbo-preview':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=5120,
-                summary_length=2048,
-                hard_max_context_size=13*1024,
-            )
-        elif model == 'gpt-4-vision-preview':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=5120,
-                summary_length=2048,
-                hard_max_context_size=13*1024,
-            )
-        elif model == 'gpt-4-turbo':
-            return ContextConfiguration(
-                model_name=model,
-                long_term_memory_tokens=512,
-                short_term_memory_tokens=5120,
-                summary_length=2048,
-                hard_max_context_size=13 * 1024,
-            )
-        else:
-            raise ValueError(f'Unknown model name: {model}')
-
-
 class ContextManager:
     def __init__(self, db: DB, user: User, message: types.Message):
         self.db = db
@@ -88,7 +20,8 @@ def __init__(self, db: DB, user: User, message: types.Message):
         self.function_manager = None
 
     async def process_dialog(self):
-        context_configuration = ContextConfiguration.get_config(self.user.current_model)
+        llm_model = get_model_by_name(self.user.current_model)
+        context_configuration = llm_model.context_configuration
         self.dialog_manager = DialogManager(self.db, self.user, context_configuration)
         await self.dialog_manager.process_dialog(self.message)
diff --git a/app/llm_models.py b/app/llm_models.py
new file mode 100644
index 0000000..be1978c
--- /dev/null
+++ b/app/llm_models.py
@@ -0,0 +1,192 @@
+import dataclasses
+from decimal import Decimal
+from functools import lru_cache
+
+import settings
+
+
+@dataclasses.dataclass
+class LLMPrice:
+    # price per 1000 tokens
+    input_tokens_price: Decimal
+    output_tokens_price: Decimal
+
+
+@dataclasses.dataclass
+class LLMContextConfiguration:
+    # long term memory is based on embedding context search
+    long_term_memory_tokens: int
+    # short term memory is used for storing last messages
+    short_term_memory_tokens: int
+    # length of summary to be generated when context is too long
+    summary_length: int
+    # hard limit for context size, when this limit is reached, processing is being stopped,
+    # summarization also cannot be done
+    hard_max_context_size: int
+
+
+@dataclasses.dataclass
+class LLMCapabilities:
+    function_calling: bool = False
+    tool_calling: bool = False
+    image_processing: bool = False
+
+
+class LLModel:
+    GPT_35_TURBO = 'gpt-3.5-turbo'
+    GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k'
+    GPT_4 = 'gpt-4'
+    GPT_4_TURBO = 'gpt-4-turbo'
+    GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview'
+    GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview'
+    LLAMA3 = 'llama3'
+
+    def __init__(self, *, model_name: str, api_key, context_configuration, model_price=None, base_url=None,
+                 capabilities=None):
+        if model_price is None:
+            model_price = LLMPrice(input_tokens_price=Decimal('0'), output_tokens_price=Decimal('0'))
+
+        if capabilities is None:
+            capabilities = LLMCapabilities()
+
+        self.model_name = model_name
+        self.api_key = api_key
+        self.context_configuration = context_configuration
+        self.model_price = model_price
+        self.base_url = base_url
+        self.capabilities = capabilities
+
+
+@lru_cache
+def get_models():
+    models = {}
+
+    if settings.OPENAI_TOKEN:
+        models.update({
+            LLModel.GPT_35_TURBO: LLModel(
+                model_name=LLModel.GPT_35_TURBO,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=2560,
+                    summary_length=512,
+                    hard_max_context_size=5*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.0005'),
+                    output_tokens_price=Decimal('0.0015'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_35_TURBO_16K: LLModel(
+                model_name=LLModel.GPT_35_TURBO_16K,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=1024,
+                    short_term_memory_tokens=4096,
+                    summary_length=1024,
+                    hard_max_context_size=17*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.003'),
+                    output_tokens_price=Decimal('0.004'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_4: LLModel(
+                model_name=LLModel.GPT_4,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=2048,
+                    summary_length=1024,
+                    hard_max_context_size=9*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.03'),
+                    output_tokens_price=Decimal('0.06'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_4_TURBO: LLModel(
+                model_name=LLModel.GPT_4_TURBO,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=5120,
+                    summary_length=2048,
+                    hard_max_context_size=13*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.01'),
+                    output_tokens_price=Decimal('0.03'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                    image_processing=True,
+                ),
+            ),
+            LLModel.GPT_4_TURBO_PREVIEW: LLModel(
+                model_name=LLModel.GPT_4_TURBO_PREVIEW,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=5120,
+                    summary_length=2048,
+                    hard_max_context_size=13*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.01'),
+                    output_tokens_price=Decimal('0.03'),
+                ),
+                capabilities=LLMCapabilities(
+                    function_calling=True,
+                ),
+            ),
+            LLModel.GPT_4_VISION_PREVIEW: LLModel(
+                model_name=LLModel.GPT_4_VISION_PREVIEW,
+                api_key=settings.OPENAI_TOKEN,
+                context_configuration=LLMContextConfiguration(
+                    long_term_memory_tokens=512,
+                    short_term_memory_tokens=5120,
+                    summary_length=2048,
+                    hard_max_context_size=13*1024,
+                ),
+                model_price=LLMPrice(
+                    input_tokens_price=Decimal('0.01'),
+                    output_tokens_price=Decimal('0.03'),
+                ),
+                capabilities=LLMCapabilities(
+                    image_processing=True,
+                ),
+            ),
+        })
+
+    # example of using a llama3 model served by ollama
+    if settings.OLLAMA_BASE_URL:
+        models[LLModel.LLAMA3] = LLModel(
+            model_name=LLModel.LLAMA3,
+            api_key=settings.OLLAMA_API_KEY,
+            context_configuration=LLMContextConfiguration(
+                long_term_memory_tokens=512,
+                short_term_memory_tokens=2048,
+                summary_length=512,
+                hard_max_context_size=13*1024,
+            ),
+            base_url=settings.OLLAMA_BASE_URL,
+        )
+
+    return models
+
+
+def get_model_by_name(model_name: str):
+    model = get_models().get(model_name)
+    if not model:
+        raise ValueError(f"Unknown model: {model_name}")
+    return model
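As a quick usage sketch (not part of the diff), the registry can be queried anywhere in the bot to see which models are configured, what they can do and what they cost. The model name below assumes `OPENAI_TOKEN` is set; the figures are just the per-1000-token prices defined above:

```python
from app.llm_models import get_model_by_name, get_models

# which models are available depends on the tokens/URLs filled in settings.py
print(list(get_models().keys()))

model = get_model_by_name('gpt-4-turbo')
assert model.capabilities.image_processing   # photos are accepted for this model
assert model.capabilities.function_calling

# cost of a hypothetical exchange: 1200 prompt tokens, 300 completion tokens
price = (model.model_price.input_tokens_price * 1200
         + model.model_price.output_tokens_price * 300) / 1000
print(price)  # 0.021
```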
diff --git a/app/openai_helpers/chatgpt.py b/app/openai_helpers/chatgpt.py
index 47f8ce5..e412434 100644
--- a/app/openai_helpers/chatgpt.py
+++ b/app/openai_helpers/chatgpt.py
@@ -4,25 +4,13 @@
 import settings
 from app.bot.utils import merge_dicts
+from app.llm_models import LLModel, get_model_by_name
 from app.openai_helpers.count_tokens import count_messages_tokens, count_tokens_from_functions, count_string_tokens
 from app.openai_helpers.function_storage import FunctionStorage
 
 import pydantic
 
-from app.openai_helpers.utils import OpenAIAsync
-
-
-class GptModel:
-    GPT_35_TURBO = 'gpt-3.5-turbo'
-    GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k'
-    GPT_4 = 'gpt-4'
-    GPT_4_TURBO = 'gpt-4-turbo'
-    GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview'
-    GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview'
-
-
-GPT_MODELS = {GptModel.GPT_35_TURBO, GptModel.GPT_35_TURBO_16K, GptModel.GPT_4, GptModel.GPT_4_TURBO,
-              GptModel.GPT_4_TURBO_PREVIEW, GptModel.GPT_4_VISION_PREVIEW}
+from app.openai_helpers.llm_client import OpenAILLMClient
 
 
 class FunctionCall(pydantic.BaseModel):
@@ -88,20 +76,18 @@ def openai_message(self):
 
 class ChatGPT:
     def __init__(self, model, system_prompt: str, function_storage: FunctionStorage = None):
         self.function_storage = function_storage
-        if model not in GPT_MODELS:
-            raise ValueError(f"Unknown model: {model}")
-        self.model = model
+        self.llm_model = get_model_by_name(model)
         self.system_prompt = system_prompt
 
     async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogMessage, CompletionUsage):
         additional_fields = {}
         if self.function_storage is not None:
             additional_fields.update({
-                'functions': self.function_storage.get_openai_prompt(),
+                'functions': self.function_storage.get_functions_info(),
                 'function_call': 'auto',
             })
 
-        if self.model == GptModel.GPT_4_VISION_PREVIEW:
+        if self.llm_model.model_name == LLModel.GPT_4_VISION_PREVIEW:
             # TODO: for some reason the default is 16 tokens for this model
             additional_fields['max_tokens'] = 4096
@@ -112,13 +98,13 @@ async def send_messages(self, messages_to_send: List[DialogMessage]) -> (DialogM
             del additional_fields['functions']
 
         messages = self.create_context(messages_to_send, self.system_prompt)
-        resp = await OpenAIAsync.instance().chat.completions.create(
-            model=self.model,
+        resp = await OpenAILLMClient.get_client(self.llm_model.model_name).chat.completions.create(
+            model=self.llm_model.model_name,
             messages=messages,
             temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE,
             **additional_fields,
         )
-        completion_usage = CompletionUsage(model=self.model, **dict(resp.usage))
+        completion_usage = CompletionUsage(model=self.llm_model.model_name, **dict(resp.usage))
         message = resp.choices[0].message
         response = DialogMessage(**dict(message))
         return response, completion_usage
@@ -127,17 +113,18 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i
         prompt_tokens = 0
         additional_fields = {}
 
-        system_prompt_addition = None
         if self.function_storage is not None:
-            system_prompt_addition = self.function_storage.get_system_prompt_addition()
-            functions = self.function_storage.get_openai_prompt()
-            prompt_tokens += count_tokens_from_functions(functions, self.model)
-            additional_fields.update({
-                'functions': self.function_storage.get_openai_prompt(),
-                'function_call': 'auto',
-            })
-
-        if self.model == GptModel.GPT_4_VISION_PREVIEW:
+            if self.llm_model.capabilities.function_calling:
+                functions = self.function_storage.get_functions_info()
+                prompt_tokens += count_tokens_from_functions(functions, self.llm_model.model_name)
+                additional_fields.update({
+                    'functions': self.function_storage.get_functions_info(),
+                    'function_call': 'auto',
+                })
+            elif self.llm_model.capabilities.tool_calling:
+                raise NotImplementedError('Tool calling support is not implemented yet')
+
+        if self.llm_model.model_name == LLModel.GPT_4_VISION_PREVIEW:
             # TODO: for some reason the default is 16 tokens for this model
             additional_fields['max_tokens'] = 4096
@@ -148,15 +135,15 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i
             del additional_fields['functions']
 
         messages = self.create_context(messages_to_send, self.system_prompt)
-        resp_generator = await OpenAIAsync.instance().chat.completions.create(
-            model=self.model,
+        resp_generator = await OpenAILLMClient.get_client(self.llm_model.model_name).chat.completions.create(
+            model=self.llm_model.model_name,
             messages=messages,
             temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE,
             stream=True,
             **additional_fields,
         )
 
-        prompt_tokens += count_messages_tokens(messages, self.model)
+        prompt_tokens += count_messages_tokens(messages, self.llm_model.model_name)
         result_dict = {}
         async for resp_part in resp_generator:
             delta = resp_part.choices[0].delta
@@ -168,19 +155,19 @@ async def send_messages_streaming(self, messages_to_send: List[DialogMessage], i
                     continue
                 result_dict = merge_dicts(result_dict, dict(delta))
                 dialog_message = DialogMessage(**result_dict)
-                completion_tokens = count_messages_tokens([result_dict], model=self.model)
+                completion_tokens = count_messages_tokens([result_dict], model=self.llm_model.model_name)
             elif delta.function_call is not None:
                 result_dict = merge_dicts(result_dict, dict(delta.function_call))
                 dialog_message = DialogMessage(function_call=result_dict)
                 # TODO: find a more accurate way to calculate completion length for function calls
-                completion_tokens = count_string_tokens(json.dumps(result_dict), model=self.model)
+                completion_tokens = count_string_tokens(json.dumps(result_dict), model=self.llm_model.model_name)
             else:
                 continue
 
             # for some reason openai doesn't return this field in streaming mode
             dialog_message.role = 'assistant'
 
             completion_usage = CompletionUsage(
-                model=self.model,
+                model=self.llm_model.model_name,
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
                 total_tokens=prompt_tokens + completion_tokens,
@@ -207,7 +194,7 @@ async def summarize_messages(messages: List[DialogMessage], model: str, summary_
         "role": "user",
         "content": f"Summarize this conversation in {summary_max_length} characters or less. Divide different themes explicitly with new lines. Return only text of summary, nothing else.",
     }]
-    resp = await OpenAIAsync.instance().chat.completions.create(
+    resp = await OpenAILLMClient.get_client(model).chat.completions.create(
         model=model,
         messages=prompt_messages,
         temperature=settings.OPENAI_CHAT_COMPLETION_TEMPERATURE,
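Since `ChatGPT.__init__` now resolves the model through the registry instead of the removed hard-coded `GPT_MODELS` set, any key returned by `get_models()` — including a custom endpoint — can be passed in directly. A hedged usage sketch (the `llama3` name only exists when `OLLAMA_BASE_URL` is configured):

```python
from app.openai_helpers.chatgpt import ChatGPT

# any model name known to get_models() works here
chat = ChatGPT('llama3', system_prompt='You are a helpful assistant.')
# inside an async context:
# response, usage = await chat.send_messages(dialog_messages)  # dialog_messages: List[DialogMessage]
```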
diff --git a/app/openai_helpers/count_tokens.py b/app/openai_helpers/count_tokens.py
index 40cebac..3e495e0 100644
--- a/app/openai_helpers/count_tokens.py
+++ b/app/openai_helpers/count_tokens.py
@@ -21,7 +21,8 @@ def count_string_tokens(string: str, model="gpt-3.5-turbo") -> int:
     elif "gpt-4" in model:
         model = "gpt-4"
     else:
-        raise ValueError(f"Unknown model: {model}")
+        # TODO: add a method to calculate tokens for arbitrary models
+        model = "gpt-4"
     encoding = tiktoken.encoding_for_model(model)
     return len(encoding.encode(str(string)))
 
@@ -43,7 +44,11 @@ def count_messages_tokens(messages: List[dict], model="gpt-3.5-turbo") -> int:
         tokens_per_message = 3
         tokens_per_name = 1
 
-    encoding = tiktoken.encoding_for_model(model)
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        # TODO: add a method to calculate tokens for arbitrary models
+        encoding = tiktoken.encoding_for_model("gpt-4")
 
     num_tokens = 0
     for message in messages:
@@ -84,7 +89,8 @@ def count_tokens_from_functions(functions, model="gpt-3.5-turbo"):
     elif "gpt-4" in model:
         model = "gpt-4"
     else:
-        raise ValueError(f"Unknown model: {model}")
+        # TODO: add a method to calculate tokens for arbitrary models
+        model = "gpt-4"
     encoding = tiktoken.encoding_for_model(model)
 
     num_tokens = 0
diff --git a/app/openai_helpers/function_storage.py b/app/openai_helpers/function_storage.py
index 9d12a3b..4380c12 100644
--- a/app/openai_helpers/function_storage.py
+++ b/app/openai_helpers/function_storage.py
@@ -21,7 +21,7 @@ def extract_function_info(function) -> Dict[str, Any]:
             "parameters": function.get_params_schema(),
         }
 
-    def get_openai_prompt(self):
+    def get_functions_info(self):
         functions = []
         for function in self.functions.values():
             function_info = function['info']
@@ -29,6 +29,17 @@ def get_openai_prompt(self):
 
         return functions
 
+    def get_tools_info(self):
+        tools = []
+        for function in self.functions.values():
+            function_info = {
+                "type": "function",
+                "function": function['info'],
+            }
+            tools.append(function_info)
+
+        return tools
+
     def get_system_prompt_addition(self) -> str:
         result = []
         for function in self.functions.values():
diff --git a/app/openai_helpers/llm_client.py b/app/openai_helpers/llm_client.py
new file mode 100644
index 0000000..c528871
--- /dev/null
+++ b/app/openai_helpers/llm_client.py
@@ -0,0 +1,19 @@
+import openai
+
+from app.llm_models import get_model_by_name
+
+
+class OpenAILLMClient:
+    _model_clients = {}
+
+    @classmethod
+    def get_client(cls, model_name: str):
+        if model_name not in cls._model_clients:
+            llm_model = get_model_by_name(model_name)
+            params = {
+                'api_key': llm_model.api_key,
+            }
+            if llm_model.base_url:
+                params['base_url'] = llm_model.base_url
+            cls._model_clients[model_name] = openai.AsyncOpenAI(**params)
+        return cls._model_clients[model_name]
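`OpenAILLMClient` keeps one `AsyncOpenAI` client per configured model, so official OpenAI models and custom endpoints can coexist in one process. A small usage sketch (it assumes `gpt-4-turbo` is configured, i.e. `OPENAI_TOKEN` is set):

```python
import asyncio

from app.openai_helpers.llm_client import OpenAILLMClient


async def main():
    # returns an AsyncOpenAI client configured with the model's api_key and base_url
    client = OpenAILLMClient.get_client('gpt-4-turbo')
    resp = await client.chat.completions.create(
        model='gpt-4-turbo',
        messages=[{'role': 'user', 'content': 'Say hi in one word.'}],
    )
    print(resp.choices[0].message.content)

    # asking for the same model again reuses the cached client instance
    assert OpenAILLMClient.get_client('gpt-4-turbo') is client


asyncio.run(main())
```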
diff --git a/app/openai_helpers/utils.py b/app/openai_helpers/utils.py
index 91e9334..f822451 100644
--- a/app/openai_helpers/utils.py
+++ b/app/openai_helpers/utils.py
@@ -1,16 +1,7 @@
 from decimal import Decimal
 
 import openai
-
-COMPLETION_PRICE = {
-    'gpt-3.5-turbo': (Decimal('0.0005'), Decimal('0.0015')),
-    'gpt-3.5-turbo-16k': (Decimal('0.003'), Decimal('0.004')),
-    'gpt-4': (Decimal('0.03'), Decimal('0.06')),
-    'gpt-4-1106-preview': (Decimal('0.01'), Decimal('0.03')),
-    'gpt-4-vision-preview': (Decimal('0.01'), Decimal('0.03')),
-    'gpt-4-turbo-preview': (Decimal('0.01'), Decimal('0.03')),
-    'gpt-4-turbo': (Decimal('0.01'), Decimal('0.03')),
-}
+from app.llm_models import get_model_by_name
 
 WHISPER_PRICE = Decimal('0.006')
@@ -29,10 +20,11 @@ def calculate_completion_usage_price(prompt_tokens: int, completion_tokens: int, model: str) -> Decimal:
-    price = COMPLETION_PRICE.get(model)
+    llm_model = get_model_by_name(model)
+    price = llm_model.model_price
     if not price:
         raise ValueError(f"Unknown model: {model}")
-    prompt_price, completion_price = price
+    prompt_price, completion_price = price.input_tokens_price, price.output_tokens_price
     return prompt_price * prompt_tokens / 1000 + completion_price * completion_tokens / 1000
 
@@ -54,14 +46,25 @@ def calculate_image_generation_usage_price(model, resolution, num_images):
 
 class OpenAIAsync:
     _key = None
+    _base_url = None
     _instance = None
 
     @classmethod
-    def init(cls, api_key):
+    def init(cls, api_key, base_url=None):
         cls._key = api_key
+        cls._base_url = base_url
 
     @classmethod
     def instance(cls):
+        params = {}
+        if cls._base_url:
+            params['base_url'] = cls._base_url
+
+        if cls._key is None:
+            raise ValueError("OpenAIAsync is not initialized")
+
+        params['api_key'] = cls._key
+
         if cls._instance is None:
-            cls._instance = openai.AsyncOpenAI(api_key=cls._key)
+            cls._instance = openai.AsyncOpenAI(**params)
         return cls._instance
diff --git a/main.py b/main.py
index b731c5c..0bddc3f 100644
--- a/main.py
+++ b/main.py
@@ -13,6 +13,7 @@
 
 if __name__ == '__main__':
+    # still needed for the whisper and tts capabilities
     OpenAIAsync.init(settings.OPENAI_TOKEN)
     telegram_bot = TelegramBot(bot, dp)
     telegram_bot.run()
diff --git a/settings.py b/settings.py
index 17a8e0c..be9a88d 100644
--- a/settings.py
+++ b/settings.py
@@ -17,11 +17,14 @@
                   'and assist users to the best of your abilities. Listen carefully to what they say, ask questions, '
                   'and help in any way you can. Avoid giving advices, your ultimate goal is to help the user to find the right solution by himself. '
                   'Ask only one question a time.',
-    }
+    },
+    'ai dungeon': {  # feel free to delete this example or add modes of your own
+        'system': 'You are the AI Dungeon game. Your task is to entertain the user with role play. The user creates a setup and you play the role of the world and the characters in it.',
+    },
 }
 
 # Mandatory settings
-OPENAI_TOKEN = 'YOUR_TOKEN'
+OPENAI_TOKEN = ''
 TELEGRAM_BOT_TOKEN = 'YOUR_TOKEN'
 # Image proxy settings
 # This proxy is used to send images to openai for GPT-4-Vision
@@ -69,6 +72,9 @@
 IMAGE_PROXY_BIND_HOST = '0.0.0.0'
 IMAGE_PROXY_BIND_PORT = 8321
 
+OLLAMA_BASE_URL = ''
+OLLAMA_API_KEY = 'ollama'
+
 # Vectara RAG settings
 # this feature is highly experimental and not recommended to be used in it's current state
 # currently it even doesn't have instructions on how to setup, use it only if you feel experimentalish
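To enable the bundled llama3 example, the two new settings above are all that is needed — point them at any OpenAI API compatible server. Example values for a local Ollama instance (the URL is Ollama's standard OpenAI-compatible endpoint; Ollama ignores the key, but the OpenAI client requires a non-empty value):

```python
# settings.py -- example values for a local Ollama server
OLLAMA_BASE_URL = 'http://localhost:11434/v1'
OLLAMA_API_KEY = 'ollama'
```

With these set, `get_models()` registers `llama3`, and an admin can switch to it through the new `all_models` option in the settings menu.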