Skip to content

Commit

Permalink
Litellm (#1265)
Browse files Browse the repository at this point in the history
* litellm base version

* 1. added missing test cases
2. removed stream from the prompt action hyperparameters

* test cases fixed

* 1. added missing test case
2. updated litellm

* 1. added missing test case

* 1. added missing test case

* 1. added missing test case
2. removed deprecated api

* test cases fixed

* removed unused variable

* fixed unused variable

* fixed unused variable

* added test cases for fetching logs

* litellm base version

* 1. added missing test cases
2. removed stream from the prompt action hyperparameters

* test cases fixed

* 1. added missing test case
2. updated litellm

* 1. added missing test case

* 1. added missing test case

* 1. added missing test case
2. removed deprecated api

* test cases fixed

* removed unused variable

* fixed unused variable

* fixed unused variable

* added test cases for fetching logs

* added test cases for fetching logs

* removed unused import

* added invocation in metadata for litellm

* 1. changed rasa rule policy to allow max history
2. changed rasa domain.yml schemas to allow Unicode alphabets for slot and form names

* litellm base version

* 1. added missing test cases
2. removed stream from the prompt action hyperparameters

* test cases fixed

* 1. added missing test case
2. updated litellm

* 1. added missing test case

* 1. added missing test case

* 1. added missing test case
2. removed deprecated api

* test cases fixed

* removed unused variable

* fixed unused variable

* fixed unused variable

* added test cases for fetching logs

* added test cases for fetching logs

* litellm base version

* 1. added missing test cases
2. removed stream from the prompt action hyperparameters

* test cases fixed

* 1. added missing test case
2. updated litellm

* 1. added missing test case

* 1. added missing test case

* 1. added missing test case
2. removed deprecated api

* test cases fixed

* removed unused variable

* fixed unused variable

* fixed unused variable

* added test cases for fetching logs

* removed unused import

* added invocation in metadata for litellm

* 1. changed rasa rule policy to allow max history
2. changed rasa domain.yml schemas to allow Unicode alphabets for slot and form names

* test cases fixed after merging
  • Loading branch information
sfahad1414 committed Jul 3, 2024
1 parent 8fad4fc commit b8873b8
Show file tree
Hide file tree
Showing 61 changed files with 2,320 additions and 2,642 deletions.
7 changes: 4 additions & 3 deletions augmentation/paraphrase/gpt3/gpt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Creates the Example and GPT classes for a user to interface with the OpenAI
API."""

import openai
from openai import OpenAI
import uuid


Expand Down Expand Up @@ -95,8 +95,9 @@ def submit_request(self, prompt, num_responses, api_key):
"""Calls the OpenAI API with the specified parameters."""
if num_responses < 1:
num_responses = 1
response = openai.Completion.create(api_key=api_key,
engine=self.get_engine(),
client = OpenAI(api_key=api_key)
response = client.completions.create(
model=self.get_engine(),
prompt=self.craft_query(prompt),
max_tokens=self.get_max_tokens(),
temperature=self.get_temperature(),
Expand Down
Empty file removed custom/__init__.py
Empty file.
58 changes: 0 additions & 58 deletions custom/fallback.py

This file was deleted.

169 changes: 0 additions & 169 deletions custom/ner.py

This file was deleted.

2 changes: 2 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ COPY . .
RUN rm -rf ${TEMPLATE_DIR_DEFAULT}/models/* && \
rasa train --data ${TEMPLATE_DIR_DEFAULT}/data --config ${TEMPLATE_DIR_DEFAULT}/config.yml --domain ${TEMPLATE_DIR_DEFAULT}/domain.yml --out ${TEMPLATE_DIR_DEFAULT}/models

RUN cp kairon/shared/rule_policy.py /usr/local/lib/python3.10/site-packages/rasa/core/policies/rule_policy.py
RUN cp kairon/shared/schemas/domain.yml /usr/local/lib/python3.10/site-packages/rasa/shared/utils/schemas/domain.yml

ENV HF_HOME="/home/cache" SENTENCE_TRANSFORMERS_HOME="/home/cache"

2 changes: 1 addition & 1 deletion kairon/actions/definitions/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ async def execute(self, dispatcher: CollectingDispatcher, tracker: Tracker, doma
request_body = ActionUtility.get_payload(payload, tracker)
msg_logger.append(request_body)
tracker_data = ActionUtility.build_context(tracker, True)
response = await vector_db.perform_operation(operation_type, request_body)
response = await vector_db.perform_operation(operation_type, request_body, user=tracker.sender_id)
logger.info("response: " + str(response))
response_context = self.__add_user_context_to_http_response(response, tracker_data)
bot_response, bot_resp_log, _ = ActionUtility.compose_response(vector_action_config['response'], response_context)
Expand Down
31 changes: 12 additions & 19 deletions kairon/actions/definitions/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,15 @@
from rasa_sdk import Tracker
from rasa_sdk.executor import CollectingDispatcher

from kairon import Utility
from kairon.actions.definitions.base import ActionsBase
from kairon.shared.actions.data_objects import ActionServerLogs
from kairon.shared.actions.exception import ActionFailure
from kairon.shared.actions.models import ActionType, UserMessageType
from kairon.shared.actions.utils import ActionUtility
from kairon.shared.constants import FAQ_DISABLED_ERR, KaironSystemSlots, KAIRON_USER_MSG_ENTITY
from kairon.shared.data.constant import DEFAULT_NLU_FALLBACK_RESPONSE
from kairon.shared.llm.factory import LLMFactory
from kairon.shared.models import LlmPromptType, LlmPromptSource
from kairon.shared.llm.processor import LLMProcessor


class ActionPrompt(ActionsBase):
Expand Down Expand Up @@ -62,14 +61,18 @@ async def execute(self, dispatcher: CollectingDispatcher, tracker: Tracker, doma
time_taken_slots = 0
final_slots = {"type": "slots_to_fill"}
llm_response_log = {"type": "llm_response"}

llm_processor = None
try:
k_faq_action_config, bot_settings = self.retrieve_config()
user_question = k_faq_action_config.get('user_question')
user_msg = self.__get_user_msg(tracker, user_question)
llm_type = k_faq_action_config['llm_type']
llm_params = await self.__get_llm_params(k_faq_action_config, dispatcher, tracker, domain)
llm = LLMFactory.get_instance("faq")(self.bot, bot_settings["llm_settings"])
llm_response, time_taken_llm_response = await llm.predict(user_msg, **llm_params)
llm_processor = LLMProcessor(self.bot, llm_type)
llm_response, time_taken_llm_response = await llm_processor.predict(user_msg,
user=tracker.sender_id,
invocation='prompt_action',
**llm_params)
status = "FAILURE" if llm_response.get("is_failure", False) is True else status
exception = llm_response.get("exception")
bot_response = llm_response['content']
Expand All @@ -93,8 +96,8 @@ async def execute(self, dispatcher: CollectingDispatcher, tracker: Tracker, doma
total_time_elapsed = time_taken_llm_response + time_taken_slots
events_to_extend = [llm_response_log, final_slots]
events.extend(events_to_extend)
if llm:
llm_logs = llm.logs
if llm_processor:
llm_logs = llm_processor.logs
ActionServerLogs(
type=ActionType.prompt_action.value,
intent=tracker.get_intent_of_latest_message(skip_fallback_intent=False),
Expand All @@ -119,16 +122,6 @@ async def execute(self, dispatcher: CollectingDispatcher, tracker: Tracker, doma
return slots_to_fill

async def __get_llm_params(self, k_faq_action_config: dict, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]):
implementations = {
"GPT3_FAQ_EMBED": self.__get_gpt_params,
}

llm_type = Utility.environment['llm']["faq"]
if not implementations.get(llm_type):
raise ActionFailure(f'{llm_type} type LLM is not supported')
return await implementations[Utility.environment['llm']["faq"]](k_faq_action_config, dispatcher, tracker, domain)

async def __get_gpt_params(self, k_faq_action_config: dict, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]):
from kairon.actions.definitions.factory import ActionFactory

system_prompt = None
Expand All @@ -147,7 +140,7 @@ async def __get_gpt_params(self, k_faq_action_config: dict, dispatcher: Collecti
history_prompt = ActionUtility.prepare_bot_responses(tracker, num_bot_responses)
elif prompt['source'] == LlmPromptSource.bot_content.value and prompt['is_enabled']:
use_similarity_prompt = True
hyperparameters = prompt.get('hyperparameters', {})
hyperparameters = prompt.get("hyperparameters", {})
similarity_prompt.append({'similarity_prompt_name': prompt['name'],
'similarity_prompt_instructions': prompt['instructions'],
'collection': prompt['data'],
Expand Down Expand Up @@ -179,7 +172,7 @@ async def __get_gpt_params(self, k_faq_action_config: dict, dispatcher: Collecti
is_query_prompt_enabled = True
query_prompt_dict.update({'query_prompt': query_prompt, 'use_query_prompt': is_query_prompt_enabled})

params["hyperparameters"] = k_faq_action_config.get('hyperparameters', Utility.get_llm_hyperparameters())
params["hyperparameters"] = k_faq_action_config['hyperparameters']
params["system_prompt"] = system_prompt
params["context_prompt"] = context_prompt
params["query_prompt"] = query_prompt_dict
Expand Down
23 changes: 20 additions & 3 deletions kairon/api/app/routers/bot/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from kairon.shared.actions.data_objects import ActionServerLogs
from kairon.shared.auth import Authentication
from kairon.shared.constants import TESTER_ACCESS, DESIGNER_ACCESS, CHAT_ACCESS, UserActivityType, ADMIN_ACCESS, \
VIEW_ACCESS, EventClass, AGENT_ACCESS
EventClass, AGENT_ACCESS
from kairon.shared.data.assets_processor import AssetsProcessor
from kairon.shared.data.audit.processor import AuditDataProcessor
from kairon.shared.data.constant import EVENT_STATUS, ENDPOINT_TYPE, TOKEN_TYPE, ModelTestType, \
Expand All @@ -38,11 +38,11 @@
from kairon.shared.data.utils import DataUtility
from kairon.shared.importer.data_objects import ValidationLogs
from kairon.shared.importer.processor import DataImporterLogProcessor
from kairon.shared.live_agent.live_agent import LiveAgentHandler
from kairon.shared.llm.processor import LLMProcessor
from kairon.shared.models import User, TemplateType
from kairon.shared.test.processor import ModelTestingLogProcessor
from kairon.shared.utils import Utility
from kairon.shared.live_agent.live_agent import LiveAgentHandler


router = APIRouter()
v2 = APIRouter()
Expand Down Expand Up @@ -1668,3 +1668,20 @@ async def get_live_agent_token(current_user: User = Security(Authentication.get_
data = await LiveAgentHandler.authenticate_agent(current_user.get_user(), current_user.get_bot())
return Response(data=data)


@router.get("/llm/logs", response_model=Response)
async def get_llm_logs(
        start_idx: int = 0, page_size: int = 10,
        current_user: User = Security(Authentication.get_current_user_and_bot, scopes=TESTER_ACCESS)
):
    """
    Fetch a paginated page of LLM event logs for the current user's bot.

    :param start_idx: zero-based offset of the first log entry to return.
    :param page_size: maximum number of log entries to return.
    :param current_user: authenticated user/bot context (requires TESTER access).
    :return: Response whose data holds the page of logs and the total row count.
    """
    # get_logs returns an iterable/cursor; materialize it so it can be serialized.
    logs = list(LLMProcessor.get_logs(current_user.get_bot(), start_idx, page_size))
    # Total count across all pages, so clients can compute pagination.
    row_cnt = LLMProcessor.get_row_count(current_user.get_bot())
    data = {
        "logs": logs,
        "total": row_cnt
    }
    return Response(data=data)

Loading

0 comments on commit b8873b8

Please sign in to comment.