Added tokenbox, llama gpu offload, Modified /query
c0sogi committed May 20, 2023
1 parent 58c5074 commit 69249d0
Showing 36 changed files with 34,809 additions and 34,339 deletions.
Binary file modified app/contents/chat_demo.gif
Binary file added app/contents/code_demo.png
Binary file modified app/contents/embed_demo.png
Binary file added app/contents/embed_file_demo.png
21 changes: 10 additions & 11 deletions app/database/schemas/auth.py
@@ -1,6 +1,5 @@
import enum
from sqlalchemy import (
Column,
String,
Integer,
Enum,
@@ -17,22 +16,22 @@


class UserStatus(str, enum.Enum):
ACTIVE = "active"
DELETED = "deleted"
BLOCKED = "blocked"
active = "active"
deleted = "deleted"
blocked = "blocked"


class ApiKeyStatus(str, enum.Enum):
ACTIVE = "active"
STOPPED = "stopped"
DELETED = "deleted"
active = "active"
stopped = "stopped"
deleted = "deleted"


class Users(Base, Mixin):
__tablename__ = "users"
status = Column(String, Enum(UserStatus), default=UserStatus.ACTIVE, nullable=False)
email: Mapped[str] = mapped_column(String(length=20))
password: Mapped[str | None] = mapped_column(String(length=72))
status: Mapped[str] = mapped_column(Enum(UserStatus), default=UserStatus.active)
email: Mapped[str] = mapped_column(String(length=50))
password: Mapped[str | None] = mapped_column(String(length=100))
name: Mapped[str | None] = mapped_column(String(length=20))
phone_number: Mapped[str | None] = mapped_column(String(length=20))
profile_img: Mapped[str | None] = mapped_column(String(length=100))
@@ -49,7 +48,7 @@ class Users(Base, Mixin):

class ApiKeys(Base, Mixin):
__tablename__ = "api_keys"
status = Column(String, Enum(ApiKeyStatus), default=ApiKeyStatus.ACTIVE, nullable=False)
status: Mapped[str] = mapped_column(Enum(ApiKeyStatus), default=ApiKeyStatus.active)
access_key: Mapped[str] = mapped_column(String(length=64), index=True, unique=True)
secret_key: Mapped[str] = mapped_column(String(length=64))
user_memo: Mapped[str | None] = mapped_column(String(length=40))
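
For context, the change above replaces the legacy status = Column(String, Enum(...)) declaration (which passes two conflicting column types) with the typed SQLAlchemy 2.0 mapped_column style already used by the surrounding fields. A minimal standalone sketch of the new pattern, assuming SQLAlchemy 2.x; the real models also inherit a project Mixin, simplified here to a bare primary key:

import enum

from sqlalchemy import Enum, String
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class UserStatus(str, enum.Enum):
    active = "active"
    deleted = "deleted"
    blocked = "blocked"


class Users(Base):
    __tablename__ = "users"
    id: Mapped[int] = mapped_column(primary_key=True)  # stand-in for the Mixin's key
    # Enum(UserStatus) derives the DB type from the Python enum, and the default
    # is the enum member itself rather than a loose string.
    status: Mapped[str] = mapped_column(Enum(UserStatus), default=UserStatus.active)
    email: Mapped[str] = mapped_column(String(length=50))
    password: Mapped[str | None] = mapped_column(String(length=100))  # Optional -> nullable
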
18 changes: 18 additions & 0 deletions app/errors/api_exceptions.py
@@ -144,6 +144,24 @@ class Responses_400:
msg="이미 사용중인 웹소켓입니다.",
detail="Websocket is already in use.",
)
invalid_email_format: APIException = APIException(
status_code=400,
internal_code=17,
msg="올바르지 않은 이메일 형식 입니다.",
detail="Invalid Email Format.",
)
email_length_not_in_range: APIException = APIException(
status_code=400,
internal_code=18,
msg="이메일은 6자 이상 50자 이하로 입력해주세요.",
detail="Email must be 6 ~ 50 characters.",
)
password_length_not_in_range: APIException = APIException(
status_code=400,
internal_code=19,
msg="비밀번호는 6자 이상 100자 이하로 입력해주세요.",
detail="Password must be 6 ~ 100 characters.",
)


@dataclass(frozen=True)
30 changes: 19 additions & 11 deletions app/models/llms.py
@@ -4,7 +4,7 @@
from app.common.config import OPENAI_API_KEY

from app.models.llm_tokenizers import BaseTokenizer, LlamaTokenizer, OpenAITokenizer
from app.utils.chat.prompts import USER_AI_TMPL_CHAT
from app.utils.chat.prompts import USER_AI_TMPL_CHAT1


@dataclass
@@ -24,7 +24,7 @@ class LlamaCppModel(LLMModel):
n_parts: int = (
-1
) # Number of parts to split the model into. If -1, the number of parts is automatically determined.
n_gpu_layers: int = 32 # Number of layers to offload to the GPU. If 0, no layers are offloaded.
n_gpu_layers: int = 30 # Number of layers to offload to the GPU. If 0, no layers are offloaded.
seed: int = -1 # Seed. If -1, a random seed is used.
f16_kv: bool = True # Use half-precision for key/value cache.
logits_all: bool = False # Return logits for all tokens, not just the last token.
@@ -45,7 +45,7 @@ class LlamaCppModel(LLMModel):
top_p: Optional[float] = 0.95 # The top-p value to use for sampling.
logprobs: Optional[int] = None # The number of logprobs to return. If None, no logprobs are returned.
stop: Optional[list[str]] = field(
default_factory=lambda: ["\u200b"]
default_factory=lambda: []
) # A list of strings to stop generation when encountered.
repeat_penalty: Optional[float] = 1.1 # The penalty to apply to repeated tokens.
top_k: Optional[int] = 40 # The top-k value to use for sampling.
@@ -96,7 +96,17 @@ class LLMModels(Enum): # gpt models for openai api
max_tokens_per_request=2048,
token_margin=8,
tokenizer=OpenAITokenizer("gpt-3.5-turbo"),
api_url="https://biyan.xyz/proxy/openai/v1/chat/completions",
api_url="https://gookproxy-gyul.hf.space/proxy/openai/v1/chat/completions",
api_key="arcalive",
)

gpt_4_proxy = OpenAIModel(
name="gpt-4",
max_total_tokens=4096,
max_tokens_per_request=2048,
token_margin=8,
tokenizer=OpenAITokenizer("gpt-3.5-turbo"),
api_url="https://gookproxy-gyul.hf.space/proxy/openai/v1/chat/completions",
api_key="arcalive",
)

@@ -107,7 +117,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("ehartford/Wizard-Vicuna-7B-Uncensored"),
model_path="./llama_models/ggml/Wizard-Vicuna-7B-Uncensored.ggmlv2.q4_1.bin",
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
wizard_vicuna_13b_uncensored = LlamaCppModel(
name="Wizard-Vicuna-13B-Uncensored",
@@ -116,7 +126,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("ehartford/Wizard-Vicuna-13B-Uncensored"),
model_path="./llama_models/ggml/Wizard-Vicuna-13B-Uncensored.ggml.q5_1.bin",
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
gpt4_x_vicuna_13b = LlamaCppModel(
name="gpt4-x-vicuna-13B-GGML",
@@ -125,7 +135,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("junelee/wizard-vicuna-13b"),
model_path="./llama_models/ggml/gpt4-x-vicuna-13B.ggml.q4_0.bin",
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
wizard_mega_13b = LlamaCppModel(
name="wizard-mega-13B-GGML",
@@ -134,8 +144,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("junelee/wizard-vicuna-13b"),
model_path="./llama_models/ggml/wizard-mega-13B.ggml.q4_0.bin",
stop=["\u200b", "</s>"],
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
manticore_13b_uncensored = LlamaCppModel(
name="Manticore-13B-GGML",
Expand All @@ -144,8 +153,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("openaccess-ai-collective/manticore-13b"),
model_path="./llama_models/ggml/Manticore-13B.ggmlv2.q5_1.bin",
stop=["\u200b", "</s>"],
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)

@classmethod
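
The n_gpu_layers field above is the "llama gpu offload" from the commit title: in llama.cpp it is the number of transformer layers held in VRAM, with the remainder evaluated on the CPU (0 keeps everything on the CPU). A hedged sketch of how such a value is typically passed through llama-cpp-python; the Llama constructor is the upstream library's, and the prompt and n_ctx here are illustrative:

from llama_cpp import Llama

# Load a GGML model, offloading 30 transformer layers to the GPU;
# n_gpu_layers=0 would run everything on the CPU.
llm = Llama(
    model_path="./llama_models/ggml/Wizard-Vicuna-7B-Uncensored.ggmlv2.q4_1.bin",
    n_ctx=2048,       # context window (illustrative)
    n_gpu_layers=30,  # the value this commit settles on
)

# Plain completion call; the stop sequence ends generation at the end-of-turn marker.
output = llm("USER: Hello!\nASSISTANT:", max_tokens=64, stop=["</s>"])
print(output["choices"][0]["text"])
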
18 changes: 17 additions & 1 deletion app/routers/auth.py
@@ -7,6 +7,11 @@
from app.database.schemas.auth import Users
from app.dependencies import user_dependency
from app.errors.api_exceptions import Responses_400, Responses_404
from app.utils.auth.register_validation import (
is_email_length_in_range,
is_email_valid_format,
is_password_length_in_range,
)
from app.utils.auth.token import create_access_token, token_decode
from app.utils.chat.cache_manager import CacheManager
from app.viewmodels.base_models import SnsType, Token, UserRegister, UserToken
@@ -39,8 +44,19 @@ async def register(
if sns_type == SnsType.EMAIL:
if not (reg_info.email and reg_info.password):
raise Responses_400.no_email_or_password
if await is_email_exist(reg_info.email):

if is_email_length_in_range(email=reg_info.email) is False:
raise Responses_400.email_length_not_in_range

if is_password_length_in_range(password=reg_info.password) is False:
raise Responses_400.password_length_not_in_range

if is_email_valid_format(email=reg_info.email) is False:
raise Responses_400.invalid_email_format

if await is_email_exist(email=reg_info.email):
raise Responses_400.email_already_exists

hashed_password: str = bcrypt.hashpw(
password=reg_info.password.encode("utf-8"),
salt=bcrypt.gensalt(),
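
Note that the new bounds line up with the widened columns in app/database/schemas/auth.py (String(length=50) for email, String(length=100) for password), so any value that passes validation also fits its column; what is actually stored for the password is the 60-byte bcrypt digest computed below in either case.
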
26 changes: 26 additions & 0 deletions app/utils/auth/register_validation.py
@@ -0,0 +1,26 @@
import re

# Regular expression for validating an email address
EMAIL_REGEX: re.Pattern = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{1,7}\b")


# Check whether an email address matches the expected format
def is_email_valid_format(email: str) -> bool:
return EMAIL_REGEX.fullmatch(email) is not None


def is_email_length_in_range(email: str) -> bool:
return 6 <= len(email) <= 50


def is_password_length_in_range(password: str) -> bool:
return 6 <= len(password) <= 100


# Driver Code
if __name__ == "__main__":
for email in ("ankitrai326@gmail.com", "my.ownsite@our-earth.org", "ankitrai326.com", "aa@a.a"):
# check each sample address
print(is_email_valid_format(email))
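
For reference, the driver above prints True, True, False, True: the bare domain ankitrai326.com fails, while aa@a.a still passes because the TLD quantifier allows a single character. Tightening {1,7} to {2,7} would reject it, since real TLDs are at least two letters.
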
45 changes: 36 additions & 9 deletions app/utils/chat/chat_commands.py
@@ -112,6 +112,7 @@ async def command_handler(
callback_name=callback_name,
callback_args=callback_args,
buffer=buffer,
translate=translate,
)
if response_type is ResponseType.DO_NOTHING:
return
@@ -578,17 +579,35 @@ async def testchaining(chain_size: int, buffer: BufferedUserContext) -> Tuple[st
return f"/testchaining {chain_size-1}", ResponseType.REPEAT_COMMAND

@staticmethod
@CommandResponse.handle_both
async def query(query: str, /, buffer: BufferedUserContext) -> str:
async def query(query: str, /, buffer: BufferedUserContext, **kwargs) -> Tuple[str | None, ResponseType]:
"""Query from redis vectorstore\n
/query <query>"""
k: int = 3
found: list[list[Document]] | None = await VectorStoreManager.asimilarity_search(
queries=[query], index_name=buffer.user_id, k=k
)
if found is not None and len(found[0]) > 0:
found_text: str = "\n\n".join([f"...{document.page_content}..." for document in found[0]])
query = CONTEXT_QUESTION_TMPL_QUERY1.format(question=query, context=found_text)
found_text_and_score: list[
list[Tuple[Document, float]]
] = await VectorStoreManager.asimilarity_search_multiple_index_with_score(
queries=[query], index_names=[buffer.user_id, ""], k=k
) # a lower score is better!

if len(found_text_and_score[0]) > 0:
found_text: str = "\n\n".join([document.page_content for document, _ in found_text_and_score[0]])
context_and_query: str = CONTEXT_QUESTION_TMPL_QUERY1.format(question=query, context=found_text)
await MessageHandler.user(
msg=context_and_query,
translate=kwargs.get("translate", False),
buffer=buffer,
)
await MessageHandler.ai(
translate=kwargs.get("translate", False),
buffer=buffer,
)
await MessageManager.set_message_history_safely(
user_chat_context=buffer.current_user_chat_context,
role=ChatRoles.USER,
new_content=query,
index=-1,
)
return None, ResponseType.DO_NOTHING
else:
await SendToWebsocket.message(
websocket=buffer.websocket,
Expand All @@ -597,7 +616,7 @@ async def query(query: str, /, buffer: BufferedUserContext) -> str:
finish=False,
model_name=buffer.current_user_chat_context.llm_model.value.name,
)
return query
return query, ResponseType.HANDLE_BOTH

@staticmethod
@CommandResponse.send_message_and_stop
@@ -606,3 +625,11 @@ async def embed(text_to_embed: str, /, buffer: BufferedUserContext) -> str:
/embed <text_to_embed>"""
await VectorStoreManager.create_documents(text=text_to_embed, index_name=buffer.user_id)
return "Embedding successful!"

@staticmethod
@CommandResponse.send_message_and_stop
async def share(text_to_embed: str, /) -> str:
"""Embed the text and save its vectors in the redis vectorstore. This index is shared for everyone.\n
/share <text_to_embed>"""
await VectorStoreManager.create_documents(text=text_to_embed, index_name="")
return "Embedding successful! This data will be shared for everyone."