Added tokenbox, llama gpu offload, Modified /query
c0sogi committed May 20, 2023
1 parent 58c5074 commit 69249d0
Showing 36 changed files with 34,809 additions and 34,339 deletions.
Binary file modified app/contents/chat_demo.gif
Binary file added app/contents/code_demo.png
Binary file modified app/contents/embed_demo.png
Binary file added app/contents/embed_file_demo.png
21 changes: 10 additions & 11 deletions app/database/schemas/auth.py
@@ -1,6 +1,5 @@
import enum
from sqlalchemy import (
Column,
String,
Integer,
Enum,
@@ -17,22 +16,22 @@


class UserStatus(str, enum.Enum):
ACTIVE = "active"
DELETED = "deleted"
BLOCKED = "blocked"
active = "active"
deleted = "deleted"
blocked = "blocked"


class ApiKeyStatus(str, enum.Enum):
ACTIVE = "active"
STOPPED = "stopped"
DELETED = "deleted"
active = "active"
stopped = "stopped"
deleted = "deleted"


class Users(Base, Mixin):
__tablename__ = "users"
status = Column(String, Enum(UserStatus), default=UserStatus.ACTIVE, nullable=False)
email: Mapped[str] = mapped_column(String(length=20))
password: Mapped[str | None] = mapped_column(String(length=72))
status: Mapped[str] = mapped_column(Enum(UserStatus), default=UserStatus.active)
email: Mapped[str] = mapped_column(String(length=50))
password: Mapped[str | None] = mapped_column(String(length=100))
name: Mapped[str | None] = mapped_column(String(length=20))
phone_number: Mapped[str | None] = mapped_column(String(length=20))
profile_img: Mapped[str | None] = mapped_column(String(length=100))
@@ -49,7 +48,7 @@ class Users(Base, Mixin):

class ApiKeys(Base, Mixin):
__tablename__ = "api_keys"
status = Column(String, Enum(ApiKeyStatus), default=ApiKeyStatus.ACTIVE, nullable=False)
status: Mapped[str] = mapped_column(Enum(ApiKeyStatus), default=ApiKeyStatus.active)
access_key: Mapped[str] = mapped_column(String(length=64), index=True, unique=True)
secret_key: Mapped[str] = mapped_column(String(length=64))
user_memo: Mapped[str | None] = mapped_column(String(length=40))
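
For context, the change above replaces the legacy status = Column(String, Enum(...)) declaration (which passes two conflicting column types) with the typed SQLAlchemy 2.0 mapped_column style already used by the surrounding fields. A minimal standalone sketch of the new pattern, assuming SQLAlchemy 2.x; the real models also inherit a project Mixin, simplified here to a bare primary key:

import enum

from sqlalchemy import Enum, String
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class UserStatus(str, enum.Enum):
    active = "active"
    deleted = "deleted"
    blocked = "blocked"


class Users(Base):
    __tablename__ = "users"
    id: Mapped[int] = mapped_column(primary_key=True)  # stand-in for the Mixin's key
    # Enum(UserStatus) derives the DB type from the Python enum, and the default
    # is the enum member itself rather than a loose string.
    status: Mapped[str] = mapped_column(Enum(UserStatus), default=UserStatus.active)
    email: Mapped[str] = mapped_column(String(length=50))
    password: Mapped[str | None] = mapped_column(String(length=100))  # Optional -> nullable
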
18 changes: 18 additions & 0 deletions app/errors/api_exceptions.py
@@ -144,6 +144,24 @@ class Responses_400:
msg="이미 사용중인 웹소켓입니다.",
detail="Websocket is already in use.",
)
invalid_email_format: APIException = APIException(
status_code=400,
internal_code=17,
msg="올바르지 않은 이메일 형식 입니다.",
detail="Invalid Email Format.",
)
email_length_not_in_range: APIException = APIException(
status_code=400,
internal_code=18,
msg="이메일은 6자 이상 50자 이하로 입력해주세요.",
detail="Email must be 6 ~ 50 characters.",
)
password_length_not_in_range: APIException = APIException(
status_code=400,
internal_code=19,
msg="비밀번호는 6자 이상 100자 이하로 입력해주세요.",
detail="Password must be 6 ~ 100 characters.",
)


@dataclass(frozen=True)
30 changes: 19 additions & 11 deletions app/models/llms.py
@@ -4,7 +4,7 @@
from app.common.config import OPENAI_API_KEY

from app.models.llm_tokenizers import BaseTokenizer, LlamaTokenizer, OpenAITokenizer
from app.utils.chat.prompts import USER_AI_TMPL_CHAT
from app.utils.chat.prompts import USER_AI_TMPL_CHAT1


@dataclass
@@ -24,7 +24,7 @@ class LlamaCppModel(LLMModel):
n_parts: int = (
-1
) # Number of parts to split the model into. If -1, the number of parts is automatically determined.
n_gpu_layers: int = 32 # Number of layers to offload to the GPU. If 0, no layers are offloaded.
n_gpu_layers: int = 30 # Number of layers to offload to the GPU. If 0, no layers are offloaded.
seed: int = -1 # Seed. If -1, a random seed is used.
f16_kv: bool = True # Use half-precision for key/value cache.
logits_all: bool = False # Return logits for all tokens, not just the last token.
@@ -45,7 +45,7 @@ class LlamaCppModel(LLMModel):
top_p: Optional[float] = 0.95 # The top-p value to use for sampling.
logprobs: Optional[int] = None # The number of logprobs to return. If None, no logprobs are returned.
stop: Optional[list[str]] = field(
default_factory=lambda: ["\u200b"]
default_factory=lambda: []
) # A list of strings to stop generation when encountered.
repeat_penalty: Optional[float] = 1.1 # The penalty to apply to repeated tokens.
top_k: Optional[int] = 40 # The top-k value to use for sampling.
@@ -96,7 +96,17 @@ class LLMModels(Enum): # gpt models for openai api
max_tokens_per_request=2048,
token_margin=8,
tokenizer=OpenAITokenizer("gpt-3.5-turbo"),
api_url="https://biyan.xyz/proxy/openai/v1/chat/completions",
api_url="https://gookproxy-gyul.hf.space/proxy/openai/v1/chat/completions",
api_key="arcalive",
)

gpt_4_proxy = OpenAIModel(
name="gpt-4",
max_total_tokens=4096,
max_tokens_per_request=2048,
token_margin=8,
tokenizer=OpenAITokenizer("gpt-3.5-turbo"),
api_url="https://gookproxy-gyul.hf.space/proxy/openai/v1/chat/completions",
api_key="arcalive",
)

@@ -107,7 +117,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("ehartford/Wizard-Vicuna-7B-Uncensored"),
model_path="./llama_models/ggml/Wizard-Vicuna-7B-Uncensored.ggmlv2.q4_1.bin",
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
wizard_vicuna_13b_uncensored = LlamaCppModel(
name="Wizard-Vicuna-13B-Uncensored",
@@ -116,7 +126,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("ehartford/Wizard-Vicuna-13B-Uncensored"),
model_path="./llama_models/ggml/Wizard-Vicuna-13B-Uncensored.ggml.q5_1.bin",
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
gpt4_x_vicuna_13b = LlamaCppModel(
name="gpt4-x-vicuna-13B-GGML",
@@ -125,7 +135,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("junelee/wizard-vicuna-13b"),
model_path="./llama_models/ggml/gpt4-x-vicuna-13B.ggml.q4_0.bin",
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
wizard_mega_13b = LlamaCppModel(
name="wizard-mega-13B-GGML",
@@ -134,8 +144,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("junelee/wizard-vicuna-13b"),
model_path="./llama_models/ggml/wizard-mega-13B.ggml.q4_0.bin",
stop=["\u200b", "</s>"],
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)
manticore_13b_uncensored = LlamaCppModel(
name="Manticore-13B-GGML",
Expand All @@ -144,8 +153,7 @@ class LLMModels(Enum): # gpt models for openai api
token_margin=8,
tokenizer=LlamaTokenizer("openaccess-ai-collective/manticore-13b"),
model_path="./llama_models/ggml/Manticore-13B.ggmlv2.q5_1.bin",
stop=["\u200b", "</s>"],
description=USER_AI_TMPL_CHAT,
description=USER_AI_TMPL_CHAT1,
)

@classmethod
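
The n_gpu_layers field above is the "llama gpu offload" from the commit title: in llama.cpp it is the number of transformer layers held in VRAM, with the remainder evaluated on the CPU (0 keeps everything on the CPU). A hedged sketch of how such a value is typically passed through llama-cpp-python; the Llama constructor is the upstream library's, and the prompt and n_ctx here are illustrative:

from llama_cpp import Llama

# Load a GGML model, offloading 30 transformer layers to the GPU;
# n_gpu_layers=0 would run everything on the CPU.
llm = Llama(
    model_path="./llama_models/ggml/Wizard-Vicuna-7B-Uncensored.ggmlv2.q4_1.bin",
    n_ctx=2048,       # context window (illustrative)
    n_gpu_layers=30,  # the value this commit settles on
)

# Plain completion call; the stop sequence ends generation at the end-of-turn marker.
output = llm("USER: Hello!\nASSISTANT:", max_tokens=64, stop=["</s>"])
print(output["choices"][0]["text"])
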
18 changes: 17 additions & 1 deletion app/routers/auth.py
@@ -7,6 +7,11 @@
from app.database.schemas.auth import Users
from app.dependencies import user_dependency
from app.errors.api_exceptions import Responses_400, Responses_404
from app.utils.auth.register_validation import (
is_email_length_in_range,
is_email_valid_format,
is_password_length_in_range,
)
from app.utils.auth.token import create_access_token, token_decode
from app.utils.chat.cache_manager import CacheManager
from app.viewmodels.base_models import SnsType, Token, UserRegister, UserToken
@@ -39,8 +44,19 @@ async def register(
if sns_type == SnsType.EMAIL:
if not (reg_info.email and reg_info.password):
raise Responses_400.no_email_or_password
if await is_email_exist(reg_info.email):

if is_email_length_in_range(email=reg_info.email) is False:
raise Responses_400.email_length_not_in_range

if is_password_length_in_range(password=reg_info.password) is False:
raise Responses_400.password_length_not_in_range

if is_email_valid_format(email=reg_info.email) is False:
raise Responses_400.invalid_email_format

if await is_email_exist(email=reg_info.email):
raise Responses_400.email_already_exists

hashed_password: str = bcrypt.hashpw(
password=reg_info.password.encode("utf-8"),
salt=bcrypt.gensalt(),
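
Note that the new bounds line up with the widened columns in app/database/schemas/auth.py (String(length=50) for email, String(length=100) for password), so any value that passes validation also fits its column; what is actually stored for the password is the 60-byte bcrypt digest computed below in either case.
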
26 changes: 26 additions & 0 deletions app/utils/auth/register_validation.py
@@ -0,0 +1,26 @@
import re

# Regular expression for validating an email address
EMAIL_REGEX: re.Pattern = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{1,7}\b")


# Check whether an email address matches the expected format
def is_email_valid_format(email: str) -> bool:
return EMAIL_REGEX.fullmatch(email) is not None


def is_email_length_in_range(email: str) -> bool:
return 6 <= len(email) <= 50


def is_password_length_in_range(password: str) -> bool:
return 6 <= len(password) <= 100


# Driver Code
if __name__ == "__main__":
for email in ("ankitrai326@gmail.com", "my.ownsite@our-earth.org", "ankitrai326.com", "aa@a.a"):
# check each sample address
print(is_email_valid_format(email))
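
For reference, the driver above prints True, True, False, True: the bare domain ankitrai326.com fails, while aa@a.a still passes because the TLD quantifier allows a single character. Tightening {1,7} to {2,7} would reject it, since real TLDs are at least two letters.
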
45 changes: 36 additions & 9 deletions app/utils/chat/chat_commands.py
@@ -112,6 +112,7 @@ async def command_handler(
callback_name=callback_name,
callback_args=callback_args,
buffer=buffer,
translate=translate,
)
if response_type is ResponseType.DO_NOTHING:
return
@@ -578,17 +579,35 @@ async def testchaining(chain_size: int, buffer: BufferedUserContext) -> Tuple[st
return f"/testchaining {chain_size-1}", ResponseType.REPEAT_COMMAND

@staticmethod
@CommandResponse.handle_both
async def query(query: str, /, buffer: BufferedUserContext) -> str:
async def query(query: str, /, buffer: BufferedUserContext, **kwargs) -> Tuple[str | None, ResponseType]:
"""Query from redis vectorstore\n
/query <query>"""
k: int = 3
found: list[list[Document]] | None = await VectorStoreManager.asimilarity_search(
queries=[query], index_name=buffer.user_id, k=k
)
if found is not None and len(found[0]) > 0:
found_text: str = "\n\n".join([f"...{document.page_content}..." for document in found[0]])
query = CONTEXT_QUESTION_TMPL_QUERY1.format(question=query, context=found_text)
found_text_and_score: list[
list[Tuple[Document, float]]
] = await VectorStoreManager.asimilarity_search_multiple_index_with_score(
queries=[query], index_names=[buffer.user_id, ""], k=k
) # a lower score is better!

if len(found_text_and_score[0]) > 0:
found_text: str = "\n\n".join([document.page_content for document, _ in found_text_and_score[0]])
context_and_query: str = CONTEXT_QUESTION_TMPL_QUERY1.format(question=query, context=found_text)
await MessageHandler.user(
msg=context_and_query,
translate=kwargs.get("translate", False),
buffer=buffer,
)
await MessageHandler.ai(
translate=kwargs.get("translate", False),
buffer=buffer,
)
await MessageManager.set_message_history_safely(
user_chat_context=buffer.current_user_chat_context,
role=ChatRoles.USER,
new_content=query,
index=-1,
)
return None, ResponseType.DO_NOTHING
else:
await SendToWebsocket.message(
websocket=buffer.websocket,
Expand All @@ -597,7 +616,7 @@ async def query(query: str, /, buffer: BufferedUserContext) -> str:
finish=False,
model_name=buffer.current_user_chat_context.llm_model.value.name,
)
return query
return query, ResponseType.HANDLE_BOTH

@staticmethod
@CommandResponse.send_message_and_stop
@@ -606,3 +625,11 @@ async def embed(text_to_embed: str, /, buffer: BufferedUserContext) -> str:
/embed <text_to_embed>"""
await VectorStoreManager.create_documents(text=text_to_embed, index_name=buffer.user_id)
return "Embedding successful!"

@staticmethod
@CommandResponse.send_message_and_stop
async def share(text_to_embed: str, /) -> str:
"""Embed the text and save its vectors in the redis vectorstore. This index is shared for everyone.\n
/share <text_to_embed>"""
await VectorStoreManager.create_documents(text=text_to_embed, index_name="")
return "Embedding successful! This data will be shared for everyone."