diff --git a/.env-sample b/.env-sample
index 8c998e6..a0552e0 100644
--- a/.env-sample
+++ b/.env-sample
@@ -1,19 +1,60 @@
-# DELETE THESE COMMENT LINE!!
-# DEFAULT_LLM_MODEL is defined in `LLM_MODELS` in `app\models\llms.py`
-API_ENV="test"
+# API_ENV can be "local", "test", or "prod"
+API_ENV="local"
+
+# Port for Docker. If you run this app without Docker, this is ignored and the app listens on port 8001.
 PORT=8000
+
+# Default LLM model for each chat.
 DEFAULT_LLM_MODEL="gpt_3_5_turbo"
+
+# Your MySQL DB info
 MYSQL_DATABASE="traffic"
 MYSQL_TEST_DATABASE="testing_db"
 MYSQL_ROOT_PASSWORD="YOUR_MYSQL_PASSWORD_HERE"
 MYSQL_USER="traffic_admin"
 MYSQL_PASSWORD="YOUR_DB_ADMIN_PASSWORD_HERE"
+
+# Your Redis DB info
 REDIS_DATABASE="0"
 REDIS_PASSWORD="YOUR_REDIS_PASSWORD_HERE"
+
+# Your JWT secret key
 JWT_SECRET="ANY_PASSWORD_FOR_JWT_TOKEN_GENERATION_HERE"
+# Your OpenAI API key
 OPENAI_API_KEY="sk-*************"
+
+# Chatbot settings
+# Summarize for chat: summarize any message longer than SUMMARIZATION_THRESHOLD tokens.
+SUMMARIZE_FOR_CHAT=True
+SUMMARIZATION_THRESHOLD=512
+
+# Text to embed is split into chunks of EMBEDDING_TOKEN_CHUNK_SIZE tokens;
+# EMBEDDING_TOKEN_CHUNK_OVERLAP is how many tokens consecutive chunks share.
+EMBEDDING_TOKEN_CHUNK_SIZE=512
+EMBEDDING_TOKEN_CHUNK_OVERLAP=128
+
+# The shared vector collection name, used by all users.
+QDRANT_COLLECTION="SharedCollection"
+
+# To add a prefix or suffix to every prompt sent to the LLM, set these.
+GLOBAL_PREFIX=""
+GLOBAL_SUFFIX=""
+
+# To use a local embedding model instead of OpenAI's Ada-002,
+# set LOCAL_EMBEDDING_MODEL to "intfloat/e5-large-v2" or another Hugging Face embedding model repo.
+# Warning: local embedding requires significant computing resources!
+LOCAL_EMBEDDING_MODEL=None
+
+
+# Define these if you want to run the production server with API_ENV="prod"
+HOST_IP="OPTIONAL_YOUR_IP_HERE e.g. 192.168.0.2"
+HOST_MAIN="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com, needed for TLS certificate registration when running in production"
+HOST_SUB="OPTIONAL_YOUR_SUB_DOMAIN_HERE e.g. mobile.yourdomain.com"
+MY_EMAIL="OPTIONAL_YOUR_EMAIL_HERE e.g. you@yourdomain.com, needed for TLS certificate registration when running in production"
+
+# Not used.
 AWS_ACCESS_KEY="OPTIONAL_IF_YOU_NEED"
 AWS_SECRET_KEY="OPTIONAL_IF_YOU_NEED"
 AWS_AUTHORIZED_EMAIL="OPTIONAL_IF_YOU_NEED"
@@ -23,10 +64,8 @@ SAMPLE_SECRET_KEY="OPTIONAL_IF_YOU_NEED_FOR_TESTING"
 KAKAO_RESTAPI_TOKEN="OPTIONAL_IF_YOU_NEED e.g. Bearer XXXXX"
 WEATHERBIT_API_KEY="OPTIONAL_IF_YOU_NEED"
 NASA_API_KEY="OPTIONAL_IF_YOU_NEED"
-HOST_IP="OPTIONAL_YOUR_IP_HERE e.g. 192.168.0.2"
-HOST_MAIN="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com, if you are running API_ENV as production, this will be needed for TLS certificate registration"
-HOST_SUB="OPTIONAL_YOUR_SUB_DOMAIN_HERE e.g. mobile.yourdomain.com"
-MY_EMAIL="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com, if you are running API_ENV as production, this will be needed for TLS certificate registration"
+
+# For translation. If you don't need translation, you can ignore these.
 PAPAGO_CLIENT_ID="OPTIONAL_FOR_TRANSTLATION"
 PAPAGO_CLIENT_SECRET="OPTIONAL_FOR_TRANSTLATION"
 GOOGLE_CLOUD_PROJECT_ID="OPTIONAL_FOR_TRANSTLATION e.g. top-abcd-01234"
@@ -35,13 +74,4 @@ GOOGLE_TRANSLATE_OAUTH_ID="OPTIONAL_FOR_TRANSTLATION"
 GOOGLE_TRANSLATE_OAUTH_SECRET="OPTIONAL_FOR_TRANSTLATION"
 RAPIDAPI_KEY="OPTIONAL_FOR_TRANSLATION"
 CUSTOM_TRANSLATE_URL="OPTIONAL_FOR_TRANSLATION"
-SUMMARIZE_FOR_CHAT=True
-SUMMARIZATION_THRESHOLD=512
-EMBEDDING_TOKEN_CHUNK_SIZE=512
-EMBEDDING_TOKEN_CHUNK_OVERLAP=128
-QDRANT_COLLECTION="SharedCollection"
-GLOBAL_PREFIX=""
-GLOBAL_SUFFIX=""
-LOCAL_EMBEDDING_MODEL=None
-# If you want to use local embedding instead of OpenAI's Ada-002,
-# set LOCAL_EMBEDDING_MODEL as "intfloat/e5-large-v2" or other huggingface embedding model repo.
+
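The chunking settings above imply a sliding window over the tokenized text: with EMBEDDING_TOKEN_CHUNK_SIZE=512 and EMBEDDING_TOKEN_CHUNK_OVERLAP=128, each chunk starts 384 tokens after the previous one. A minimal sketch of that arithmetic (a hypothetical helper for illustration, not code from this repository):

```python
# Hypothetical illustration of EMBEDDING_TOKEN_CHUNK_SIZE / EMBEDDING_TOKEN_CHUNK_OVERLAP;
# the app's actual chunker may differ in detail.
def chunk_tokens(tokens: list[int], size: int = 512, overlap: int = 128) -> list[list[int]]:
    step = size - overlap  # consecutive chunks share `overlap` tokens
    return [tokens[i : i + size] for i in range(0, max(len(tokens) - overlap, 1), step)]

# 1000 tokens -> chunks starting at offsets 0, 384, 768; the last chunk is shorter.
print([len(c) for c in chunk_tokens(list(range(1000)))])  # [512, 512, 232]
```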
top-abcd-01234" @@ -35,13 +74,4 @@ GOOGLE_TRANSLATE_OAUTH_ID="OPTIONAL_FOR_TRANSTLATION" GOOGLE_TRANSLATE_OAUTH_SECRET="OPTIONAL_FOR_TRANSTLATION" RAPIDAPI_KEY="OPTIONAL_FOR_TRANSLATION" CUSTOM_TRANSLATE_URL="OPTIONAL_FOR_TRANSLATION" -SUMMARIZE_FOR_CHAT=True -SUMMARIZATION_THRESHOLD=512 -EMBEDDING_TOKEN_CHUNK_SIZE=512 -EMBEDDING_TOKEN_CHUNK_OVERLAP=128 -QDRANT_COLLECTION="SharedCollection" -GLOBAL_PREFIX="" -GLOBAL_SUFFIX="" -LOCAL_EMBEDDING_MODEL=None -# If you want to use local embedding instead of OpenAI's Ada-002, -# set LOCAL_EMBEDDING_MODEL as "intfloat/e5-large-v2" or other huggingface embedding model repo. + diff --git a/app/models/llm_tokenizers.py b/app/models/llm_tokenizers.py index 905392a..52cc41c 100644 --- a/app/models/llm_tokenizers.py +++ b/app/models/llm_tokenizers.py @@ -81,8 +81,8 @@ def __init__(self, model_name: str): from transformers.models.llama import LlamaTokenizer as _LlamaTokenizer self._tokenizer_type = _LlamaTokenizer - ApiLogger.cinfo("Tokenizer loaded: ", self.model_name) - except Exception: + except Exception as e: + ApiLogger.cwarning(str(e)) self._tokenizer_type = None self.model_name = model_name self._tokenizer = None @@ -129,7 +129,6 @@ def __init__(self, model_name: str): ) self._tokenizer_type = _ExllamaTokenizer - ApiLogger.cinfo("Tokenizer loaded: ", self.model_name) except Exception: self._tokenizer_type = None self.model_name = model_name diff --git a/app/routers/v1.py b/app/routers/v1.py index 46033cc..893bba4 100644 --- a/app/routers/v1.py +++ b/app/routers/v1.py @@ -36,6 +36,7 @@ logger = ApiLogger("||v1||") + # Importing llama.cpp try: from app.utils.chat.text_generations.llama_cpp.generator import ( diff --git a/app/utils/chat/text_generations/llama_cpp/generator.py b/app/utils/chat/text_generations/llama_cpp/generator.py index 363a63d..1e1d55e 100644 --- a/app/utils/chat/text_generations/llama_cpp/generator.py +++ b/app/utils/chat/text_generations/llama_cpp/generator.py @@ -18,8 +18,31 @@ from .. import BaseCompletionGenerator + +logger = ApiLogger("||🦙 llama_cpp.generator||") + + +def ensure_dll_exists() -> None: + import os + import subprocess + + dll_path = "./repositories/llama_cpp/llama_cpp/llama.dll" + + if not os.path.exists("./repositories"): + raise FileNotFoundError( + "🦙 Could not find llama-cpp-python repositories folder!" 
diff --git a/app/routers/v1.py b/app/routers/v1.py
index 46033cc..893bba4 100644
--- a/app/routers/v1.py
+++ b/app/routers/v1.py
@@ -36,6 +36,7 @@
 logger = ApiLogger("||v1||")

+# Importing llama.cpp
 try:
     from app.utils.chat.text_generations.llama_cpp.generator import (
diff --git a/app/utils/chat/text_generations/llama_cpp/generator.py b/app/utils/chat/text_generations/llama_cpp/generator.py
index 363a63d..1e1d55e 100644
--- a/app/utils/chat/text_generations/llama_cpp/generator.py
+++ b/app/utils/chat/text_generations/llama_cpp/generator.py
@@ -18,8 +18,31 @@
 from .. import BaseCompletionGenerator
+
+logger = ApiLogger("||🦙 llama_cpp.generator||")
+
+
+def ensure_dll_exists() -> None:
+    import os
+    import subprocess
+
+    dll_path = "./repositories/llama_cpp/llama_cpp/llama.dll"
+
+    if not os.path.exists("./repositories"):
+        raise FileNotFoundError(
+            "🦙 Could not find llama-cpp-python repositories folder!"
+        )
+
+    if not os.path.exists(dll_path):
+        logger.critical("🦙 llama.cpp DLL not found, building it...")
+        build_script_path = "build-llama-cpp.bat"
+        subprocess.run([build_script_path])
+        logger.critical("🦙 llama.cpp DLL built!")
+
+
 sys.path.insert(0, str(Path("repositories/llama_cpp")))
 try:
+    ensure_dll_exists()
     from repositories.llama_cpp import llama_cpp

     print("🦙 llama-cpp-python repository found!")
@@ -34,8 +57,6 @@
 if TYPE_CHECKING:
     from app.models.llms import LlamaCppModel

-logger = ApiLogger("||🦙 llama_cpp.generator||")
-

 def _make_logit_bias_processor(
     llama: llama_cpp.Llama,
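One caveat with ensure_dll_exists() as written: subprocess.run() does not raise when the batch script exits with a non-zero code, so a failed build would still be logged as "DLL built!". If stricter behavior is wanted, a variant could look like the following (a sketch, not the repository's code):

```python
import os
import subprocess


def ensure_dll_exists_strict(
    dll_path: str = "./repositories/llama_cpp/llama_cpp/llama.dll",
) -> None:
    """Sketch of a stricter ensure_dll_exists: fail loudly if the build breaks."""
    if os.path.exists(dll_path):
        return
    # check=True raises CalledProcessError on a non-zero exit code, so a failed
    # cmake build cannot be mistaken for success.
    subprocess.run(["build-llama-cpp.bat"], check=True)
    if not os.path.exists(dll_path):
        raise FileNotFoundError(f"Build finished but {dll_path} is still missing")
```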
diff --git a/app/web/main.dart.js b/app/web/main.dart.js
index 46cf6b4..ad18c49 100644
--- a/app/web/main.dart.js
+++ b/app/web/main.dart.js
@@ -88171,7 +88171,7 @@ var $async$Bg=A.N(function(b,c){if(b===1)return A.J(c,r)
 while(true)switch(s){case 0:p=q.a
 if(p==null)p=q.a=new A.arW(new A.a9z(q),new A.a9A(q),new A.a9B(q),new A.a9C(q))
 s=2
-return A.F(p.w9("ws://localhost:8000/ws/chat/"+a),$async$Bg)
+return A.F(p.w9("ws://localhost:8001/ws/chat/"+a),$async$Bg)
 case 2:return A.K(null,r)}})
 return A.L($async$Bg,r)},
 lc(){var s=0,r=A.M(t.H),q=this,p
@@ -88450,7 +88450,7 @@ case 3:s=5
 return A.F(A.a8a(a),$async$pJ)
 case 5:case 4:n=J
 s=6
-return A.F(A.nk(A.b([A.kv(a,null,B.q7,"Failed to fetch API Keys",new A.aiE(p),new A.aiF(p),200,"http://localhost:8000/api/user/apikeys"),A.kv(a,null,B.q7,"Failed to fetch user info",null,new A.aiG(p),200,"http://localhost:8000/api/user/me")],t.GP),t.v),$async$pJ)
+return A.F(A.nk(A.b([A.kv(a,null,B.q7,"Failed to fetch API Keys",new A.aiE(p),new A.aiF(p),200,"http://localhost:8001/api/user/apikeys"),A.kv(a,null,B.q7,"Failed to fetch user info",null,new A.aiG(p),200,"http://localhost:8001/api/user/me")],t.GP),t.v),$async$pJ)
 case 6:o=n.a7e(c,new A.aiH())
 if(!o.gaa(o).q()){q=null
 s=1
@@ -88464,7 +88464,7 @@ var $async$jP=A.N(function(d,e){if(d===1){o=e
 s=p}while(true)switch(s){case 0:p=4
 k=t.N
 s=7
-return A.F(A.kv(null,A.n(["email",b,"password",c],k,k),B.l5,"Failed to register",new A.aiI(),new A.aiJ(n),201,"http://localhost:8000/api/auth/register/email"),$async$jP)
+return A.F(A.kv(null,A.n(["email",b,"password",c],k,k),B.l5,"Failed to register",new A.aiI(),new A.aiJ(n),201,"http://localhost:8001/api/auth/register/email"),$async$jP)
 case 7:m=e
 k=m==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ud68c\uc6d0\uac00\uc785 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully registered"):A.wN(m,"Error")
 q=k
@@ -88490,7 +88490,7 @@ return A.L($async$jP,r)},
 mH(a){var s=0,r=A.M(t.Fd),q,p=this,o
 var $async$mH=A.N(function(b,c){if(b===1)return A.J(c,r)
 while(true)switch(s){case 0:s=3
-return A.F(A.kv(p.c,null,B.RO,"Failed to unregister",null,new A.aiK(p),204,"http://localhost:8000/api/auth/register"),$async$mH)
+return A.F(A.kv(p.c,null,B.RO,"Failed to unregister",null,new A.aiK(p),204,"http://localhost:8001/api/auth/register"),$async$mH)
 case 3:o=c
 q=o==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ud68c\uc6d0\ud0c8\ud1f4 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully unregistered"):A.wN(o,"Error")
 s=1
@@ -88503,7 +88503,7 @@ var $async$mr=A.N(function(c,d){if(c===1){o=d
 s=p}while(true)switch(s){case 0:p=4
 k=t.N
 s=7
-return A.F(A.kv(null,A.n(["email",a,"password",b],k,k),B.l5,"Failed to login",new A.aiC(),new A.aiD(n),200,"http://localhost:8000/api/auth/login/email"),$async$mr)
+return A.F(A.kv(null,A.n(["email",a,"password",b],k,k),B.l5,"Failed to login",new A.aiC(),new A.aiD(n),200,"http://localhost:8001/api/auth/login/email"),$async$mr)
 case 7:m=d
 k=m==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ub85c\uadf8\uc778 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully logged in"):A.wN(m,"Error")
 q=k
@@ -88565,7 +88565,7 @@ apz(a){var s=0,r=A.M(t.Fd),q,p=this,o,n
 var $async$m9=A.N(function(b,c){if(b===1)return A.J(c,r)
 while(true)switch(s){case 0:o=t.N
 s=3
-return A.F(A.kv(p.c,A.n(["user_memo",a],o,o),B.l5,"Failed to create API key.",null,new A.aiB(p),201,"http://localhost:8000/api/user/apikeys"),$async$m9)
+return A.F(A.kv(p.c,A.n(["user_memo",a],o,o),B.l5,"Failed to create API key.",null,new A.aiB(p),201,"http://localhost:8001/api/user/apikeys"),$async$m9)
 case 3:n=c
 q=n==null?A.WA("API \ud0a4\uac00 \uc131\uacf5\uc801\uc73c\ub85c \uc0dd\uc131\ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully created API key"):A.wN(n,"Error")
 s=1
diff --git a/build-llama-cpp.bat b/build-llama-cpp.bat
new file mode 100644
index 0000000..b07a780
--- /dev/null
+++ b/build-llama-cpp.bat
@@ -0,0 +1,10 @@
+cd ./repositories/llama_cpp/vendor/llama.cpp
+rmdir /s /q build
+mkdir build
+cd build
+cmake .. -DBUILD_SHARED_LIBS=ON -DLLAMA_CUBLAS=ON
+cmake --build . --config Release
+cd ../../../../..
+copy repositories\llama_cpp\vendor\llama.cpp\build\bin\Release\llama.dll repositories\llama_cpp\llama_cpp
+
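build-llama-cpp.bat is Windows-specific (llama.dll, copy, rmdir /s /q). For reference, a hypothetical cross-platform counterpart in Python (an untested sketch; the libllama.so / libllama.dylib names and output paths on non-Windows platforms are assumptions, not part of this repository):

```python
import shutil
import subprocess
from pathlib import Path

# Hypothetical cross-platform counterpart to build-llama-cpp.bat.
vendor = Path("repositories/llama_cpp/vendor/llama.cpp")
build = vendor / "build"
shutil.rmtree(build, ignore_errors=True)  # rmdir /s /q build
build.mkdir()
subprocess.run(
    ["cmake", "..", "-DBUILD_SHARED_LIBS=ON", "-DLLAMA_CUBLAS=ON"],
    cwd=build, check=True,
)
subprocess.run(["cmake", "--build", ".", "--config", "Release"], cwd=build, check=True)
# On Linux/macOS the shared library is libllama.so / libllama.dylib (assumed names).
for lib in build.rglob("libllama.*"):
    shutil.copy(lib, Path("repositories/llama_cpp/llama_cpp"))
```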