Commit cde07e2: hotfix
c0sogi committed Jul 3, 2023
1 parent c6d815c commit cde07e2
Showing 6 changed files with 89 additions and 28 deletions.
64 changes: 47 additions & 17 deletions .env-sample
@@ -1,19 +1,60 @@
# DELETE THESE COMMENT LINES!!
# DEFAULT_LLM_MODEL is defined in `LLM_MODELS` in `app\models\llms.py`

API_ENV="test"
# API_ENV can be "local", "test", "prod"
API_ENV="local"

# Port for Docker. If you run this app without Docker, this is ignored and port 8001 is used.
PORT=8000

# Default LLM model for each chat.
DEFAULT_LLM_MODEL="gpt_3_5_turbo"

# Your MySQL DB info
MYSQL_DATABASE="traffic"
MYSQL_TEST_DATABASE="testing_db"
MYSQL_ROOT_PASSWORD="YOUR_MYSQL_PASSWORD_HERE"
MYSQL_USER="traffic_admin"
MYSQL_PASSWORD="YOUR_DB_ADMIN_PASSWORD_HERE"

# Your Redis DB info
REDIS_DATABASE="0"
REDIS_PASSWORD="YOUR_REDIS_PASSWORD_HERE"

# Your JWT secret key
JWT_SECRET="ANY_PASSWORD_FOR_JWT_TOKEN_GENERATION_HERE"

# Your OpenAI API key
OPENAI_API_KEY="sk-*************"

# Chatbot settings
# Summarize for chat: summarize any message longer than SUMMARIZATION_THRESHOLD tokens.
SUMMARIZE_FOR_CHAT=True
SUMMARIZATION_THRESHOLD=512

# Text for embedding is split into chunks of EMBEDDING_TOKEN_CHUNK_SIZE tokens;
# EMBEDDING_TOKEN_CHUNK_OVERLAP is how many tokens adjacent chunks share.
EMBEDDING_TOKEN_CHUNK_SIZE=512
EMBEDDING_TOKEN_CHUNK_OVERLAP=128

# The shared vector collection name. This collection is shared by all users.
QDRANT_COLLECTION="SharedCollection"

# To add a prefix or suffix to every prompt sent to the LLM, set these.
GLOBAL_PREFIX=""
GLOBAL_SUFFIX=""

# If you want to use a local embedding model instead of OpenAI's Ada-002,
# set LOCAL_EMBEDDING_MODEL to "intfloat/e5-large-v2" or another Hugging Face embedding model repo.
# Warning: local embedding needs a lot of computing resources!
LOCAL_EMBEDDING_MODEL=None


# Define these if you want to run a production server with API_ENV="prod".
HOST_IP="OPTIONAL_YOUR_IP_HERE e.g. 192.168.0.2"
HOST_MAIN="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com; needed for TLS certificate registration when API_ENV is prod"
HOST_SUB="OPTIONAL_YOUR_SUB_DOMAIN_HERE e.g. mobile.yourdomain.com"
MY_EMAIL="OPTIONAL_YOUR_EMAIL_HERE e.g. you@yourdomain.com; needed for TLS certificate registration when API_ENV is prod"

# Not used.
AWS_ACCESS_KEY="OPTIONAL_IF_YOU_NEED"
AWS_SECRET_KEY="OPTIONAL_IF_YOU_NEED"
AWS_AUTHORIZED_EMAIL="OPTIONAL_IF_YOU_NEED"
@@ -23,10 +64,8 @@ SAMPLE_SECRET_KEY="OPTIONAL_IF_YOU_NEED_FOR_TESTING"
KAKAO_RESTAPI_TOKEN="OPTIONAL_IF_YOU_NEED e.g. Bearer XXXXX"
WEATHERBIT_API_KEY="OPTIONAL_IF_YOU_NEED"
NASA_API_KEY="OPTIONAL_IF_YOU_NEED"
HOST_IP="OPTIONAL_YOUR_IP_HERE e.g. 192.168.0.2"
HOST_MAIN="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com, if you are running API_ENV as production, this will be needed for TLS certificate registration"
HOST_SUB="OPTIONAL_YOUR_SUB_DOMAIN_HERE e.g. mobile.yourdomain.com"
MY_EMAIL="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com, if you are running API_ENV as production, this will be needed for TLS certificate registration"

# For translation. If you don't need translation, you can ignore these.
PAPAGO_CLIENT_ID="OPTIONAL_FOR_TRANSLATION"
PAPAGO_CLIENT_SECRET="OPTIONAL_FOR_TRANSLATION"
GOOGLE_CLOUD_PROJECT_ID="OPTIONAL_FOR_TRANSLATION e.g. top-abcd-01234"
@@ -35,13 +74,4 @@ GOOGLE_TRANSLATE_OAUTH_ID="OPTIONAL_FOR_TRANSLATION"
GOOGLE_TRANSLATE_OAUTH_SECRET="OPTIONAL_FOR_TRANSLATION"
RAPIDAPI_KEY="OPTIONAL_FOR_TRANSLATION"
CUSTOM_TRANSLATE_URL="OPTIONAL_FOR_TRANSLATION"
SUMMARIZE_FOR_CHAT=True
SUMMARIZATION_THRESHOLD=512
EMBEDDING_TOKEN_CHUNK_SIZE=512
EMBEDDING_TOKEN_CHUNK_OVERLAP=128
QDRANT_COLLECTION="SharedCollection"
GLOBAL_PREFIX=""
GLOBAL_SUFFIX=""
LOCAL_EMBEDDING_MODEL=None
# If you want to use local embedding instead of OpenAI's Ada-002,
# set LOCAL_EMBEDDING_MODEL as "intfloat/e5-large-v2" or other huggingface embedding model repo.
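
On the chat settings above: SUMMARIZE_FOR_CHAT gates summarization on message length, and GLOBAL_PREFIX/GLOBAL_SUFFIX wrap every prompt sent to the LLM. A minimal sketch of how these variables might be consumed, with illustrative function names (assumptions, not committed code):

import os

def wrap_prompt(prompt: str) -> str:
    # GLOBAL_PREFIX / GLOBAL_SUFFIX are applied to every prompt sent to the LLM.
    return os.environ.get("GLOBAL_PREFIX", "") + prompt + os.environ.get("GLOBAL_SUFFIX", "")

def maybe_summarize(message: str, count_tokens, summarize) -> str:
    # Summarize only when enabled and the message exceeds the token threshold;
    # the two callables are supplied by the app.
    enabled = os.environ.get("SUMMARIZE_FOR_CHAT", "True") == "True"
    threshold = int(os.environ.get("SUMMARIZATION_THRESHOLD", "512"))
    return summarize(message) if enabled and count_tokens(message) > threshold else message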
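Likewise, the embedding chunk settings describe a sliding window: each chunk holds EMBEDDING_TOKEN_CHUNK_SIZE tokens and shares EMBEDDING_TOKEN_CHUNK_OVERLAP tokens with its neighbor. A sketch of that windowing over an already-tokenized list (illustrative only):

def chunk_tokens(tokens: list, size: int = 512, overlap: int = 128) -> list:
    if not tokens:
        return []
    # Neighboring chunks advance by (size - overlap) tokens, so each
    # consecutive pair shares exactly `overlap` tokens.
    step = size - overlap
    return [tokens[i:i + size] for i in range(0, max(len(tokens) - overlap, 1), step)]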

5 changes: 2 additions & 3 deletions app/models/llm_tokenizers.py
@@ -81,8 +81,8 @@ def __init__(self, model_name: str):
from transformers.models.llama import LlamaTokenizer as _LlamaTokenizer

self._tokenizer_type = _LlamaTokenizer
ApiLogger.cinfo("Tokenizer loaded: ", self.model_name)
except Exception:
except Exception as e:
ApiLogger.cwarning(str(e))
self._tokenizer_type = None
self.model_name = model_name
self._tokenizer = None
@@ -129,7 +129,6 @@ def __init__(self, model_name: str):
)

self._tokenizer_type = _ExllamaTokenizer
ApiLogger.cinfo("Tokenizer loaded: ", self.model_name)
except Exception:
self._tokenizer_type = None
self.model_name = model_name
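
The substance of these hunks: the first binds the previously swallowed exception (`except Exception as e:`) and logs it as a warning, and both drop a "Tokenizer loaded" info log that fired inside the try block. A minimal sketch of the resulting fallback pattern, with an illustrative class name (the real code lives in app/models/llm_tokenizers.py):

from typing import Optional

class FallbackTokenizerLoader:
    def __init__(self, model_name: str) -> None:
        self.model_name = model_name
        try:
            from transformers.models.llama import LlamaTokenizer
            self._tokenizer_type: Optional[type] = LlamaTokenizer
        except Exception as e:
            # Surface the real failure instead of discarding it silently.
            print(f"Tokenizer load failed for {model_name}: {e}")
            self._tokenizer_type = None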
1 change: 1 addition & 0 deletions app/routers/v1.py
@@ -36,6 +36,7 @@

logger = ApiLogger("||v1||")


# Importing llama.cpp
try:
from app.utils.chat.text_generations.llama_cpp.generator import (
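
The router change itself is only a blank line, but the surrounding idiom is worth noting: llama.cpp support is imported inside a try block so the API can still start when the native backend is unavailable. A hedged sketch of that optional-import pattern (the imported name is assumed for illustration):

try:
    from app.utils.chat.text_generations.llama_cpp.generator import (
        LlamaCppCompletionGenerator,  # name assumed for illustration
    )
except ImportError:
    LlamaCppCompletionGenerator = None  # llama.cpp features stay disabled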
25 changes: 23 additions & 2 deletions app/utils/chat/text_generations/llama_cpp/generator.py
@@ -18,8 +18,31 @@

from .. import BaseCompletionGenerator


logger = ApiLogger("||🦙 llama_cpp.generator||")


def ensure_dll_exists() -> None:
import os
import subprocess

dll_path = "./repositories/llama_cpp/llama_cpp/llama.dll"

if not os.path.exists("./repositories"):
raise FileNotFoundError(
"🦙 Could not find llama-cpp-python repositories folder!"
)

if not os.path.exists(dll_path):
logger.critical("🦙 llama.cpp DLL not found, building it...")
build_script_path = "build-llama-cpp.bat"
subprocess.run([build_script_path])
logger.critical("🦙 llama.cpp DLL built!")


sys.path.insert(0, str(Path("repositories/llama_cpp")))
try:
ensure_dll_exists()
from repositories.llama_cpp import llama_cpp

print("🦙 llama-cpp-python repository found!")
@@ -34,8 +57,6 @@
if TYPE_CHECKING:
from app.models.llms import LlamaCppModel

logger = ApiLogger("||🦙 llama_cpp.generator||")


def _make_logit_bias_processor(
llama: llama_cpp.Llama,
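
One caveat with the new ensure_dll_exists: subprocess.run without check=True returns normally even if the batch build fails, so the later import can still crash on a missing DLL. A possible hardening, offered as a sketch rather than the committed code:

import os
import subprocess

def ensure_dll_exists_checked() -> None:
    # Same paths as the committed helper, but raise if the build script
    # exits non-zero instead of continuing with a missing DLL.
    dll_path = "./repositories/llama_cpp/llama_cpp/llama.dll"
    if not os.path.exists("./repositories"):
        raise FileNotFoundError("Could not find llama-cpp-python repositories folder!")
    if not os.path.exists(dll_path):
        subprocess.run("build-llama-cpp.bat", check=True, shell=True)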
12 changes: 6 additions & 6 deletions app/web/main.dart.js
@@ -88171,7 +88171,7 @@ var $async$Bg=A.N(function(b,c){if(b===1)return A.J(c,r)
while(true)switch(s){case 0:p=q.a
if(p==null)p=q.a=new A.arW(new A.a9z(q),new A.a9A(q),new A.a9B(q),new A.a9C(q))
s=2
return A.F(p.w9("ws://localhost:8000/ws/chat/"+a),$async$Bg)
return A.F(p.w9("ws://localhost:8001/ws/chat/"+a),$async$Bg)
case 2:return A.K(null,r)}})
return A.L($async$Bg,r)},
lc(){var s=0,r=A.M(t.H),q=this,p
@@ -88450,7 +88450,7 @@ case 3:s=5
return A.F(A.a8a(a),$async$pJ)
case 5:case 4:n=J
s=6
return A.F(A.nk(A.b([A.kv(a,null,B.q7,"Failed to fetch API Keys",new A.aiE(p),new A.aiF(p),200,"http://localhost:8000/api/user/apikeys"),A.kv(a,null,B.q7,"Failed to fetch user info",null,new A.aiG(p),200,"http://localhost:8000/api/user/me")],t.GP),t.v),$async$pJ)
return A.F(A.nk(A.b([A.kv(a,null,B.q7,"Failed to fetch API Keys",new A.aiE(p),new A.aiF(p),200,"http://localhost:8001/api/user/apikeys"),A.kv(a,null,B.q7,"Failed to fetch user info",null,new A.aiG(p),200,"http://localhost:8001/api/user/me")],t.GP),t.v),$async$pJ)
case 6:o=n.a7e(c,new A.aiH())
if(!o.gaa(o).q()){q=null
s=1
@@ -88464,7 +88464,7 @@ var $async$jP=A.N(function(d,e){if(d===1){o=e
s=p}while(true)switch(s){case 0:p=4
k=t.N
s=7
return A.F(A.kv(null,A.n(["email",b,"password",c],k,k),B.l5,"Failed to register",new A.aiI(),new A.aiJ(n),201,"http://localhost:8000/api/auth/register/email"),$async$jP)
return A.F(A.kv(null,A.n(["email",b,"password",c],k,k),B.l5,"Failed to register",new A.aiI(),new A.aiJ(n),201,"http://localhost:8001/api/auth/register/email"),$async$jP)
case 7:m=e
k=m==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ud68c\uc6d0\uac00\uc785 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully registered"):A.wN(m,"Error")
q=k
@@ -88490,7 +88490,7 @@ return A.L($async$jP,r)},
mH(a){var s=0,r=A.M(t.Fd),q,p=this,o
var $async$mH=A.N(function(b,c){if(b===1)return A.J(c,r)
while(true)switch(s){case 0:s=3
return A.F(A.kv(p.c,null,B.RO,"Failed to unregister",null,new A.aiK(p),204,"http://localhost:8000/api/auth/register"),$async$mH)
return A.F(A.kv(p.c,null,B.RO,"Failed to unregister",null,new A.aiK(p),204,"http://localhost:8001/api/auth/register"),$async$mH)
case 3:o=c
q=o==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ud68c\uc6d0\ud0c8\ud1f4 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully unregistered"):A.wN(o,"Error")
s=1
@@ -88503,7 +88503,7 @@ var $async$mr=A.N(function(c,d){if(c===1){o=d
s=p}while(true)switch(s){case 0:p=4
k=t.N
s=7
return A.F(A.kv(null,A.n(["email",a,"password",b],k,k),B.l5,"Failed to login",new A.aiC(),new A.aiD(n),200,"http://localhost:8000/api/auth/login/email"),$async$mr)
return A.F(A.kv(null,A.n(["email",a,"password",b],k,k),B.l5,"Failed to login",new A.aiC(),new A.aiD(n),200,"http://localhost:8001/api/auth/login/email"),$async$mr)
case 7:m=d
k=m==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ub85c\uadf8\uc778 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully logged in"):A.wN(m,"Error")
q=k
@@ -88565,7 +88565,7 @@ apz(a){var s=0,r=A.M(t.Fd),q,p=this,o,n
var $async$m9=A.N(function(b,c){if(b===1)return A.J(c,r)
while(true)switch(s){case 0:o=t.N
s=3
return A.F(A.kv(p.c,A.n(["user_memo",a],o,o),B.l5,"Failed to create API key.",null,new A.aiB(p),201,"http://localhost:8000/api/user/apikeys"),$async$m9)
return A.F(A.kv(p.c,A.n(["user_memo",a],o,o),B.l5,"Failed to create API key.",null,new A.aiB(p),201,"http://localhost:8001/api/user/apikeys"),$async$m9)
case 3:n=c
q=n==null?A.WA("API \ud0a4\uac00 \uc131\uacf5\uc801\uc73c\ub85c \uc0dd\uc131\ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully created API key"):A.wN(n,"Error")
s=1
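
All six edits to the compiled Flutter bundle are the same change: hardcoded API and websocket URLs move from port 8000 to 8001, matching the .env-sample note that non-Docker runs use port 8001. A hedged sketch of the server-side counterpart (illustrative only; the actual startup logic lives elsewhere in the app):

import os

def resolve_port(running_in_docker: bool) -> int:
    # PORT applies to the Docker setup; bare runs fall back to 8001,
    # the port the web client now targets.
    return int(os.environ.get("PORT", "8000")) if running_in_docker else 8001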
10 changes: 10 additions & 0 deletions build-llama-cpp.bat
@@ -0,0 +1,10 @@
cd ./repositories/llama_cpp/vendor/llama.cpp
rmdir /s /q build
mkdir build
cd build
cmake .. -DBUILD_SHARED_LIBS=ON -DLLAMA_CUBLAS=ON
cmake --build . --config Release
cd ../../../../..
copy repositories\llama_cpp\vendor\llama.cpp\build\bin\Release\llama.dll repositories\llama_cpp\llama_cpp
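
For reference, a hypothetical cross-platform equivalent of the batch script above, in case the same steps ever need to run outside cmd.exe (paths and CMake flags mirror the script; none of this is committed code):

import shutil
import subprocess
from pathlib import Path

src = Path("repositories/llama_cpp/vendor/llama.cpp")
build_dir = src / "build"
shutil.rmtree(build_dir, ignore_errors=True)  # rmdir /s /q build
build_dir.mkdir(parents=True)                 # mkdir build
subprocess.run(["cmake", "..", "-DBUILD_SHARED_LIBS=ON", "-DLLAMA_CUBLAS=ON"], cwd=build_dir, check=True)
subprocess.run(["cmake", "--build", ".", "--config", "Release"], cwd=build_dir, check=True)
shutil.copy(build_dir / "bin" / "Release" / "llama.dll", Path("repositories/llama_cpp/llama_cpp"))  # copy llama.dll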

