Commit cde07e2: hotfix
c0sogi committed Jul 3, 2023
1 parent c6d815c commit cde07e2
Showing 6 changed files with 89 additions and 28 deletions.
64 changes: 47 additions & 17 deletions .env-sample
@@ -1,19 +1,60 @@
# DELETE THESE COMMENT LINES!!
# DEFAULT_LLM_MODEL is defined in `LLM_MODELS` in `app\models\llms.py`

API_ENV="test"
# API_ENV can be "local", "test", "prod"
API_ENV="local"

# Port for Docker. If you run this app without Docker, this is ignored and port 8001 is used.
PORT=8000

# Default LLM model for each chat.
DEFAULT_LLM_MODEL="gpt_3_5_turbo"

# Your MySQL DB info
MYSQL_DATABASE="traffic"
MYSQL_TEST_DATABASE="testing_db"
MYSQL_ROOT_PASSWORD="YOUR_MYSQL_PASSWORD_HERE"
MYSQL_USER="traffic_admin"
MYSQL_PASSWORD="YOUR_DB_ADMIN_PASSWORD_HERE"

# Your Redis DB info
REDIS_DATABASE="0"
REDIS_PASSWORD="YOUR_REDIS_PASSWORD_HERE"

# Your JWT secret key
JWT_SECRET="ANY_PASSWORD_FOR_JWT_TOKEN_GENERATION_HERE"

# Your OpenAI API key
OPENAI_API_KEY="sk-*************"

# Chatbot settings
# Summarize for chat: summarize any message longer than SUMMARIZATION_THRESHOLD tokens.
SUMMARIZE_FOR_CHAT=True
SUMMARIZATION_THRESHOLD=512

# Text for embedding is split into chunks of EMBEDDING_TOKEN_CHUNK_SIZE tokens;
# EMBEDDING_TOKEN_CHUNK_OVERLAP is how many tokens adjacent chunks share.
EMBEDDING_TOKEN_CHUNK_SIZE=512
EMBEDDING_TOKEN_CHUNK_OVERLAP=128

# The shared vector collection name. This collection is shared by all users.
QDRANT_COLLECTION="SharedCollection"

# To add a prefix or suffix to every prompt sent to the LLM, set these.
GLOBAL_PREFIX=""
GLOBAL_SUFFIX=""

# If you want to use a local embedding model instead of OpenAI's Ada-002,
# set LOCAL_EMBEDDING_MODEL to "intfloat/e5-large-v2" or another Hugging Face embedding model repo.
# Warning: local embedding needs a lot of computing resources!
LOCAL_EMBEDDING_MODEL=None


# Define these if you want to run a production server with API_ENV="prod".
HOST_IP="OPTIONAL_YOUR_IP_HERE e.g. 192.168.0.2"
HOST_MAIN="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com; needed for TLS certificate registration when API_ENV is prod"
HOST_SUB="OPTIONAL_YOUR_SUB_DOMAIN_HERE e.g. mobile.yourdomain.com"
MY_EMAIL="OPTIONAL_YOUR_EMAIL_HERE e.g. you@yourdomain.com; needed for TLS certificate registration when API_ENV is prod"

# Not used.
AWS_ACCESS_KEY="OPTIONAL_IF_YOU_NEED"
AWS_SECRET_KEY="OPTIONAL_IF_YOU_NEED"
AWS_AUTHORIZED_EMAIL="OPTIONAL_IF_YOU_NEED"
@@ -23,10 +64,8 @@ SAMPLE_SECRET_KEY="OPTIONAL_IF_YOU_NEED_FOR_TESTING"
KAKAO_RESTAPI_TOKEN="OPTIONAL_IF_YOU_NEED e.g. Bearer XXXXX"
WEATHERBIT_API_KEY="OPTIONAL_IF_YOU_NEED"
NASA_API_KEY="OPTIONAL_IF_YOU_NEED"
HOST_IP="OPTIONAL_YOUR_IP_HERE e.g. 192.168.0.2"
HOST_MAIN="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com, if you are running API_ENV as production, this will be needed for TLS certificate registration"
HOST_SUB="OPTIONAL_YOUR_SUB_DOMAIN_HERE e.g. mobile.yourdomain.com"
MY_EMAIL="OPTIONAL_YOUR_DOMAIN_HERE e.g. yourdomain.com, if you are running API_ENV as production, this will be needed for TLS certificate registration"

# For translation. If you don't need translation, you can ignore these.
PAPAGO_CLIENT_ID="OPTIONAL_FOR_TRANSLATION"
PAPAGO_CLIENT_SECRET="OPTIONAL_FOR_TRANSLATION"
GOOGLE_CLOUD_PROJECT_ID="OPTIONAL_FOR_TRANSLATION e.g. top-abcd-01234"
@@ -35,13 +74,4 @@ GOOGLE_TRANSLATE_OAUTH_ID="OPTIONAL_FOR_TRANSLATION"
GOOGLE_TRANSLATE_OAUTH_SECRET="OPTIONAL_FOR_TRANSLATION"
RAPIDAPI_KEY="OPTIONAL_FOR_TRANSLATION"
CUSTOM_TRANSLATE_URL="OPTIONAL_FOR_TRANSLATION"
SUMMARIZE_FOR_CHAT=True
SUMMARIZATION_THRESHOLD=512
EMBEDDING_TOKEN_CHUNK_SIZE=512
EMBEDDING_TOKEN_CHUNK_OVERLAP=128
QDRANT_COLLECTION="SharedCollection"
GLOBAL_PREFIX=""
GLOBAL_SUFFIX=""
LOCAL_EMBEDDING_MODEL=None
# If you want to use local embedding instead of OpenAI's Ada-002,
# set LOCAL_EMBEDDING_MODEL as "intfloat/e5-large-v2" or other huggingface embedding model repo.
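
On the chat settings above: SUMMARIZE_FOR_CHAT gates summarization on message length, and GLOBAL_PREFIX/GLOBAL_SUFFIX wrap every prompt sent to the LLM. A minimal sketch of how these variables might be consumed, with illustrative function names (assumptions, not committed code):

import os

def wrap_prompt(prompt: str) -> str:
    # GLOBAL_PREFIX / GLOBAL_SUFFIX are applied to every prompt sent to the LLM.
    return os.environ.get("GLOBAL_PREFIX", "") + prompt + os.environ.get("GLOBAL_SUFFIX", "")

def maybe_summarize(message: str, count_tokens, summarize) -> str:
    # Summarize only when enabled and the message exceeds the token threshold;
    # the two callables are supplied by the app.
    enabled = os.environ.get("SUMMARIZE_FOR_CHAT", "True") == "True"
    threshold = int(os.environ.get("SUMMARIZATION_THRESHOLD", "512"))
    return summarize(message) if enabled and count_tokens(message) > threshold else message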
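Likewise, the embedding chunk settings describe a sliding window: each chunk holds EMBEDDING_TOKEN_CHUNK_SIZE tokens and shares EMBEDDING_TOKEN_CHUNK_OVERLAP tokens with its neighbor. A sketch of that windowing over an already-tokenized list (illustrative only):

def chunk_tokens(tokens: list, size: int = 512, overlap: int = 128) -> list:
    if not tokens:
        return []
    # Neighboring chunks advance by (size - overlap) tokens, so each
    # consecutive pair shares exactly `overlap` tokens.
    step = size - overlap
    return [tokens[i:i + size] for i in range(0, max(len(tokens) - overlap, 1), step)]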

5 changes: 2 additions & 3 deletions app/models/llm_tokenizers.py
@@ -81,8 +81,8 @@ def __init__(self, model_name: str):
from transformers.models.llama import LlamaTokenizer as _LlamaTokenizer

self._tokenizer_type = _LlamaTokenizer
ApiLogger.cinfo("Tokenizer loaded: ", self.model_name)
except Exception:
except Exception as e:
ApiLogger.cwarning(str(e))
self._tokenizer_type = None
self.model_name = model_name
self._tokenizer = None
@@ -129,7 +129,6 @@ def __init__(self, model_name: str):
)

self._tokenizer_type = _ExllamaTokenizer
ApiLogger.cinfo("Tokenizer loaded: ", self.model_name)
except Exception:
self._tokenizer_type = None
self.model_name = model_name
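
The substance of these hunks: the first binds the previously swallowed exception (`except Exception as e:`) and logs it as a warning, and both drop a "Tokenizer loaded" info log that fired inside the try block. A minimal sketch of the resulting fallback pattern, with an illustrative class name (the real code lives in app/models/llm_tokenizers.py):

from typing import Optional

class FallbackTokenizerLoader:
    def __init__(self, model_name: str) -> None:
        self.model_name = model_name
        try:
            from transformers.models.llama import LlamaTokenizer
            self._tokenizer_type: Optional[type] = LlamaTokenizer
        except Exception as e:
            # Surface the real failure instead of discarding it silently.
            print(f"Tokenizer load failed for {model_name}: {e}")
            self._tokenizer_type = None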
1 change: 1 addition & 0 deletions app/routers/v1.py
@@ -36,6 +36,7 @@

logger = ApiLogger("||v1||")


# Importing llama.cpp
try:
from app.utils.chat.text_generations.llama_cpp.generator import (
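
The router change itself is only a blank line, but the surrounding idiom is worth noting: llama.cpp support is imported inside a try block so the API can still start when the native backend is unavailable. A hedged sketch of that optional-import pattern (the imported name is assumed for illustration):

try:
    from app.utils.chat.text_generations.llama_cpp.generator import (
        LlamaCppCompletionGenerator,  # name assumed for illustration
    )
except ImportError:
    LlamaCppCompletionGenerator = None  # llama.cpp features stay disabled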
25 changes: 23 additions & 2 deletions app/utils/chat/text_generations/llama_cpp/generator.py
@@ -18,8 +18,31 @@

from .. import BaseCompletionGenerator


logger = ApiLogger("||🦙 llama_cpp.generator||")


def ensure_dll_exists() -> None:
import os
import subprocess

dll_path = "./repositories/llama_cpp/llama_cpp/llama.dll"

if not os.path.exists("./repositories"):
raise FileNotFoundError(
"🦙 Could not find llama-cpp-python repositories folder!"
)

if not os.path.exists(dll_path):
logger.critical("🦙 llama.cpp DLL not found, building it...")
build_script_path = "build-llama-cpp.bat"
subprocess.run([build_script_path])
logger.critical("🦙 llama.cpp DLL built!")


sys.path.insert(0, str(Path("repositories/llama_cpp")))
try:
ensure_dll_exists()
from repositories.llama_cpp import llama_cpp

print("🦙 llama-cpp-python repository found!")
@@ -34,8 +57,6 @@
if TYPE_CHECKING:
from app.models.llms import LlamaCppModel

logger = ApiLogger("||🦙 llama_cpp.generator||")


def _make_logit_bias_processor(
llama: llama_cpp.Llama,
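
One caveat with the new ensure_dll_exists: subprocess.run without check=True returns normally even if the batch build fails, so the later import can still crash on a missing DLL. A possible hardening, offered as a sketch rather than the committed code:

import os
import subprocess

def ensure_dll_exists_checked() -> None:
    # Same paths as the committed helper, but raise if the build script
    # exits non-zero instead of continuing with a missing DLL.
    dll_path = "./repositories/llama_cpp/llama_cpp/llama.dll"
    if not os.path.exists("./repositories"):
        raise FileNotFoundError("Could not find llama-cpp-python repositories folder!")
    if not os.path.exists(dll_path):
        subprocess.run("build-llama-cpp.bat", check=True, shell=True)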
12 changes: 6 additions & 6 deletions app/web/main.dart.js
@@ -88171,7 +88171,7 @@ var $async$Bg=A.N(function(b,c){if(b===1)return A.J(c,r)
while(true)switch(s){case 0:p=q.a
if(p==null)p=q.a=new A.arW(new A.a9z(q),new A.a9A(q),new A.a9B(q),new A.a9C(q))
s=2
return A.F(p.w9("ws://localhost:8000/ws/chat/"+a),$async$Bg)
return A.F(p.w9("ws://localhost:8001/ws/chat/"+a),$async$Bg)
case 2:return A.K(null,r)}})
return A.L($async$Bg,r)},
lc(){var s=0,r=A.M(t.H),q=this,p
@@ -88450,7 +88450,7 @@ case 3:s=5
return A.F(A.a8a(a),$async$pJ)
case 5:case 4:n=J
s=6
return A.F(A.nk(A.b([A.kv(a,null,B.q7,"Failed to fetch API Keys",new A.aiE(p),new A.aiF(p),200,"http://localhost:8000/api/user/apikeys"),A.kv(a,null,B.q7,"Failed to fetch user info",null,new A.aiG(p),200,"http://localhost:8000/api/user/me")],t.GP),t.v),$async$pJ)
return A.F(A.nk(A.b([A.kv(a,null,B.q7,"Failed to fetch API Keys",new A.aiE(p),new A.aiF(p),200,"http://localhost:8001/api/user/apikeys"),A.kv(a,null,B.q7,"Failed to fetch user info",null,new A.aiG(p),200,"http://localhost:8001/api/user/me")],t.GP),t.v),$async$pJ)
case 6:o=n.a7e(c,new A.aiH())
if(!o.gaa(o).q()){q=null
s=1
@@ -88464,7 +88464,7 @@ var $async$jP=A.N(function(d,e){if(d===1){o=e
s=p}while(true)switch(s){case 0:p=4
k=t.N
s=7
return A.F(A.kv(null,A.n(["email",b,"password",c],k,k),B.l5,"Failed to register",new A.aiI(),new A.aiJ(n),201,"http://localhost:8000/api/auth/register/email"),$async$jP)
return A.F(A.kv(null,A.n(["email",b,"password",c],k,k),B.l5,"Failed to register",new A.aiI(),new A.aiJ(n),201,"http://localhost:8001/api/auth/register/email"),$async$jP)
case 7:m=e
k=m==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ud68c\uc6d0\uac00\uc785 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully registered"):A.wN(m,"Error")
q=k
@@ -88490,7 +88490,7 @@ return A.L($async$jP,r)},
mH(a){var s=0,r=A.M(t.Fd),q,p=this,o
var $async$mH=A.N(function(b,c){if(b===1)return A.J(c,r)
while(true)switch(s){case 0:s=3
return A.F(A.kv(p.c,null,B.RO,"Failed to unregister",null,new A.aiK(p),204,"http://localhost:8000/api/auth/register"),$async$mH)
return A.F(A.kv(p.c,null,B.RO,"Failed to unregister",null,new A.aiK(p),204,"http://localhost:8001/api/auth/register"),$async$mH)
case 3:o=c
q=o==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ud68c\uc6d0\ud0c8\ud1f4 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully unregistered"):A.wN(o,"Error")
s=1
@@ -88503,7 +88503,7 @@ var $async$mr=A.N(function(c,d){if(c===1){o=d
s=p}while(true)switch(s){case 0:p=4
k=t.N
s=7
return A.F(A.kv(null,A.n(["email",a,"password",b],k,k),B.l5,"Failed to login",new A.aiC(),new A.aiD(n),200,"http://localhost:8000/api/auth/login/email"),$async$mr)
return A.F(A.kv(null,A.n(["email",a,"password",b],k,k),B.l5,"Failed to login",new A.aiC(),new A.aiD(n),200,"http://localhost:8001/api/auth/login/email"),$async$mr)
case 7:m=d
k=m==null?A.WA("\uc131\uacf5\uc801\uc73c\ub85c \ub85c\uadf8\uc778 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully logged in"):A.wN(m,"Error")
q=k
@@ -88565,7 +88565,7 @@ apz(a){var s=0,r=A.M(t.Fd),q,p=this,o,n
var $async$m9=A.N(function(b,c){if(b===1)return A.J(c,r)
while(true)switch(s){case 0:o=t.N
s=3
return A.F(A.kv(p.c,A.n(["user_memo",a],o,o),B.l5,"Failed to create API key.",null,new A.aiB(p),201,"http://localhost:8000/api/user/apikeys"),$async$m9)
return A.F(A.kv(p.c,A.n(["user_memo",a],o,o),B.l5,"Failed to create API key.",null,new A.aiB(p),201,"http://localhost:8001/api/user/apikeys"),$async$m9)
case 3:n=c
q=n==null?A.WA("API \ud0a4\uac00 \uc131\uacf5\uc801\uc73c\ub85c \uc0dd\uc131\ub418\uc5c8\uc2b5\ub2c8\ub2e4.","Successfully created API key"):A.wN(n,"Error")
s=1
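
All six edits to the compiled Flutter bundle are the same change: hardcoded API and websocket URLs move from port 8000 to 8001, matching the .env-sample note that non-Docker runs use port 8001. A hedged sketch of the server-side counterpart (illustrative only; the actual startup logic lives elsewhere in the app):

import os

def resolve_port(running_in_docker: bool) -> int:
    # PORT applies to the Docker setup; bare runs fall back to 8001,
    # the port the web client now targets.
    return int(os.environ.get("PORT", "8000")) if running_in_docker else 8001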
10 changes: 10 additions & 0 deletions build-llama-cpp.bat
@@ -0,0 +1,10 @@
cd ./repositories/llama_cpp/vendor/llama.cpp
rmdir /s /q build
mkdir build
cd build
cmake .. -DBUILD_SHARED_LIBS=ON -DLLAMA_CUBLAS=ON
cmake --build . --config Release
cd ../../../../..
copy repositories\llama_cpp\vendor\llama.cpp\build\bin\Release\llama.dll repositories\llama_cpp\llama_cpp
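
For reference, a hypothetical cross-platform equivalent of the batch script above, in case the same steps ever need to run outside cmd.exe (paths and CMake flags mirror the script; none of this is committed code):

import shutil
import subprocess
from pathlib import Path

src = Path("repositories/llama_cpp/vendor/llama.cpp")
build_dir = src / "build"
shutil.rmtree(build_dir, ignore_errors=True)  # rmdir /s /q build
build_dir.mkdir(parents=True)                 # mkdir build
subprocess.run(["cmake", "..", "-DBUILD_SHARED_LIBS=ON", "-DLLAMA_CUBLAS=ON"], cwd=build_dir, check=True)
subprocess.run(["cmake", "--build", ".", "--config", "Release"], cwd=build_dir, check=True)
shutil.copy(build_dir / "bin" / "Release" / "llama.dll", Path("repositories/llama_cpp/llama_cpp"))  # copy llama.dll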

