refactor & fixed chatroom name not modifying bug
c0sogi committed Jun 28, 2023
1 parent a1cb2eb commit b5fd98e
Showing 26 changed files with 1,127 additions and 1,207 deletions.
73 changes: 7 additions & 66 deletions app/common/app_settings.py
@@ -1,3 +1,4 @@
from contextlib import asynccontextmanager
from multiprocessing import Process
from os import kill
from signal import SIGINT
@@ -16,6 +17,7 @@
from starlette_admin.views import DropDown, Link

from app.auth.admin import MyAuthProvider
from app.common.app_settings_llama_cpp import monitor_llama_cpp_server
from app.common.config import JWT_SECRET, Config
from app.database.connection import cache, db
from app.database.schemas.auth import ApiKeys, ApiWhiteLists, Users
@@ -30,72 +32,6 @@
from app.viewmodels.admin import ApiKeyAdminView, UserAdminView


def check_health(url: str) -> bool:
"""Check if the given url is available or not"""
try:
schema = parse.urlparse(url).scheme
netloc = parse.urlparse(url).netloc
if requests.get(f"{schema}://{netloc}/health").status_code != 200:
return False
return True
except Exception:
return False


def start_llama_cpp_server(shared: Shared):
"""Start Llama CPP server. if it is already running, terminate it first."""
from app.start_llama_cpp_server import run

if shared.process.is_alive():
ApiLogger.cwarning("Terminating existing Llama CPP server")
shared.process.terminate()
shared.process.join()

ApiLogger.ccritical("Starting Llama CPP server")
shared.process = Process(target=run, daemon=True)
shared.process.start()


def shutdown_llama_cpp_server(shared: Shared):
"""Shutdown Llama CPP server."""
ApiLogger.ccritical("Shutting down Llama CPP server")
if shared.process.is_alive() and shared.process.pid:
kill(shared.process.pid, SIGINT)
shared.process.join()


def monitor_llama_cpp_server(
config: Config,
shared: Shared,
) -> None:
"""Monitors the Llama CPP server and handles server availability.
Parameters:
- `config: Config`: An object representing the server configuration.
- `shared: Shared`: An object representing shared data."""
thread_sigterm: Event = shared.thread_terminate_signal
if not config.llama_cpp_completion_url:
return
while True:
if not check_health(config.llama_cpp_completion_url):
if thread_sigterm.is_set():
break
if config.is_llama_cpp_booting:
continue
ApiLogger.cerror("Llama CPP server is not available")
config.is_llama_cpp_available = False
config.is_llama_cpp_booting = True
try:
start_llama_cpp_server(shared)
except ImportError:
ApiLogger.cerror("ImportError: Llama CPP server is not available")
return
else:
config.is_llama_cpp_booting = False
config.is_llama_cpp_available = True
shutdown_llama_cpp_server(shared)


async def on_startup():
"""
Performs necessary operations during application startup.
@@ -300,4 +236,9 @@ def create_app(config: Config) -> FastAPI:
)
new_app.state.config = config
new_app.state.shared = Shared()

@new_app.get("/health")
async def health():
return "ok"

return new_app
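
For context, the health route added above returns the plain string "ok"; the sketch below is one way to probe it from outside the app. It is a minimal sketch and not part of the commit, assuming the main API listens on http://localhost:8001 (the non-Docker default port set in config.py); the helper name and timeout are illustrative.

# Minimal sketch, not part of the commit: probe the new /health route.
# Assumes the main app is reachable at http://localhost:8001.
import requests


def is_api_up(base_url: str = "http://localhost:8001") -> bool:
    """Return True if GET /health answers 200 with the JSON string "ok"."""
    try:
        response = requests.get(f"{base_url}/health", timeout=2)
        return response.status_code == 200 and response.json() == "ok"
    except requests.RequestException:
        return False


if __name__ == "__main__":
    print(is_api_up())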
134 changes: 134 additions & 0 deletions app/common/app_settings_llama_cpp.py
@@ -0,0 +1,134 @@
from contextlib import asynccontextmanager
from multiprocessing import Process
from os import kill
from signal import SIGINT
from threading import Event
from urllib import parse

import requests
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware

from app.common.config import Config
from app.shared import Shared
from app.utils.logger import ApiLogger


def check_health(url: str) -> bool:
"""Check if the given url is available or not"""
try:
schema = parse.urlparse(url).scheme
netloc = parse.urlparse(url).netloc
if requests.get(f"{schema}://{netloc}/health").status_code != 200:
return False
return True
except Exception:
return False


def start_llama_cpp_server(config: Config, shared: Shared):
"""Start Llama CPP server. if it is already running, terminate it first."""

if shared.process.is_alive():
ApiLogger.cwarning("Terminating existing Llama CPP server")
shared.process.terminate()
shared.process.join()

if config.llama_cpp_server_port is None:
raise NotImplementedError("Llama CPP server port is not set")

ApiLogger.ccritical("Starting Llama CPP server")
shared.process = Process(
target=run_llama_cpp, args=(config.llama_cpp_server_port,), daemon=True
)
shared.process.start()


def shutdown_llama_cpp_server(shared: Shared):
"""Shutdown Llama CPP server."""
ApiLogger.ccritical("Shutting down Llama CPP server")
if shared.process.is_alive() and shared.process.pid:
kill(shared.process.pid, SIGINT)
shared.process.join()


def monitor_llama_cpp_server(
config: Config,
shared: Shared,
) -> None:
"""Monitors the Llama CPP server and handles server availability.
Parameters:
- `config: Config`: An object representing the server configuration.
- `shared: Shared`: An object representing shared data."""
thread_sigterm: Event = shared.thread_terminate_signal
if not config.llama_cpp_completion_url:
return
while True:
if not check_health(config.llama_cpp_completion_url):
if thread_sigterm.is_set():
break
if config.is_llama_cpp_booting:
continue
ApiLogger.cerror("Llama CPP server is not available")
config.is_llama_cpp_available = False
config.is_llama_cpp_booting = True
try:
start_llama_cpp_server(config=config, shared=shared)
except (ImportError, NotImplementedError):
ApiLogger.cerror("ImportError: Llama CPP server is not available")
return
except Exception:
continue
else:
config.is_llama_cpp_booting = False
config.is_llama_cpp_available = True
shutdown_llama_cpp_server(shared)


@asynccontextmanager
async def lifespan_llama_cpp(app: FastAPI):
ApiLogger.ccritical("🦙 Llama.cpp server is running")
yield
ApiLogger.ccritical("🦙 Shutting down llama.cpp server...")


def create_app_llama_cpp():
from app.routers import v1

new_app = FastAPI(
title="🦙 llama.cpp Python API",
version="0.0.1",
lifespan=lifespan_llama_cpp,
)
new_app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)

@new_app.get("/health")
async def health():
return "ok"

new_app.include_router(v1.router)
return new_app


def run_llama_cpp(port: int) -> None:
from uvicorn import Config, Server

from maintools import initialize_before_launch

initialize_before_launch()

Server(
config=Config(
create_app_llama_cpp(),
host="0.0.0.0",
port=port,
log_level="warning",
)
).run()
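
The monitor thread normally launches this sub-application via start_llama_cpp_server; as a rough sketch of what that amounts to, run_llama_cpp can also be driven directly in a child process. This standalone launcher is an assumption for illustration, not code from the commit, and presumes the app package (and its maintools dependency) is importable and that port 8002 is free.

# Minimal sketch, not part of the commit: launch the llama.cpp sub-app directly.
from multiprocessing import Process

from app.common.app_settings_llama_cpp import run_llama_cpp

if __name__ == "__main__":
    proc = Process(target=run_llama_cpp, args=(8002,), daemon=True)
    proc.start()
    proc.join()  # block until the embedded uvicorn server exits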
3 changes: 2 additions & 1 deletion app/common/config.py
@@ -146,9 +146,10 @@ class Config(metaclass=SingletonMetaClass):
allowed_sites: list[str] = field(default_factory=lambda: ["*"])
llama_cpp_completion_url: Optional[str] = "http://localhost:8002/v1/completions"
llama_cpp_embedding_url: Optional[str] = "http://localhost:8002/v1/embeddings"
llama_cpp_server_port: Optional[int] = 8002

def __post_init__(self):
self.is_llama_cpp_available: bool = self.llama_cpp_completion_url is not None
self.is_llama_cpp_available: bool = False
self.is_llama_cpp_booting: bool = False
if not DOCKER_MODE:
self.port = 8001
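
With this change, is_llama_cpp_available starts out False and is only flipped to True by monitor_llama_cpp_server after a successful health check, instead of being inferred from the completion URL. Below is a minimal sketch of reading the new port field and the two flags, assuming the singleton metaclass hands back the already-initialized instance when Config() is called with no arguments.

# Minimal sketch, assuming Config() returns the populated singleton instance.
from app.common.config import Config

config = Config()
print(config.llama_cpp_server_port)   # 8002 unless overridden
print(config.is_llama_cpp_available)  # False until the monitor's health check passes
print(config.is_llama_cpp_booting)    # True only while a (re)start is in flight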
9 changes: 9 additions & 0 deletions app/common/constants.py
@@ -49,6 +49,10 @@ class DescriptionTemplates:
),
input_variables=[],
)
USER_AI__VERT_SHORT: PromptTemplate = PromptTemplate(
template="You are a helpful AI assistant.",
input_variables=[],
)

USER_AI__GAME: PromptTemplate = PromptTemplate(
template=(
@@ -96,6 +100,11 @@ class ChatTurnTemplates:
input_variables=["role", "content"],
template_format="f-string",
)
ROLE_CONTENT_5: PromptTemplate = PromptTemplate(
template="{role}: {content}\n",
input_variables=["role", "content"],
template_format="f-string",
)


class SummarizationTemplates:
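
ROLE_CONTENT_5 is a plain "{role}: {content}" turn template with a trailing newline. Here is a minimal sketch of rendering it, assuming PromptTemplate here is LangChain's and exposes the usual format() method.

# Minimal sketch, assuming ChatTurnTemplates.ROLE_CONTENT_5 is a LangChain PromptTemplate.
from app.common.constants import ChatTurnTemplates

turn = ChatTurnTemplates.ROLE_CONTENT_5.format(role="Human", content="Hello!")
print(repr(turn))  # 'Human: Hello!\n'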
28 changes: 28 additions & 0 deletions app/models/llms.py
@@ -268,6 +268,7 @@ class LLMModels(EnumMixin):
max_total_tokens=2048, # context tokens (n_ctx)
max_tokens_per_request=1024, # The maximum number of tokens to generate.
token_margin=8,
prefix_template=None,
tokenizer=LlamaTokenizer("victor123/WizardLM-13B-1.0"),
model_path="wizardLM-13B-Uncensored.ggmlv3.q5_K_M.bin", # The filename of model. Must end with .bin.
user_chat_roles=UserChatRoles(
@@ -348,6 +349,33 @@
prefix_template=None,
embedding=True,
)
orca_mini_3b = LlamaCppModel(
name="orca_mini_3B-GGML",
max_total_tokens=2048, # context tokens (n_ctx)
max_tokens_per_request=1024, # The maximum number of tokens to generate.
token_margin=8,
tokenizer=LlamaTokenizer("psmathur/orca_mini_3b"),
model_path="orca-mini-3b.ggmlv3.q4_1.bin", # The filename of model. Must end with .bin.
chat_turn_prompt=ChatTurnTemplates.ROLE_CONTENT_2,
user_chat_roles=UserChatRoles(
user="User",
ai="Response",
system="System",
),
)
guanaco_33b = LlamaCppModel(
name="guanaco-33B-GGML",
max_total_tokens=2048, # context tokens (n_ctx)
max_tokens_per_request=1024, # The maximum number of tokens to generate.
token_margin=8,
tokenizer=LlamaTokenizer("timdettmers/guanaco-33b-merged"),
model_path="guanaco-33B.ggmlv3.q3_K_S.bin", # The filename of model. Must end with .bin.
user_chat_roles=UserChatRoles(
user="Human",
ai="Assistant",
system="System",
),
)

@classmethod
def find_model_by_name(cls, name: str) -> LLMModel | None:
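
The two new GGML entries are resolved the same way as the existing ones through find_model_by_name; a minimal sketch of looking one up follows. The exact matching rules of find_model_by_name are not shown in this hunk, so lookup by enum member name is an assumption.

# Minimal sketch, assuming find_model_by_name accepts the enum member name.
from app.models.llms import LLMModels

model = LLMModels.find_model_by_name("orca_mini_3b")
if model is not None:
    print(model.name, model.max_total_tokens)  # "orca_mini_3B-GGML", 2048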