In [1]:
# Passed to Ray as the job's code search path (see the ray.init call in this cell).
# Presumably the directory that holds the byzer-retrieval jars -- confirm on the host.
code_search_path = ["/home/byzerllm/softwares/byzer-retrieval-lib/"]

# Environment forwarded to the Ray job and reused for the retrieval cluster's JVM.
# PATH is written one entry per line and joined with ":"; order and duplicates are
# kept exactly as in the original single-string form.
env_vars = {
    "JAVA_HOME": "/home/byzerllm/softwares/jdk-21",
    "PATH": ":".join(
        [
            "/home/byzerllm/softwares/jdk-21/bin",
            "/home/byzerllm/.rvm/gems/ruby-3.2.2/bin",
            "/home/byzerllm/.rvm/gems/ruby-3.2.2@global/bin",
            "/home/byzerllm/.rvm/rubies/ruby-3.2.2/bin",
            "/home/byzerllm/.rbenv/shims",
            "/home/byzerllm/.rbenv/bin",
            "/home/byzerllm/softwares/byzer-lang-all-in-one-linux-amd64-3.3.0-2.3.7/jdk8/bin",
            "/usr/local/cuda/bin",
            "/usr/local/cuda/bin",
            "/home/byzerllm/.rbenv/shims",
            "/home/byzerllm/.rbenv/bin",
            "/home/byzerllm/miniconda3/envs/byzerllm-dev/bin",
            "/home/byzerllm/miniconda3/condabin",
            "/home/byzerllm/.local/bin",
            "/home/byzerllm/bin",
            "/usr/local/cuda/bin",
            "/usr/local/bin",
            "/usr/bin",
            "/usr/local/sbin",
            "/usr/sbin",
            "/home/byzerllm/.rvm/bin",
            "/home/byzerllm/.rvm/bin",
        ]
    ),
}

import os
os.environ["RAY_DEDUP_LOGS"] = "0" 

import ray
from byzerllm.utils.retrieval import ByzerRetrieval
from byzerllm.utils.client import ByzerLLM,LLMRequest,LLMResponse,LLMHistoryItem,LLMRequestExtra,InferBackend
from byzerllm.records import SearchQuery
from byzerllm.utils.client.data_analysis import ByzerDataAnalysis

# Connect to the existing Ray cluster (address="auto"). The job config forwards
# the code search path and the JAVA_HOME/PATH environment defined above.
# ignore_reinit_error=True is intended to make re-running this cell in the same
# kernel harmless -- see the Ray documentation for ray.init.
ray.init(
    address="auto",
    namespace="default",
    ignore_reinit_error=True,
    job_config=ray.job_config.JobConfig(
        code_search_path=code_search_path,
        runtime_env=dict(env_vars=env_vars),
    ),
)

# Plain connection without a job config, kept for reference:
# ray.init(address="auto", namespace="default")

# Retrieval client; the gateway is launched right after construction.
retrieval = ByzerRetrieval()
retrieval.launch_gateway()

# Name used below as the chat model's udf_name and as the default model name.
chat_model_name = "chat"
llm = ByzerLLM()

# llm.undeploy(chat_model_name)
# llm.undeploy("emb")


# Choose which chat model to serve. The value must be a key of MODEL_PRESETS.
# model_type = "deepseek"
# model_type = "llama2"
# model_type = "Qwen"
model_type = "zephyr"

# One preset per supported model:
#   key -> (model weights directory, max model length, GPUs per worker)
MODEL_PRESETS = {
    "deepseek": ("/home/byzerllm/models/deepseek2-llm-67b-chat", 4096, 8),
    "llama2": ("/home/byzerllm/models/openbuddy-llama2-70b-v10.1-bf16", 4000, 8),
    "Qwen": ("/home/byzerllm/models/Qwen-72B-Chat", 24000, 8),
    "zephyr": ("/home/byzerllm/models/openbuddy-zephyr-7b-v14.1", 24000, 4),
}

# Fail loudly on an unknown key. Previously an unmatched model_type left the
# three variables below undefined (NameError later in the cell) or, when the
# cell was re-run in a live kernel, silently reused the values of the previous
# selection.
if model_type not in MODEL_PRESETS:
    raise ValueError(
        "Unknown model_type {!r}; expected one of {}".format(
            model_type, sorted(MODEL_PRESETS)
        )
    )

model_location, max_model_len, gpus_per_worker = MODEL_PRESETS[model_type]

# Name the embedding model is deployed and registered under.
emb_model_name = "emb"

# Register the selected context length for the chat model. The model name comes
# from chat_model_name everywhere in this section so that the length setup, the
# existence check, the deployment and the default-model setting cannot drift apart.
llm.setup_max_model_length(chat_model_name, max_model_len)

# Deploy the chat model with the vLLM backend unless a model with that name
# already exists. NOTE: because of this guard, changing model_type has no effect
# while a model named chat_model_name is still deployed; undeploy it first
# (llm.undeploy(chat_model_name)).
if not llm.is_model_exist(chat_model_name):
    llm.setup_gpus_per_worker(gpus_per_worker).setup_num_workers(1).setup_infer_backend(InferBackend.VLLM)
    llm.deploy(
        model_path=model_location,
        pretrained_model_type="custom/auto",
        udf_name=chat_model_name,
        infer_params={
            "backend.max_num_batched_tokens": 24000,
            "backend.max_model_len": max_model_len,
        },
    )

# Deploy the embedding model with the Transformers backend: two workers, each
# requesting 0.4 of a GPU, unless it already exists.
if not llm.is_model_exist(emb_model_name):
    llm.setup_gpus_per_worker(0.4).setup_num_workers(2).setup_infer_backend(InferBackend.Transformers)
    llm.deploy(
        model_path="/home/byzerllm/models/bge-large-zh",
        pretrained_model_type="custom/bge",
        udf_name=emb_model_name,
        infer_params={},
    )

# Make both models the client's defaults.
llm.setup_default_model_name(chat_model_name)
llm.setup_default_emb_model_name(emb_model_name)

# ray.kill(ray.get_actor("data_analysis"))
# If the "data_analysis" storage cluster does not exist yet, start one.
if not retrieval.is_cluster_exists("data_analysis"):
    builder = retrieval.cluster_builder()

    # Identity and sizing: 2 nodes, 1 CPU and "3g" of memory per node,
    # data located under /tmp/data_analysis.
    (
        builder
        .set_name("data_analysis")
        .set_location("/tmp/data_analysis")
        .set_num_nodes(2)
        .set_node_cpu(1)
        .set_node_memory("3g")
    )

    # JVM settings: reuse the JAVA_HOME and PATH that are also sent to the Ray job,
    # and turn on ZGC.
    (
        builder
        .set_java_home(env_vars["JAVA_HOME"])
        .set_path(env_vars["PATH"])
        .set_enable_zgc()
    )

    builder.start_cluster()

def show_code(lang, code_string):
    """Render ``code_string`` in the notebook as a Markdown fenced code block.

    Args:
        lang: Info string written after the opening fence (e.g. "python").
        code_string: Text to show; non-string values are converted with ``str()``.

    The fence is three backticks unless the text itself contains a run of three
    or more backticks (typical for LLM output that embeds its own code fences).
    In that case the fence is made one backtick longer than the longest run, so
    the embedded fence cannot close the block early.
    """
    import re
    from IPython.display import display, Markdown

    body = str(code_string)
    longest_run = max((len(run) for run in re.findall(r"`+", body)), default=0)
    fence = "`" * max(3, longest_run + 1)
    display(Markdown("{0}{1}\n{2}\n{0}".format(fence, lang, body)))


def show_text(msg):
    """Render ``msg`` in the notebook as a fenced block tagged ``text``."""
    # Same rendering as show_code; only the info string is fixed.
    show_code("text", msg)

def show_image(content):
    """Display a base64-encoded image in the notebook output.

    Args:
        content: Base64 text (``str`` or ``bytes``) of the encoded image data.
    """
    from IPython.display import Image, display
    from base64 import b64decode

    raw_bytes = b64decode(content)
    display(Image(raw_bytes))
    

2023-12-11 10:49:17,505	INFO worker.py:1458 -- Connecting to existing Ray cluster at address: 192.168.1.248:6379...
2023-12-11 10:49:17,518	INFO worker.py:1633 -- Connected to Ray cluster. View the dashboard at 127.0.0.1:8265


In [2]:
from byzerllm.apps.agent import Agents

from byzerllm.apps.agent.user_proxy_agent import UserProxyAgent
from byzerllm.apps.agent.extensions.retrieval_agent import RetrievalAgent


# RetrievalAgent registered as "retrieval_agent"; it shares the llm and retrieval
# clients created in the first cell and is given no code agent.
retrieval_agent = Agents.create_local_agent(
    RetrievalAgent,
    "retrieval_agent",
    llm,
    retrieval,
    max_consecutive_auto_reply=1000,
    code_agent=None,
)

# UserProxyAgent registered as "user". human_input_mode="NEVER" together with
# max_consecutive_auto_reply=0 presumably keeps it fully non-interactive and
# stops it from auto-replying -- confirm against the agent implementation.
user = Agents.create_local_agent(
    UserProxyAgent,
    "user",
    llm,
    retrieval,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
)



In [4]:
# Send one question to the retrieval agent ("介绍下 Gluten" = "Introduce Gluten").
# The message metadata carries the path of a local text file.
# NOTE(review): "aritcle.txt" looks like a misspelling of "article.txt"; left
# unchanged because it must match the real file name on disk -- confirm.
user.initiate_chat(
    retrieval_agent,
    message=dict(
        content="介绍下 Gluten",
        metadata=dict(
            file_path="/home/byzerllm/projects/jupyter-workspace/aritcle.txt",
        ),
    ),
)


user (to retrieval_agent):

介绍下 Gluten

--------------------------------------------------------------------------------
retrieval_agent (to user):

 Gluten 是一个开源项目，其目标是通过本地化 Spark 项目，以提高 Spark 在 ETL 批处理领域的性能。Gluten 项目由深耕于 Spark 加速的 Intel OAP 团队和 Kyligence 公司共建，BIGO 大数据团队于 2022 年 9 月加入研发。Gluten 项目的核心思想是使用本地的 Native 运行时库作为 Spark 插件，结合了 Spark 的可扩展性框架、优化器、容错能力和 Native 执行库的高性能，预期可以带来数倍的性能提升。

Gluten 项目主要支持 Velox 和 ClickHouse 两种 Native 执行库。Velox 是 Meta 开源的本地执行库，其目标是构建统一的查询执行层，提供了数据类型、向量数据结构、Pipeline 框架、算子、表达式计算、IO 和资源管理等功能。而 ClickHouse 是 2016 年 Yandex 公司开源的一种用于联机分析(OLAP)的列式数据库，以其高性能近年来备受关注。

BIGO 大数据团队在 2022 年 9 月加入 Gluten 项目后，已经在生产环境逐步灰度 Gluten，开始替换 Spark 的 ETL 工作负载，目前灰度 SQL 上获得了总体 40%+ 的成本节省。


--------------------------------------------------------------------------------
