In [None]:
from byzerllm.apps.byzer_storage.simple_api import ByzerStorage, DataType, FieldOption,SortOption

# Initialize the ByzerStorage handle.
# NOTE(review): the three arguments are presumably cluster name, database and table — confirm against the ByzerStorage signature.
storage = ByzerStorage("byzerai_store", "projects", "auto-coder")
# Declare the table schema; the result of execute() is bound to `_` and not read again in this cell.
_ = (
    storage.schema_builder()
    ## Primary key, a string
    .add_field("_id", DataType.STRING)
    ## Field that can be used in where-style filter conditions
    .add_field("name", DataType.STRING)
    ## Field that can be used for search
    .add_field("content", DataType.STRING, [FieldOption.ANALYZE])
    ## Field that needs no index; stored exactly as given
    .add_field("raw_content", DataType.STRING, [FieldOption.NO_INDEX])    
    ## Array field
    .add_array_field("summary", DataType.FLOAT)    
    ## Field that needs to be sortable
    .add_field("created_time", DataType.LONG, [FieldOption.SORT])    
    .execute()
)

## The schema declared above, written as a compact listing:
# st(
# field(_id,string),
# field(name,string),
# field(content,string,analyze),
# field(raw_content,string,no_index),
# field(summary,array(float)),
# field(created_time,long,sort)
# )

# Prepare sample rows. Every row carries the same six keys:
# _id, name, content, raw_content, summary, created_time.
# created_time holds consecutive epoch-second timestamps, one second apart.
data = [
    {
        "_id": "1",
        "name": "Hello",
        "content": "Hello, world!",
        "raw_content": "Hello, world!",
        "summary": "hello world",
        "created_time": 1612137600,
    },
    {
        "_id": "2",
        "name": "Byzer",
        "content": "Byzer, world!",
        "raw_content": "Byzer, world!",
        "summary": "byzer",
        "created_time": 1612137601,
    },
    {
        "_id": "3",
        "name": "AI",
        "content": "AI, world!",
        "raw_content": "AI, world!",
        "summary": "AI",
        "created_time": 1612137602,
    },
    {
        "_id": "4",
        "name": "ByzerAI",
        "content": "ByzerAI, world!",
        "raw_content": "ByzerAI, world!",
        "summary": "ByzerAi",
        "created_time": 1612137603,
    },
]

# Write the rows to storage: "summary" is passed as a vector field,
# "content" as a search field.
(
    storage.write_builder()
    .add_items(data, vector_fields=["summary"], search_fields=["content"])
    .execute()
)

# Commit the write.
storage.commit()

# Search: a vector query against "summary" plus a search query against "content",
# issued through one query builder.
query = storage.query_builder()
results = (
    query.set_vector_query("ByzerAi", fields=["summary"])
    .set_search_query("Hello", fields=["content"])
    .execute()
)
print(results)

## Filter first, then search: add a condition on "name" before running
## the vector + search query.
query = storage.query_builder()
query.and_filter().add_condition("name", "AI").build()
# Field names are passed as a list, matching every other vector/search call in this cell.
query.set_vector_query("ByzerAi", fields=["summary"])
results = query.set_search_query("Hello", fields=["content"]).execute()
print(results)

## Filter, then sort: condition on "name", ordered by created_time descending.
query = storage.query_builder()
(
    query.and_filter()
    .add_condition("name", "AI")
    .build()
    .sort("created_time", SortOption.DESC)
)
results = query.execute()
print(results)

## Delete by id, then run the "name" filter again and print what it returns.
storage.delete_by_ids(["3"])

query = storage.query_builder()
query.and_filter().add_condition("name", "AI").build()
results = query.execute()
# Separator line followed by the results, each on its own line.
print("====", results, sep="\n")

## Drop the table
storage.drop_table()

In [None]:
from byzerllm.apps.byzer_storage.simple_api import ByzerStorage, DataType, FieldOption,SortOption
storage = ByzerStorage("byzerai_store", "my_database1", "my_table4s")

## Delete rows selected by a filter condition.
## (Deleting by id instead would be: storage.delete_by_ids(["3"]))
(
    storage.query_builder()
    .and_filter()
    .add_condition("name", "AI")
    .build()
    .delete()
)

## Run the same filter as a query and print what it returns.
query = storage.query_builder()
query.and_filter().add_condition("name", "AI").build()
results = query.execute()
print("====", results, sep="\n")

In [None]:
from byzerllm.apps.byzer_storage.simple_api import ByzerStorage, DataType, FieldOption,SortOption

# Initialize the ByzerStorage handle used by the memory cells below.
# NOTE(review): the three arguments are presumably cluster name, database and table — confirm against the ByzerStorage signature.
storage = ByzerStorage("byzerai_store", "memory", "memory")
# Declare the table schema; the result of execute() is bound to `_` and not read again in this cell.
_ = (
    storage.schema_builder()
    ## Primary key, a string
    .add_field("_id", DataType.STRING)
    ## Field that can be used in where-style filter conditions
    .add_field("name", DataType.STRING)
    ## Field that can be used for search
    .add_field("content", DataType.STRING, [FieldOption.ANALYZE])
    ## Field that needs no index; stored exactly as given
    .add_field("raw_content", DataType.STRING, [FieldOption.NO_INDEX])    
    ## Array field
    .add_array_field("summary", DataType.FLOAT)    
    ## Field that needs to be sortable
    .add_field("created_time", DataType.LONG, [FieldOption.SORT])    
    .execute()
)



In [None]:
import time

# Current Unix time truncated to whole seconds; as the last expression it becomes the cell's output.
int(time.time())


In [None]:
import time
import uuid
def memorize(name: str, s: str):
    """Write one piece of text into the module-level `storage` under `name`.

    Args:
        name: Value stored in the "name" field.
        s: Text stored in the "content", "raw_content" and "summary" fields.

    The row gets a fresh UUID4 string as `_id` and the current epoch second
    as `created_time`. Returns None.
    """
    rows = [
        {
            "_id": str(uuid.uuid4()),
            "name": name,
            "content": s,
            "raw_content": s,
            "summary": s,
            "created_time": int(time.time()),
        },
    ]
    storage.write_builder().add_items(
        rows, vector_fields=["summary"], search_fields=["content"]
    ).execute()
    # Commit after writing, the same way the first walkthrough cell does.
    # NOTE(review): presumably required before the row is visible to queries — confirm.
    storage.commit()

def recall(name: str, query: str):
    """Look up stored text for `name` that matches `query`.

    Args:
        name: Value used in the filter condition on the "name" field.
        query: Text used both as the vector query on "summary" and as the
            search query on "content".

    Returns:
        Whatever the query builder's `execute()` returns for this query.
    """
    searcher = storage.query_builder()
    searcher.and_filter().add_condition("name", name).build()
    # Field names are passed as lists for both the vector and the search query.
    searcher.set_vector_query(query, fields=["summary"])
    results = searcher.set_search_query(query, fields=["content"]).execute()
    return results

In [None]:
# Delete the row whose _id is "1".
# NOTE(review): memorize() assigns uuid4 ids, so "1" presumably only matches a manually inserted row — confirm this cell is still needed.
storage.delete_by_ids(["1"])

In [None]:
# Store one fact under the name "william" (the text reads "William's birthday is February 1st").
memorize("william","William的生日是2月1号")

In [None]:

# Query the "william" entries (the text reads "What day is William's birthday?"); the return value is the cell's output.
recall("william","William生日是几号")

In [None]:
import byzerllm
import json

# LLM client whose default model is "deepseek_chat"; used by the prompt function below.
# NOTE(review): presumably the name of an already deployed model — confirm.
llm = byzerllm.ByzerLLM.from_default_model("deepseek_chat")

# Prompt function: the docstring below is the prompt template (it references
# {{ context }} and {{ query }}), so its text must stay exactly as written.
# The returned dict supplies the "context" value: the recall() results for
# "william", serialized as JSON with non-ASCII characters kept as-is.
@byzerllm.prompt(llm=llm)
def ask_about_william(query:str)->str:
    '''
    根据下面的信息回答用户的问题。

    {{ context }}

    用户的问题：

    {{ query }}
    '''
    memories = recall("william", query)
    context_json = json.dumps(memories, ensure_ascii=False)
    return {"context": context_json}

# Ask the question (the text reads "What day is William's birthday?"); the return value is the cell's output.
ask_about_william("William的生日是几号")

In [None]:
import byzerllm
# Embedding check through the LLM client: set "emb" as the default embedding
# model name, then embed a short greeting (the text reads "hello").
# NOTE(review): "emb" is presumably the name of a deployed embedding model — confirm.
llm = byzerllm.ByzerLLM.from_default_model("deepseek_chat")
llm.setup_default_emb_model_name("emb")
llm.emb_query("你好")

In [1]:
from byzerllm.apps.byzer_storage.simple_api import ByzerStorage, DataType, FieldOption,SortOption
# Embedding check through the storage handle: embed a short greeting (the text
# reads "hello"). The recorded output below shows a list of floats.
storage = ByzerStorage("byzerai_store", "memory", "memory")
storage.emb("你好")

[32m2024-08-05 14:21:13.946[0m | [1mINFO    [0m | [36mbyzerllm.utils.connect_ray[0m:[36mconnect_cluster[0m:[36m48[0m - [1mJDK 21 will be used (/Users/allwefantasy/.auto-coder/jdk-21.0.2.jdk/Contents/Home)...[0m
2024-08-05 14:21:14,136	INFO worker.py:1564 -- Connecting to existing Ray cluster at address: 127.0.0.1:6379...
2024-08-05 14:21:14,162	INFO worker.py:1740 -- Connected to Ray cluster. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


21


[0.02468225546181202,
 -0.033582352101802826,
 -0.01932729408144951,
 0.016576258465647697,
 0.004181655123829842,
 -0.01825554296374321,
 0.0009228317067027092,
 -0.026030316948890686,
 0.036056485027074814,
 0.025318816304206848,
 0.005778028164058924,
 0.010516527108848095,
 0.03486406058073044,
 -0.010437468066811562,
 0.016913728788495064,
 0.006378181278705597,
 0.02013174444437027,
 0.017821231856942177,
 -0.01038992777466774,
 -0.03506198897957802,
 0.016288915649056435,
 -0.017553389072418213,
 0.001788126421160996,
 0.015899658203125,
 -0.0070325150154531,
 0.005675456020981073,
 0.011551674455404282,
 0.02017924189567566,
 0.024007294327020645,
 -0.0076481252908706665,
 0.004231566563248634,
 0.012654876336455345,
 0.01321863941848278,
 -0.0032187537290155888,
 0.03308556601405144,
 0.013909615576267242,
 -0.0390632189810276,
 0.006769589148461819,
 0.008648551069200039,
 -0.028923893347382545,
 -0.014060108922421932,
 -0.011519079096615314,
 -0.009211969561874866,
 -0.04714

[33m(raylet)[0m Aug 05, 2024 3:17:32 PM org.apache.lucene.internal.vectorization.PanamaVectorizationProvider <init>
[33m(raylet)[0m INFO: Java vector incubator API enabled; uses preferredBitSize=256


[33m(raylet)[0m The autoscaler failed with the following error:
Terminated with signal 15
  File "/opt/miniconda3/envs/byzerllm/lib/python3.10/site-packages/ray/autoscaler/_private/monitor.py", line 709, in <module>
    monitor.run()
  File "/opt/miniconda3/envs/byzerllm/lib/python3.10/site-packages/ray/autoscaler/_private/monitor.py", line 584, in run
    self._run()
  File "/opt/miniconda3/envs/byzerllm/lib/python3.10/site-packages/ray/autoscaler/_private/monitor.py", line 438, in _run
    time.sleep(AUTOSCALER_UPDATE_INTERVAL_S)

