# 流程

以图片搜索为例，展示如何使用 OSS VectorBucket 的向量检索能力。

1. 准备演示数据
    1. 准备一批样式丰富的图片，上传到 OSS
    2. 使用百炼模型，将图片转换为向量
2. 演示 Bucket
3. 演示 Index
4. 演示 Vector

# 演示数据准备

In [None]:
import os

from PIL import Image

# Directory with the demo photographs, relative to this notebook.
image_path = "../data/photograph/"
image_files = os.listdir(image_path)
print(f"image count:{len(image_files)}")

# Preview one sample only: the slice [1:2] yields the second directory entry,
# or nothing at all when fewer than two entries exist.
preview_names = image_files[1:2]
for preview_name in preview_names:
    display(Image.open(image_path + preview_name))

### 通过 OSS 控制台上传图片

图片上传操作这里不进行展示。

### Embedding

这里以一张图片为例进行演示，后续会批量进行 Embedding。

In [None]:
from dashscope import MultiModalEmbeddingItemImage
import dashscope

# Embed a single image, referenced by URL, with the multimodal embedding model
# and print the raw response.
image_url = "http://oss-vector-resources.oss-cn-hangzhou.aliyuncs.com/photograph/Zsd0YhBa8LM.jpg"
image_item = MultiModalEmbeddingItemImage(image=image_url, factor=1.0)
resp = dashscope.MultiModalEmbedding.call(
    model="multimodal-embedding-v1",
    input=[image_item],
)
print(resp)

下面展示多模态模型的文本向量化能力。由于模型支持文本、图片、视频等多种输入，只要把向量数据写入 VectorBucket，查询时就可以实现图搜图、文搜图。

In [None]:
from dashscope import MultiModalEmbeddingItemText
import dashscope

# Embed a single text item with the same multimodal model and print the raw
# response.
text_item = MultiModalEmbeddingItemText(text="风景优美的西湖", factor=1.0)
resp = dashscope.MultiModalEmbedding.call(
    model="multimodal-embedding-v1",
    input=[text_item],
)
print(resp)

# 初始化 Client 

In [None]:
import os

import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

# Credentials and account settings come from environment variables; a
# variable that is not set yields None.
access_key_id = os.getenv('oss_test_access_key_id')
access_key_secret = os.getenv('oss_test_access_key_secret')
region = os.getenv('oss_test_region')
account_id = os.getenv('oss_test_account_id')

print(f"region:{region}, account_id:{account_id}")

# Bucket name, index name and vector dimension used by the cells below.
vector_bucket_name = "my-test-2"
vector_index_name = "test1"
dimension = 1024

# Start from the SDK's default configuration, then set region, account id and
# static credentials before creating the vectors client.
cfg = oss.config.load_default()
cfg.region = region
cfg.account_id = account_id
cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(access_key_id, access_key_secret)
client = oss_vectors.Client(cfg)

# Bucket 操作



### 创建 Bucket

In [None]:
# Create the vector bucket. Any exception is printed instead of raised so the
# notebook can keep running.
try:
    create_request = oss_vectors.models.PutVectorBucketRequest(bucket=vector_bucket_name)
    result = client.put_vector_bucket(create_request)
    print(f'status code: {result.status_code}, request id: {result.request_id}')
except Exception as e:
    print(e)


### List Bucket

In [None]:
# List the vector buckets and print only their names.
list_request = oss_vectors.models.ListVectorBucketsRequest()
result = client.list_vector_buckets(list_request)
bucket_names = [bucket.name for bucket in result.buckets]
print(bucket_names)

### Get Bucket

In [None]:
# Fetch the demo bucket and print the bucket info returned by the service.
get_request = oss_vectors.models.GetVectorBucketRequest(bucket=vector_bucket_name)
result = client.get_vector_bucket(get_request)
print(result.bucket_info)

### Delete Bucket

In [None]:
# Issue a delete against a name derived from the demo bucket plus the suffix
# "_not_exist" (presumably a bucket that does not exist, so the real demo
# bucket is left untouched). Any exception is printed instead of raised.
try:
    missing_bucket_name = vector_bucket_name + "_not_exist"
    delete_request = oss_vectors.models.DeleteVectorBucketRequest(bucket=missing_bucket_name)
    client.delete_vector_bucket(delete_request)
except Exception as e:
    print(e)

# Index 操作

### 创建 Index

In [None]:
# Create the demo index: float32 vectors of `dimension` components compared
# with cosine distance. "key1" and "key2" are declared as non-filterable
# metadata keys.
index_metadata = {"nonFilterableMetadataKeys": ["key1", "key2"]}
create_index_request = oss_vectors.models.PutVectorIndexRequest(
    bucket=vector_bucket_name,
    index_name=vector_index_name,
    dimension=dimension,
    data_type='float32',
    distance_metric='cosine',
    metadata=index_metadata,
)
result = client.put_vector_index(create_index_request)
print(f'status code: {result.status_code}, request id: {result.request_id}')

### List Index

In [None]:
# List the indexes of the demo bucket and print their names.
list_indexes_request = oss_vectors.models.ListVectorIndexesRequest(bucket=vector_bucket_name)
result = client.list_vector_indexes(list_indexes_request)
print(f'status code: {result.status_code}, request id: {result.request_id}')
index_names = [index["indexName"] for index in result.indexes]
print(index_names)

### Get Index

In [None]:
import json

# Fetch the demo index and pretty-print its description as indented JSON.
index_request = oss_vectors.models.GetVectorIndexRequest(
    bucket=vector_bucket_name,
    index_name=vector_index_name,
)
result = client.get_vector_index(index_request)
print(f'status code: {result.status_code}, request id: {result.request_id}')
print(json.dumps(result.index, indent=4))

### Delete Index

In [None]:
# Issue a delete for an index named "not_exist" in the demo bucket
# (presumably absent, so the demo index is left untouched). Any exception is
# printed instead of raised.
try:
    delete_index_request = oss_vectors.models.DeleteVectorIndexRequest(
        bucket=vector_bucket_name,
        index_name="not_exist",
    )
    result = client.delete_vector_index(delete_index_request)
    print(f'status code: {result.status_code}, request id: {result.request_id}')
except Exception as e:
    print(e)


# Vector 操作

### Put Vector

In [None]:
# Write 5 sample rows with keys "0".."4" (the Get Vector and Delete Vector
# cells look these keys up). Every row carries the same constant vector of
# `dimension` components and the same metadata.
vectors = [
    {
        "key": str(idx),
        "data": {
            "float32": [0.1] * dimension
        },
        "metadata": {
            "size": "small"
        }
    }
    for idx in range(5)
]
result = client.put_vectors(oss_vectors.models.PutVectorsRequest(
    bucket=vector_bucket_name,
    index_name=vector_index_name,
    vectors=vectors,
))
print(f'status code: {result.status_code}, request id: {result.request_id}')

### 准备图片数据


In [None]:
import json

# Load the pre-computed image records from disk. The name is bound to an
# empty list first so that it exists even if opening or parsing the file
# fails.
image_data_array = []
# Decode explicitly as UTF-8 instead of relying on the platform's default
# text encoding, which differs between systems.
with open("../data/data.json", "r", encoding="utf-8") as f:
    image_data_array = json.load(f)
print("image data count:", len(image_data_array))

### 写入图片数据

In [None]:
# Upload the image records in consecutive batches of at most `batch_size`
# entries; the final batch holds whatever remains. Nothing is sent when the
# record list is empty.
batch_size = 500
for batch_start in range(0, len(image_data_array), batch_size):
    vectors = image_data_array[batch_start:batch_start + batch_size]
    put_request = oss_vectors.models.PutVectorsRequest(
        bucket=vector_bucket_name,
        index_name=vector_index_name,
        vectors=vectors,
    )
    result = client.put_vectors(put_request)
    print(f'status code: {result.status_code}, request id: {result.request_id}')

### Get Vector

In [None]:
# Look up the five sample keys, the key "404-not-found" (presumably absent
# from the index) and the key of the first image record. Only metadata is
# requested, not the vector data.
lookup_keys = [
    "0", "1", "2", "3", "4",
    "404-not-found",
    image_data_array[0]["key"],
]
get_vectors_request = oss_vectors.models.GetVectorsRequest(
    bucket=vector_bucket_name,
    index_name=vector_index_name,
    keys=lookup_keys,
    return_data=False,
    return_metadata=True,
)
result = client.get_vectors(get_vectors_request)
print(f'status code: {result.status_code}, request id: {result.request_id}')
print(result.vectors)

### Delete Vector

In [None]:
# Delete the five sample rows; the key "404-not-found" is included as well
# (presumably absent from the index).
keys_to_delete = [
    "0", "1", "2", "3", "4",
    "404-not-found",
]
delete_vectors_request = oss_vectors.models.DeleteVectorsRequest(
    bucket=vector_bucket_name,
    index_name=vector_index_name,
    keys=keys_to_delete,
)
result = client.delete_vectors(delete_vectors_request)
print(f'status code: {result.status_code}, request id: {result.request_id}')

### List Vector 

In [None]:
# Options shared by both list calls: at most 3 entries per call, with neither
# vector data nor metadata requested.
page_options = dict(
    bucket=vector_bucket_name,
    index_name=vector_index_name,
    max_results=3,
    return_data=False,
    return_metadata=False,
)

# First call: no token supplied.
result = client.list_vectors(oss_vectors.models.ListVectorsRequest(**page_options))
print(result.vectors)
print(f"next_token:{result.next_token}")

# Second call: pass the token returned by the first call.
result = client.list_vectors(oss_vectors.models.ListVectorsRequest(
    **page_options,
    next_token=result.next_token,
))
print(result.vectors)

### 切片 List 
通过 `segment_count` 和 `segment_index` 将索引划分为多个分片，每个分片可以由不同的线程并行 List，从而快速遍历整个向量索引。

In [None]:
# List the index in `segment_count` segments, one request per segment index,
# and print how many entries each request returned (at most 500 per request).
segment_count = 4
for idx in range(segment_count):
    segment_request = oss_vectors.models.ListVectorsRequest(
        bucket=vector_bucket_name,
        index_name=vector_index_name,
        max_results=500,
        return_data=False,
        return_metadata=False,
        segment_count=segment_count,
        segment_index=idx,
    )
    result = client.list_vectors(segment_request)
    print(f"segment idx:{idx}, count:{len(result.vectors)}")

### Query Vector

In [None]:
import dashscope
from dashscope import MultiModalEmbeddingItemText

def embedding(text) -> list[float]:
    """Return the multimodal embedding vector for *text*.

    Sends *text* as a single text item to the DashScope
    ``multimodal-embedding-v1`` model and returns the first embedding found
    in the response output.

    Raises:
        RuntimeError: if the response status is not HTTP 200. The status,
            error code and message from the response are included, so a
            failed call is reported directly rather than as a subscript
            error on the response output.
    """
    # Imported here so this cell does not depend on another cell's imports.
    from http import HTTPStatus

    resp = dashscope.MultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=[MultiModalEmbeddingItemText(text=text, factor=1.0)]
    )
    if resp.status_code != HTTPStatus.OK:
        raise RuntimeError(
            f"embedding request failed: status={resp.status_code}, "
            f"code={resp.code}, message={resp.message}"
        )
    return resp.output["embeddings"][0]["embedding"]

In [None]:
# Text-to-image search: embed the query text, then request the 5 nearest
# entries whose metadata matches the filter, returning distance and metadata.
query_embedding = embedding("狗狗")
query_request = oss_vectors.models.QueryVectorsRequest(
    bucket=vector_bucket_name,
    index_name=vector_index_name,
    query_vector={"float32": query_embedding},
    filter={"city": "hangzhou"},
    top_k=5,
    return_distance=True,
    return_metadata=True,
)
result = client.query_vectors(query_request)
print(result.vectors)

# 图形化展示

为了更好的展示效果，使用 Gradio 做了一个界面进行演示。