# **1. Manage Databases**

## **1.1. Create database**

In [1]:
from pymilvus import connections, db

conn = connections.connect(host="127.0.0.1", port=19530)
# database = db.create_database("dolphinai_db")


In [13]:
db.list_database()

['dolphinai_db', 'default']

## **1.2. Connect to the Client**

if you simply provide `uri = "http://localhost:19530/"` it will connect to the `default` db

In [6]:
from pymilvus import MilvusClient, DataType

# 1. Set up a Milvus client
client = MilvusClient(
    uri="http://localhost:19530/dolphinai_db"
)
client.list_collections() 

['dolphinai_collection']

# **2. Manage Collections**

## **2.1. Customized Collection setup**

### **2.1.1. Step 1: Set up schema**

In [44]:
# client.drop_collection("dolphinai_collection")
client.list_collections()

[]

In [45]:

schema = MilvusClient.create_schema(
    auto_id=True,
    enable_dynamic_field=True,
    # partition_key_field="doc_types",
    # num_partitions=2,                   # Number of partitions. Defaults to 16.
    description="dolphinai_collection"

)

schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, description="primary id")
schema.add_field(field_name="chunk_id", datatype=DataType.VARCHAR,max_length=50, description="document chunk id")
schema.add_field(field_name="text", datatype=DataType.VARCHAR,max_length=3000, description="text")
schema.add_field(field_name="source", datatype=DataType.VARCHAR, max_length=256, description="document source")
schema.add_field(field_name="embedding", datatype=DataType.FLOAT_VECTOR,  dim=1024, description="vector embeddings")


{'auto_id': True, 'description': 'dolphinai_collection', 'fields': [{'name': 'id', 'description': 'primary id', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'chunk_id', 'description': 'document chunk id', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 50}}, {'name': 'text', 'description': 'text', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 3000}}, {'name': 'source', 'description': 'document source', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 256}}, {'name': 'embedding', 'description': 'vector embeddings', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 1024}}], 'enable_dynamic_field': True}

### **2.1.2. Step 2: Set up index parameters**

In [46]:
index_params = client.prepare_index_params()

index_params.add_index(
    field_name="id",
    index_type="STL_SORT"
)

index_params.add_index(
    field_name="embedding", 
    index_type="GPU_IVF_FLAT",
    metric_type="L2",
    params={ "nlist": 1024 }
)


### **2.1.3. Step 3: Create the collection**

In [47]:
import time

client.create_collection(
    collection_name="dolphinai_collection",
    schema=schema,
    index_params=index_params,
    using='dolphinai_db', # you ae specifying with db to use
    shards_num=3
)

time.sleep(5)

res = client.get_load_state(
    collection_name="dolphinai_collection"
)

print(res)



{'state': <LoadState: Loaded>}


In [48]:
client.describe_collection("dolphinai_collection")

{'collection_name': 'dolphinai_collection',
 'auto_id': True,
 'num_shards': 3,
 'description': 'dolphinai_collection',
 'fields': [{'field_id': 100,
   'name': 'id',
   'description': 'primary id',
   'type': <DataType.INT64: 5>,
   'params': {},
   'auto_id': True,
   'is_primary': True},
  {'field_id': 101,
   'name': 'chunk_id',
   'description': 'document chunk id',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 50}},
  {'field_id': 102,
   'name': 'text',
   'description': 'text',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 3000}},
  {'field_id': 103,
   'name': 'source',
   'description': 'document source',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 256}},
  {'field_id': 104,
   'name': 'embedding',
   'description': 'vector embeddings',
   'type': <DataType.FLOAT_VECTOR: 101>,
   'params': {'dim': 1024}}],
 'aliases': [],
 'collection_id': 452339642855856589,
 'consistency_level': 2,
 'properties': {},
 'num_partitions': 1,

## **2.2. View Collections**

In [39]:

res = client.describe_collection(
    collection_name="dolphinai_collection"
)

res

{'collection_name': 'dolphinai_collection',
 'auto_id': True,
 'num_shards': 3,
 'description': 'dolphinai_collection',
 'fields': [{'field_id': 100,
   'name': 'id',
   'description': 'primary id',
   'type': <DataType.INT64: 5>,
   'params': {},
   'auto_id': True,
   'is_primary': True},
  {'field_id': 101,
   'name': 'chunk_id',
   'description': 'document chunk id',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 50}},
  {'field_id': 102,
   'name': 'text',
   'description': 'text',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 3000}},
  {'field_id': 103,
   'name': 'doc_types',
   'description': 'document source',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 256}},
  {'field_id': 104,
   'name': 'embedding',
   'description': 'vector embeddings',
   'type': <DataType.FLOAT_VECTOR: 101>,
   'params': {'dim': 1024}}],
 'aliases': [],
 'collection_id': 452339642855855043,
 'consistency_level': 2,
 'properties': {},
 'num_partitions':

## **2.3. Load & Release Collection**

During the loading process of a collection, Milvus loads the collection’s index file into memory. Conversely, when releasing a collection, Milvus unloads the index file from memory. ***Before conducting searches in a collection, ensure that the collection is loaded***.

### **2.3.1. Load a collection**

In [40]:
client.load_collection(
    collection_name="dolphinai_collection",
    replica_number=1 # Number of replicas to create on query nodes. Max value is 1 for Milvus Standalone, and no greater than `queryNode.replicas` for Milvus Cluster.
)

res = client.get_load_state(
    collection_name="dolphinai_collection"
)

print(res)



{'state': <LoadState: Loaded>}


### **2.3.2. Release a collection**

In [41]:
client.release_collection(
    collection_name="dolphinai_collection"
)

res = client.get_load_state(
    collection_name="dolphinai_collection"
)

print(res)



{'state': <LoadState: NotLoad>}


## **2.4. Set up aliases**
You can assign aliases for collections to make them more meaningful in a specific context. You can assign multiple aliases for a collection, but multiple collections cannot share an alias.

### **2.4.1. Create aliases**

In [26]:
client.create_alias(
    collection_name="dolphinai_collection",
    alias="bob"
)

client.create_alias(
    collection_name="dolphinai_collection",
    alias="alice"
)


### **2.4.2. List aliases**

In [27]:
res = client.list_aliases(
    collection_name="dolphinai_collection"
)

print(res)



{'aliases': ['bob', 'alice'], 'collection_name': 'dolphinai_collection', 'db_name': 'dolphinai_db'}


### **2.4.3. Describe aliases**

In [5]:
res = client.describe_alias(
    alias="boba"
)

print(res)



{'alias': 'boba', 'collection_name': 'dolphinai_collection', 'db_name': 'dolphinai_db'}


### **2.4.4. Reassign aliases**

In [132]:
# client.alter_alias(
#     collection_name="dolphinai_collection_1",
#     alias="alice"
# )

# res = client.list_aliases(
#     collection_name="dolphinai_collection_1"
# )

# print(res)


# res = client.list_aliases(
#     collection_name="dolphinai_collection"
# )

# print(res)



{'aliases': ['bob', 'alice'], 'collection_name': 'dolphinai_collection', 'db_name': 'dolphinai_db'}
{'aliases': ['bob', 'alice'], 'collection_name': 'dolphinai_collection', 'db_name': 'dolphinai_db'}


### **2.4.5. Drop aliases**

In [25]:
client.drop_alias(
    alias="bob"
)

client.drop_alias(
    alias="alice"
)


# **3. Manage Partitons**

## **3.1. List Partitions**

A partition in Milvus represents a sub-division of a collection. This functionality allows the physical storage of a collection to be divided into multiple parts, contributing to improved query performance by narrowing down the focus to a smaller subset of data rather than the entire collection.

Upon the creation of a collection, at least a default partition named _default is automatically created. You can create a maximum of 1,024 partitions within a collection.

If the ***Partition Key*** feature is on in a collection, Milvus takes care of managing all the partitions, relieving you of this responsibility. in our cas the `position` field is a partition key.

A collection can have ***up to*** `1,024` ***partitions***.

In [42]:
res = client.list_partitions(collection_name="dolphinai_collection")
print(res)



['_default']


## **3.2. Create Partitions**

`If you have set a field as the partition key in a collection, Milvus takes care of managing the partitions in the collection. Therefore, you may encounter prompted errors when attempting to create partitions.`

In [49]:
client.create_partition(
    collection_name="dolphinai_collection",
    partition_name="transactions"
)

client.create_partition(
    collection_name="dolphinai_collection",
    partition_name="sap_documents"
)

res = client.list_partitions(collection_name="dolphinai_collection")
print(res)



['_default', 'transactions', 'sap_documents']


## **3.3. Check for a Specific Partition** 

In [32]:
res = client.has_partition(
    collection_name="dolphinai_collection",
    partition_name="transactions"
)
print(res)


res = client.has_partition(
    collection_name="dolphinai_collection",
    partition_name="sap_documents"
)
print(res)



True
True


## **3.4. Load & Release Partitions**

You can load and release specific partitions to make them available or unavailable for searches and queries.

### **3.4.1. Get Load Status**

In [33]:
# Release the collection
client.release_collection(collection_name="dolphinai_collection")

# Check the load status
res = client.get_load_state(collection_name="dolphinai_collection")
print(res)

In [8]:
res = client.get_load_state(
    collection_name="dolphinai_collection", 
    partition_name="transactions"
)

print(res)

{'state': <LoadState: Loaded>}


In [33]:
res = client.get_load_state(
    collection_name="dolphinai_collection", 
    partition_name="sap_documents"
)

print(res)

{'state': <LoadState: NotLoad>}


### **3.4.2. Load Partitions**

In [7]:
# Load single partition
client.load_partitions(
    collection_name="dolphinai_collection",
    partition_names=["transactions"]
)

res = client.get_load_state(collection_name="dolphinai_collection")
print(res)



{'state': <LoadState: Loaded>}


In [14]:
# Load single partition
client.load_partitions(
    collection_name="dolphinai_collection",
    partition_names=["transactions", "sap_documents"]
)

res = client.get_load_state(
    collection_name="dolphinai_collection",
    partition_name="transactions"
)
print(res)

res = client.get_load_state(
    collection_name="dolphinai_collection",
    partition_name="sap_documents"
)
print(res)

{'state': <LoadState: Loaded>}
{'state': <LoadState: Loaded>}


### **3.4.3. Release Partitions**

In [17]:
# Release single Partition
client.release_partitions(
    collection_name="dolphinai_collection",
    partition_names=["transactions"]
)

res = client.get_load_state(
    collection_name="dolphinai_collection", 
    partition_name="transactions"
)

print(res)

{'state': <LoadState: NotLoad>}


In [21]:
# Release multiple Partitions
client.release_partitions(
    collection_name="dolphinai_collection",
    partition_names=["_default", "transactions", "sap_documents"]
)

res = client.get_load_state(
    collection_name="dolphinai_collection",
)
print(res)

{'state': <LoadState: NotLoad>}


### **3.4.4. Release Partitions**

In [24]:
# client.drop_partition(
#     collection_name="dolphinai_collection",
#     partition_name="transactions"
# )

# res = client.list_partitions(collection_name="dolphinai_collection")
# print(res)



In [8]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# Correctly referencing the file using the relative path
file_path = "../artifacts/files"
# Loading the data
documents = SimpleDirectoryReader(file_path).load_data()

In [9]:
documents[0]

Document(id_='25b86281-955b-415a-8cab-4379d58afca6', embedding=None, metadata={'page_label': '1', 'file_name': 'deliverynote1.pdf', 'file_path': '/data1/dolphinai-project/app/notebook/../artifacts/files/deliverynote1.pdf', 'file_type': 'application/pdf', 'file_size': 50953, 'creation_date': '2024-08-29', 'last_modified_date': '2024-08-29'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Mikron Corp. Monroe Delivery Note N. 33721\nDelivery Note\nCodice Cliente: ACME001\nData: 25/11/2023\nFornitore:\nMikron Corp. Monroe\n200 Main Street\nMonroe, CT 06468, USA\nPhone: +1 203 261 31 00\nFax: +1 203 268 47 52Destinatario:\nACME Automotive Inc.\n456 Industrial Blvd\nSometown, CA 98765, USA\nPhone: +1 555 987 6543\nEmail: info@acmeautomotive.com\nQuantit` a Co