# Milvus

In [128]:
from pymilvus import connections, DataType, CollectionSchema, FieldSchema, Collection, Partition

# Start from a clean slate: drop any existing 'default' connection so that the
# connect() call below always applies the configuration given here.
if connections.has_connection('default'):
    connections.remove_connection('default')

connections.connect(alias='default', host='localhost', port='19530')

# Fields of the collection: a string primary key (at most 36 characters),
# a JSON metadata document, and a 1536-dimensional float vector.
uuid_field = FieldSchema(name="uuid", dtype=DataType.VARCHAR, is_primary=True, max_length=36)
metadata_field = FieldSchema(name="metadata", dtype=DataType.JSON)
embeddings_field = FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=1536)

# Define collection schema
collection_schema = CollectionSchema(
    fields=[uuid_field, metadata_field, embeddings_field],
    description="Collection of articles",
)

# Create collection
collection = Collection(name="rmrj", schema=collection_schema)

# Create partition
partition = Partition(collection, "rmrj_articles")


## Embeddings

In [142]:
import openai
import pandas as pd
import re

import os

# Credentials are read from the environment so that secrets are never stored in
# the notebook itself. Export OPENAI_API_KEY and PINECONE_API_KEY before running
# this cell; an unset variable yields an empty string.
# SECURITY: the literal keys that used to be written here must be treated as
# leaked and should be revoked/rotated with the respective providers.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
# The environment name is not a secret, so the previous value stays as default.
PINECONE_ENVIRONMENT = os.environ.get('PINECONE_ENVIRONMENT', 'us-west1-gcp')


from openai.embeddings_utils import get_embedding
# Embedding configuration.
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"
max_tokens = 8000

openai.api_key = OPENAI_API_KEY


def get_embedding(text, model=embedding_model):
    """Return the embedding of *text* computed by the OpenAI model *model*.

    Newlines in *text* are replaced with spaces before the request is sent.
    The value returned is the ``embedding`` entry of the first item in the
    response's ``data`` list.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], model=model)
    return response['data'][0]['embedding']

In [157]:
# Index configuration for the "embeddings" vector field.
index_params = {
    "metric_type": "L2",   # Could be L2, IP, etc.
    # The index type is given as a plain string, which is the form the pymilvus
    # documentation uses; this also removes the dependency on an `IndexType`
    # name that is never imported in this notebook.
    "index_type": "IVF_SQ8",   # Change to the index type suitable for your case.
    # NOTE(review): nlist is the number of clusters the index partitions the
    # vectors into; 16384 looks large for a small collection -- confirm against
    # the Milvus index tuning guidance before relying on it.
    "params": {"nlist": 16384},  # Change nlist value according to your data
}

# Create the index
collection.create_index("embeddings", index_params)

Status(code=0, message=)

## Querying

In [158]:
from pymilvus import Collection
# Re-bind `collection` to the existing "rmrj" collection, then load it.
collection = Collection(name="rmrj")
collection.load()


In [151]:
import numpy as np

In [186]:
# Embed the query text.
query_vectors = get_embedding("Junior High school")

# search() is handed a 2-D array (one row per query vector), so a single 1-D
# embedding is promoted to shape (1, dim).
query_vectors = np.array(query_vectors)
if query_vectors.ndim == 1:
    query_vectors = query_vectors[np.newaxis, :]

search_params = {
    "metric_type": "L2",  # Distance metric, can be L2, IP (Inner Product), etc.
    "params": {"nprobe": 10},
    "offset": 0,
}

# limit is the maximum number of results returned per query vector.
results = collection.search(
    data=query_vectors,
    anns_field="embeddings",
    param=search_params,
    limit=10,
    output_fields=['metadata'],
    consistency_level="Strong",
)

# results[0] holds the hits for the first (and only) query vector.
for hit in results[0]:
    metadata = hit.entity.get('metadata')
    print("\n\n")
    print(metadata['Author'])
    print(metadata['text'])
    print(hit.distance)
    print("\n\n")




Chris Feli Joy P. Tajonera, Dennis V. Madrigal
title: Correlating the Psychological and Spiritual Well-Being of Junior High School Students in Selected Catholic Schools in Central Philippines, keywords: Psychology, psycho-spiritual well-being, junior high school students, Philippines, author: Chris Feli Joy P. Tajonera, Dennis V. Madrigal, doi: https://doi.org/10.32871/rmrj2210.01.05, abstract: This descriptive-correlational study assesses the psycho-spiritual well-being of junior high school students at Catholic Schools in Central Philippines. Also, it determines the relationship between the said constructs, and at the same, their association between the variables of sex, family structure, birth order, religious affiliation, and church involvement. The data are gathered using Ryff’s Psychological Well-being and Ellison and Paloutzian’s Spiritual Well-being Scale. The findings reveal moderate levels of psychological and spiritual well-being among the respondents. Point biserial 

## Connection 

In [77]:
from pymilvus import connections, db, CollectionSchema, FieldSchema, DataType, Collection, utility
import json
PORT = 19530
HOST = "127.0.0.1"
DB_NAME = "JQ"

# NOTE(review): `conn` is kept for backward compatibility; nothing visible in
# this notebook reads it, and connect() may simply return None -- confirm.
conn = connections.connect(host=HOST, port=PORT)

# Create the database only when it is missing so this cell can be re-run;
# asking the server to create a database that already exists is expected to
# raise an error.
if DB_NAME not in db.list_database():
    db.create_database(DB_NAME)


In [79]:
# JSON text is UTF-8; state the encoding explicitly so the file is not decoded
# with the platform's default codec, which corrupts non-ASCII characters on
# systems whose default is not UTF-8.
with open('rmrj/rmrj.json', encoding='utf-8') as f:
    data = json.load(f)

In [84]:
# Turn each positional record (uuid, metadata, embeddings) into a dict keyed by
# field name. Running this twice on the same `data` will fail, because the
# second pass would index dicts by position.
data = [
    {'uuid': record[0], 'metadata': record[1], 'embeddings': record[2]}
    for record in data
]

In [187]:
# Insert all rows into the "rmrj_articles" partition; `mr` keeps the result.
mr = collection.insert(data=data, partition_name="rmrj_articles")

In [189]:
# Show the result object returned by the insert call.
print(mr)

(insert count: 151, delete count: 0, upsert count: 0, timestamp: 442782850599616515, success count: 151, err count: 0)


In [200]:
from pymilvus import connections, Collection

# Name of the collection to inspect
collection_name = 'rmrj'

# Connect to the Milvus server
connections.connect(host='localhost', port='19530')

# Get a handle on the collection and read its entity count
collection = Collection(name=collection_name)
num_records = collection.num_entities

# Report the collection name and its size
print(f"Collection Name: {collection_name}")
print(f"Number of Records: {num_records}")


MilvusException: <MilvusException: (code=2, message=Fail connecting to server on localhost:19530. Timeout)>

In [199]:
from pymilvus import connections, Collection

# Connect to the Milvus server (registered under the 'default' alias)
connections.connect(host='localhost', port='19530')

# Specify the collection name here so this cell does not depend on another
# cell having been executed first.
collection_name = 'rmrj'

# Get a handle on the collection and make sure it is loaded before querying.
collection = Collection(name=collection_name)
collection.load()

# Set the batch size for retrieval
# NOTE(review): Milvus caps offset + limit per query -- confirm the cap for the
# server version in use if the collection grows large.
batch_size = 1000

# Get the total number of records in the collection
num_records = collection.num_entities

# Retrieve and view the entire collection, one page at a time
for offset in range(0, num_records, batch_size):
    # query() takes a boolean filter expression and `limit` (there is no `topk`
    # argument). `uuid` is the string primary key, so `uuid != ""` matches
    # every row. The vector field is left out to keep the output readable.
    query_results = collection.query(
        expr='uuid != ""',
        output_fields=["uuid", "metadata"],
        limit=batch_size,
        offset=offset,
    )

    # Each query result is a plain dict of the requested fields.
    for result in query_results:
        print(result)

# Disconnect from the Milvus server; disconnect() requires the alias.
connections.disconnect('default')


MilvusException: <MilvusException: (code=2, message=Fail connecting to server on localhost:19530. Timeout)>