In [1]:
import json
import os
import re
import time
import uuid
from datetime import datetime

import numpy as np
import openai
import pandas as pd
import torch.nn.functional as F
from joblib import load
from pymilvus import (
    connections,
    DataType,
    CollectionSchema,
    FieldSchema,
    Collection,
    Partition,
    utility,
)
from pymilvus import Milvus, DataType, Collection, MilvusException
from torch import Tensor
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel

# SECURITY: a secret key used to be hard-coded on this line. It has been exposed
# through the notebook and must be revoked. The key is now taken from the
# OPENAI_API_KEY environment variable (None when the variable is not set).
openai.api_key = os.getenv("OPENAI_API_KEY")
# Payload field carried by each per-field collection, in the notebook's
# canonical order.
fields_list = [
    "text",
    "author",
    "title",
    "contact",
    "name",
    "position",
    "department",
    "date",
]

# Collection names follow the "<field>_collection" convention.
collections_list = [f"{field}_collection" for field in fields_list]

# Fields requested from each collection. text_collection carries extra
# columns; every other collection has the same four-field layout.
collections_dict = {
    "text_collection": [
        "uuid",
        "text_id",
        "text",
        "embeds",
        "media",
        "link",
        "partition_name",
    ],
    **{
        f"{field}_collection": ["uuid", field, "embeds", "partition_name"]
        for field in (
            "author",
            "title",
            "date",
            "contact",
            "department",
            "name",
            "position",
        )
    },
}

# Collections that are queried for each partition name.
partitions = dict(
    documents_partition=[
        "text_collection",
        "author_collection",
        "title_collection",
        "date_collection",
    ],
    social_posts_partition=["text_collection", "date_collection"],
    contacts_partition=[
        "name_collection",
        "text_collection",
        "contact_collection",
        "department_collection",
    ],
    people_partition=[
        "text_collection",
        "name_collection",
        "position_collection",
        "department_collection",
    ],
)

# Drop any "default" connection alias left over from an earlier run of this
# cell, so the connect() call below starts from a clean state.
if connections.has_connection("default"):
    connections.remove_connection("default")  # Disconnect if it exists

# (Re)connect under the "default" alias to the server at localhost:19530.
connections.connect(alias="default", host="localhost", port="19530")

In [6]:
# NOTE(review): Collection is already imported in the first cell; this repeat
# import only keeps the cell runnable on its own.
from pymilvus import Collection
# Bind a handle to "text_collection" (expected to exist already) and call
# load() on it before it is queried.
collection = Collection("text_collection")      # Get an existing collection.
collection.load()

In [8]:
# Assuming you have already imported all necessary modules and set up the environment

# Define a function to query all items in a collection based on partition_name
def query_collection_by_partition(collection_name, partition_name, limit=10):
    """Query one collection for rows whose ``partition_name`` field matches.

    Args:
        collection_name: Name of the collection to query. It is also the key
            used to look up the requested output fields in ``collections_dict``.
        partition_name: Value compared against the ``partition_name`` field in
            the filter expression.
        limit: Maximum number of rows requested. Defaults to 10, the value
            that was previously hard-coded in the query call.

    Returns:
        The result of ``Collection.query`` (callers in this notebook iterate it
        as a sequence of dict-like rows).
    """
    collection = Collection(collection_name)
    collection.load()
    # NOTE: partition_name is interpolated verbatim into the expression, so a
    # value containing a single quote would produce a malformed filter.
    return collection.query(
        expr=f"partition_name == '{partition_name}'",
        output_fields=collections_dict[collection_name],
        limit=limit,
    )

# Define a function to combine results by uuid
def combine_results_by_uuid(partition_name):
    """Merge the rows of every collection in a partition into one dict per uuid.

    Each collection listed under ``partitions[partition_name]`` is queried via
    ``query_collection_by_partition``. For every returned row, all fields named
    in ``collections_dict`` for that collection, except ``uuid`` and
    ``partition_name``, are copied into the entry for the row's ``uuid``. A
    later row with the same uuid and field overwrites the earlier value.

    Returns:
        dict mapping uuid -> {field name: value}.
    """
    merged = {}
    bookkeeping_fields = ("uuid", "partition_name")

    for collection_name in partitions[partition_name]:
        for row in query_collection_by_partition(collection_name, partition_name):
            # One shared record per uuid, created on first sight.
            record = merged.setdefault(row["uuid"], {})
            for field in collections_dict[collection_name]:
                if field not in bookkeeping_fields:
                    record[field] = row[field]

    return merged

# Demo: merge every collection of the documents partition and print the result.
partition_name = "documents_partition"
combined_data = combine_results_by_uuid(partition_name=partition_name)
print(combined_data)


{'022dab5c-66de-4f8e-bd38-6951f7d8f5cb': {'text_id': 'a620f818-10f3-4151-ba3e-3eaf242c3bf8', 'text': 'The Philippines has been inundated with socio-politico issues that impede peopleâ€™s desire for the countryâ€™s uninterrupted progress. These issues may be complex but their antidote is contrarily simple. An answer is found in Immanuel Kantâ€™s thought on moral culture which pertains to three essential features, namely: obedience, truthfulness, and sociableness. This qualitative research focuses on the fundamental ideas in the section on moral culture in Kantâ€™s On Education. As my contribution to the fund of knowledge, I proceed by establishing the implications and antitheses of the three features of Kantâ€™s moral culture. A discussion on obedience differentiates its two kinds: absolute and voluntary. Absolute obedience is the result of compulsion by a command, while voluntary obedience is the result of confidence by a reasonable will. It is impossible to think of character formatio

In [69]:
# Assuming you have already imported all necessary modules and set up the environment

# Define a dictionary that maps each collection to its desired field
# The single payload field read from each collection.
desired_fields = dict(
    text_collection="text",
    title_collection="title",
    author_collection="author",
    contact_collection="contact",
    department_collection="department",
    name_collection="name",
    position_collection="position",
    date_collection="date",
)

# Columns of the merged table, per partition.
table_fields = dict(
    documents_partition=["text", "author", "title", "date"],
    social_posts_partition=["text", "date"],
    contacts_partition=["name", "text", "contact", "department"],
    people_partition=["text", "name", "position", "department"],
)

# Define a function to query all items in a collection based on partition_name
def query_collection_by_partition(collection_name, partition_name):
    """Query one collection for its payload field and id field.

    Rows are filtered on ``partition_name == '<partition_name>'``. The output
    holds the collection's entry in ``desired_fields`` plus an id field:
    ``text_id`` for ``text_collection`` and ``uuid`` for every other collection.

    Returns:
        The result of ``Collection.query``.
    """
    if collection_name == "text_collection":
        id_field = "text_id"
    else:
        id_field = "uuid"

    handle = Collection(collection_name)
    handle.load()

    filter_expr = f"partition_name == '{partition_name}'"
    wanted = [desired_fields[collection_name], id_field]
    return handle.query(expr=filter_expr, output_fields=wanted)


# ... [rest of your code]

# ... [rest of your code]

# Define a function to combine results by uuid (or text_id for text_collection)
def combine_results_by_uuid(partition_name):
    """Merge the payload fields of a partition's collections into one record per id.

    Every collection in ``partitions[partition_name]`` is queried. Rows from
    ``text_collection`` are keyed by ``text_id``; rows from all other
    collections are keyed by ``uuid`` (presumably ``text_id`` holds the uuid of
    the record the text belongs to -- verify against the ingestion code). Each
    record starts with all ``table_fields[partition_name]`` columns set to "".
    When a column already holds a non-empty value, further values are appended
    after ", ".

    Returns:
        dict mapping id -> {column: value}.
    """
    merged = {}

    for collection_name in partitions[partition_name]:
        id_key = "uuid" if collection_name != "text_collection" else "text_id"
        rows = query_collection_by_partition(collection_name, partition_name)

        for row in rows:
            record_id = row[id_key]
            column = desired_fields[collection_name]

            # First time this id is seen: start with every table column blank.
            if record_id not in merged:
                merged[record_id] = dict.fromkeys(table_fields[partition_name], "")
            record = merged[record_id]

            if record.get(column):
                # Column already filled for this id -> extend it.
                record[column] += ", " + row[column]
            else:
                record[column] = row[column]

    return merged

# Demo: build the merged view of the documents partition and show it as the
# cell's output.
partition_name = "documents_partition"
combined_data = combine_results_by_uuid(partition_name=partition_name)
combined_data


{'a620f818-10f3-4151-ba3e-3eaf242c3bf8': {'text': 'The Philippines has been inundated with socio-politico issues that impede peopleâ€™s desire for the countryâ€™s uninterrupted progress. These issues may be complex but their antidote is contrarily simple. An answer is found in Immanuel Kantâ€™s thought on moral culture which pertains to three essential features, namely: obedience, truthfulness, and sociableness. This qualitative research focuses on the fundamental ideas in the section on moral culture in Kantâ€™s On Education. As my contribution to the fund of knowledge, I proceed by establishing the implications and antitheses of the three features of Kantâ€™s moral culture. A discussion on obedience differentiates its two kinds: absolute and voluntary. Absolute obedience is the result of compulsion by a command, while voluntary obedience is the result of confidence by a reasonable will. It is impossible to think of character formation without also thinking of truthfulness. Kant asser

In [70]:
# Assuming you have already imported all necessary modules and set up the environment

# Define a dictionary that maps each collection to its desired field
# The single payload field read from each collection.
desired_fields = {
    "text_collection": "text",
    "title_collection": "title",
    "author_collection": "author",
    "contact_collection": "contact",
    "department_collection": "department",
    "name_collection": "name",
    "position_collection": "position",
    "date_collection": "date"
}

# Columns of the merged table, per partition. This is a module-level
# assignment and must start at column 0; an indented assignment here is an
# "unexpected indent" error.
table_fields = {
    "documents_partition": [
        "text",
        "author",
        "title",
        "date",
    ],
    "social_posts_partition": ["text", "date"],
    "contacts_partition": [
        "name",
        "text",
        "contact",
        "department",
    ],
    "people_partition": [
        "text",
        "name",
        "position",
        "department",
    ],
}

# Define a function to query all items in a collection based on partition_name
def query_collection_by_partition(collection_name, partition_name):
    """Query one collection for its payload field and id field.

    Rows are filtered on ``partition_name == '<partition_name>'``. The output
    holds the collection's entry in ``desired_fields`` plus an id field:
    ``text_id`` for ``text_collection`` and ``uuid`` for every other collection.

    Returns:
        The result of ``Collection.query``.
    """
    if collection_name == "text_collection":
        id_field = "text_id"
    else:
        id_field = "uuid"

    handle = Collection(collection_name)
    handle.load()

    filter_expr = f"partition_name == '{partition_name}'"
    wanted = [desired_fields[collection_name], id_field]
    return handle.query(expr=filter_expr, output_fields=wanted)


# ... [rest of your code]

# ... [rest of your code]

# Define a function to combine results by uuid (or text_id for text_collection)
def combine_results_by_uuid(partition_name):
    """Merge the payload fields of a partition's collections into one record per id.

    Every collection in ``partitions[partition_name]`` is queried. Rows from
    ``text_collection`` are keyed by ``text_id``; rows from all other
    collections are keyed by ``uuid`` (presumably ``text_id`` holds the uuid of
    the record the text belongs to -- verify against the ingestion code). Each
    record starts with all ``table_fields[partition_name]`` columns set to "".
    When a column already holds a non-empty value, further values are appended
    after ", ".

    Returns:
        dict mapping id -> {column: value}.
    """
    merged = {}

    for collection_name in partitions[partition_name]:
        id_key = "uuid" if collection_name != "text_collection" else "text_id"
        rows = query_collection_by_partition(collection_name, partition_name)

        for row in rows:
            record_id = row[id_key]
            column = desired_fields[collection_name]

            # First time this id is seen: start with every table column blank.
            if record_id not in merged:
                merged[record_id] = dict.fromkeys(table_fields[partition_name], "")
            record = merged[record_id]

            if record.get(column):
                # Column already filled for this id -> extend it.
                record[column] += ", " + row[column]
            else:
                record[column] = row[column]

    return merged

# Demo run: merge the documents partition and keep the result in combined_data.
partition_name = "documents_partition"
combined_data = combine_results_by_uuid(partition_name=partition_name)

def create_table(combined_data, partition_name):
    """Turn the id-keyed merge result into a row-number-keyed table.

    Args:
        combined_data: dict mapping an id to a dict of column values.
        partition_name: Key into ``table_fields`` selecting the columns.

    Returns:
        dict mapping 0-based row number -> {"uuid": id, <column>: value, ...},
        in the iteration order of ``combined_data``. Columns missing from a
        record are filled with "".
    """
    return {
        row_number: {
            "uuid": record_id,
            **{
                column: record.get(column, "")
                for column in table_fields[partition_name]
            },
        }
        for row_number, (record_id, record) in enumerate(combined_data.items())
    }

# Merge the documents partition, then reshape the merge result into table rows.
partition_name = "documents_partition"
combined_data = combine_results_by_uuid(partition_name=partition_name)
table_data = create_table(combined_data=combined_data, partition_name=partition_name)

In [75]:
# Show the table returned by create_table as this cell's output.
table_data

{0: {'uuid': 'a620f818-10f3-4151-ba3e-3eaf242c3bf8',
  'text': 'The Philippines has been inundated with socio-politico issues that impede peopleâ€™s desire for the countryâ€™s uninterrupted progress. These issues may be complex but their antidote is contrarily simple. An answer is found in Immanuel Kantâ€™s thought on moral culture which pertains to three essential features, namely: obedience, truthfulness, and sociableness. This qualitative research focuses on the fundamental ideas in the section on moral culture in Kantâ€™s On Education. As my contribution to the fund of knowledge, I proceed by establishing the implications and antitheses of the three features of Kantâ€™s moral culture. A discussion on obedience differentiates its two kinds: absolute and voluntary. Absolute obedience is the result of compulsion by a command, while voluntary obedience is the result of confidence by a reasonable will. It is impossible to think of character formation without also thinking of truthfulnes

In [78]:
# Re-run the merge and the table build for the current partition_name, then
# print the resulting table.
combined_data = combine_results_by_uuid(partition_name=partition_name)
table_data = create_table(combined_data=combined_data, partition_name=partition_name)
print(table_data)

