In [16]:
from pymilvus import FieldSchema, CollectionSchema, DataType, MilvusClient

dbName = "Profspective"
collectionName = "ProfReview"

In [17]:
from dotenv import load_dotenv,  find_dotenv
load_dotenv(find_dotenv('.env.local'))
import os

In [18]:
ENDPOINT = "https://in03-e01b833309331cb.api.gcp-us-west1.zillizcloud.com"
TOKEN = os.getenv("ZILLIZ_API_KEY")

client = MilvusClient(uri=ENDPOINT, token = TOKEN)

In [19]:
DIMENSIONS = 384
MAX_STRING_LENGTH = 500

fields = [
	FieldSchema(name = "id", dtype = DataType.INT64, is_primary = True, description = "Id"),
 	FieldSchema(name = "embedding", dtype = DataType.FLOAT_VECTOR, dim = DIMENSIONS, description = "Embeddings"),
  	FieldSchema(name = "professor", dtype = DataType.VARCHAR, max_length = MAX_STRING_LENGTH, description = "Prof Name"),
    FieldSchema(name = "review", dtype = DataType.VARCHAR, max_length = MAX_STRING_LENGTH, default_value = "" , description = "Prof Reivew"),
    FieldSchema(name = "stars", dtype = DataType.INT8, default_value = 5, description = "Prof Stars"),
]

schema = CollectionSchema(fields = fields, enable_dynamic_field = True, auto_id = True, description="Schema For Professor Review")

In [20]:
index_params = client.prepare_index_params()

index_params.add_index(
    field_name = "embedding",
	index_type = "IVF_FLAT",
 	metric_type = "L2",
 	params = {"nlist": 40},
)

In [21]:
# '''Used to Clear the Collection (deletes collection and all data)'''

# if client.has_collection("ProfReview"):
#     client.drop_collection(collection_name = "ProfReview")

In [22]:
def dropIndex():
    if(client.has_collection(collectionName)):
        fieldName = "embedding"
        
        existingFieldNames = client.list_indexes(collection_name = collectionName)
        client.release_collection(collection_name = collectionName)
        if(fieldName in existingFieldNames):
            client.drop_index(collection_name = collectionName, index_name = fieldName)

In [23]:
# # Drop the Index
# dropIndex()

client.create_collection(collection_name = collectionName, schema = schema, index_params = index_params)

In [24]:
from sentence_transformers import SentenceTransformer
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [25]:
def reviewExists(professor, review, subject, stars):
    client.load_collection(collection_name = collectionName)
    
    queryFilter = f'professor == "{professor}" && review == "{review}" && subject == "{subject}" && stars == {stars}'
    result = client.query(collection_name="ProfReview", filter=queryFilter, output_fields=["id"])
    
    return len(result) > 0

In [26]:
def validateProfData(profData):
    requiredField = ["professor", "review", "subject", "stars"]
    
    for field in requiredField:
        if field not in profData:
            raise Exception(f'Missing Field in Prof Data "{field}"')

In [27]:
import json

jsonData = json.load(open("../review.json"))
reviews = jsonData["reviews"]

In [28]:
insertDb = []

for profData in reviews:
    validateProfData(profData)
    
    if(reviewExists(profData["professor"], profData["review"], profData["subject"], profData["stars"])):
        print("Review Already Exists in Vector Database")
        continue
    
    entry = profData.copy()
    
    reviewEmbedding = embedder.encode(profData["professor"] + profData["review"])
    
    entry["embedding"] = reviewEmbedding
    
    insertDb.append(entry)
    
if(insertDb):
    output = client.insert(collection_name = collectionName, data = insertDb)
    print(output)
else:
    print("Nothing to Add")

Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already Exists in Vector Database
Review Already E

In [29]:
client.close()