In [3]:
import numpy as np  # numpy!
import seaborn as sns # visualisation!
import matplotlib.pyplot as plt # visualisation!
import pandas as pd # dataframes & data analysis!
from ast import literal_eval
import re #for Regex
from dotenv import load_dotenv
import os

### MongoDB Connection

In [4]:
from pymongo import MongoClient

# Load the key/value pairs from the local .env file into the environment.
load_dotenv()

uri = os.getenv('URI')
if not uri:
    # Without this guard MongoClient(None) falls back to its default host
    # (localhost), so a missing secret would silently target the wrong server.
    raise RuntimeError(
        "Environment variable 'URI' is not set; add it to your .env file "
        "before connecting to MongoDB."
    )

# Create a new client and connect to the server
client = MongoClient(uri)

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # NOTE(review): a failed ping is only printed, so later cells still run
    # against this client; consider re-raising if that is not intended.
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [5]:
# Handles used by the rest of the notebook: the 'lighthouse' database and
# its 'llm_datas' collection.
db = client.get_database('lighthouse')
collection = db.get_collection('llm_datas')

# Show one sample document; iterate over collection.find() instead to walk
# through every document in the collection.
collection.find_one()

{'_id': ObjectId('6697b3b814a5850299849064'),
 'llm_data_id': 1,
 'name': 'ToyMix',
 'type_id': ObjectId('6697b42814a585029984929e'),
 'organization_id': ObjectId('6697b54414a5850299849566'),
 'description_id': ObjectId('6697c27414a5850299849af3'),
 'created_date_id': ObjectId('6697c08414a58502998497c4'),
 'url_id': ObjectId('6697c29714a5850299849ba8'),
 'datasheet_id': ObjectId('6697c24314a585029984994c'),
 'modality_id': ObjectId('6697d1719fcba3f46768dc4b'),
 'size_id': ObjectId('6697d1fb9fcba3f46768dd5d'),
 'sample_id': ObjectId('6697bf5414a58502998496df'),
 'analysis_id': ObjectId('6697c1a414a58502998498d9'),
 'dependencies_id': ObjectId('669a3ac8db0298c298c50a20'),
 'included_id': ObjectId('6697d1449fcba3f46768db83'),
 'excluded_id': ObjectId('6697c2cf14a5850299849cff'),
 'quality_control_id': ObjectId('6697d1dc9fcba3f46768dd33'),
 'access_id': ObjectId('6697bcf614a58502998495cc'),
 'license_id': ObjectId('6697be3f14a58502998495ee'),
 'intended_uses_id': ObjectId('6697d15a9fcba3f4

In [40]:
## Loop through the documents in llm_datas and resolve their *_id reference fields

def update_llm_data_with_objects(database=None):
    """Point each reference field in ``llm_datas`` at the ``_id`` of its target document.

    For every document in ``llm_datas``, each field whose name ends in ``_id``
    (except ``_id`` and ``llm_data_id``) is looked up in the collection named
    ``llm_<field name without the trailing "_id">`` by matching that same
    field name against the current value. When a match is found, the field
    is overwritten with the matched document's ``_id``. Fields with no match
    are left untouched.

    Args:
        database: Object indexable by collection name whose collections
            provide ``find``, ``find_one`` and ``update_one`` (for example a
            pymongo ``Database``). Defaults to the notebook-level ``db``.

    Returns:
        None. One status line is printed per document.
    """
    if database is None:
        database = db  # handle created in the connection cells

    llm_collection = database['llm_datas']

    for doc in llm_collection.find():
        model_name = doc["name"]
        document_id = doc["_id"]
        updates = {}

        for field, ref_id in doc.items():
            # Only reference fields are rewritten; the document's own keys are not.
            if not field.endswith("_id") or field in ('_id', 'llm_data_id'):
                continue

            # e.g. 'intended_uses_id' -> collection 'llm_intended_uses'
            collection_name = 'llm_' + field.rsplit("_", 1)[0]
            referenced_doc = database[collection_name].find_one({field: ref_id})
            if referenced_doc:  # Check if referenced document exists
                updates[field] = referenced_doc['_id']

        if not updates:
            # Nothing resolved (for example on a re-run). An empty "$set" is
            # rejected by MongoDB servers older than 5.0 and is a no-op on
            # newer ones, so skip the write.
            print(f"Document with ID {document_id}, model: {model_name} has no references to update; skipped.")
            continue

        result = llm_collection.update_one({"_id": document_id}, {"$set": updates})

        if result.matched_count > 0:
            print(f"Document with ID {document_id}, model: {model_name} updated successfully.")
        else:
            print(f"No document found with ID {document_id}.")
                
        
# Run the reference rewrite; this writes to the llm_datas collection of `db`.
update_llm_data_with_objects()

Document with ID 6697b3b814a5850299849064, model: ToyMix updated successfully.
Document with ID 6697b3b814a5850299849065, model: LargeMix updated successfully.
Document with ID 6697b3b814a5850299849066, model: UltraLarge updated successfully.
Document with ID 6697b3b814a5850299849067, model: Lag-LLaMA updated successfully.
Document with ID 6697b3b814a5850299849068, model: Prithvi updated successfully.
Document with ID 6697b3b814a5850299849069, model: Watsonx.ai updated successfully.
Document with ID 6697b3b814a585029984906a, model: Granite updated successfully.
Document with ID 6697b3b814a585029984906b, model: Animagine XL 3.1 updated successfully.
Document with ID 6697b3b814a585029984906c, model: Portkey updated successfully.
Document with ID 6697b3b814a585029984906d, model: Viable updated successfully.
Document with ID 6697b3b814a585029984906e, model: Auto-GPT updated successfully.
Document with ID 6697b3b814a585029984906f, model: Bark updated successfully.
Document with ID 6697b3b81

#### Join the ObjectIds of models with their dependencies

In [18]:
def update_dependencies_id(database=None):
    """Replace the model ids in ``llm_dependencies`` with ``llm_datas`` document ``_id`` values.

    For every document in ``llm_dependencies``, each entry of its
    ``dependencies_llm_ids`` list is looked up in ``llm_datas`` by
    ``llm_data_id`` and replaced with the matching document's ``_id``.
    ``None`` entries are kept as ``None``. Entries with no match are kept
    unchanged, so running the function again on converted data does not fail
    or lose values. Documents without a ``dependencies_llm_ids`` field are
    skipped.

    Args:
        database: Object indexable by collection name whose collections
            provide ``find``, ``find_one`` and ``update_one`` (for example a
            pymongo ``Database``). Defaults to the notebook-level ``db``.

    Returns:
        None. Progress is printed per document.
    """
    if database is None:
        database = db  # handle created in the connection cells

    dependencies_collection = database['llm_dependencies']
    llm_data_table = database['llm_datas']  # same for every lookup, so fetch once

    for doc in dependencies_collection.find():
        document_id = doc["_id"]
        dependency_ids = doc.get("dependencies_llm_ids")
        if dependency_ids is None:
            print(f"Document with ID {document_id} has no dependencies_llm_ids; skipped.")
            continue

        object_id_list = []
        for dependency_id in dependency_ids:
            if dependency_id is None:
                object_id_list.append(None)
                continue

            # Lookup id in main llm table
            referenced_doc = llm_data_table.find_one({'llm_data_id': dependency_id})
            if referenced_doc is None:
                # Unknown or already-converted value: keep it rather than
                # failing on referenced_doc["_id"] or dropping the entry.
                object_id_list.append(dependency_id)
            else:
                object_id_list.append(referenced_doc["_id"])

        print(f'Ids to add: {object_id_list}')
        result = dependencies_collection.update_one(
            {"_id": document_id},
            {"$set": {'dependencies_llm_ids': object_id_list}},
        )

        if result.matched_count > 0:
            print(f"Document with ID {document_id} updated successfully.")
        else:
            print(f"No document found with ID {document_id}.")

# Run the conversion; this writes to the llm_dependencies collection of `db`.
update_dependencies_id()

Ids to add: [None, None]
Document with ID 669a3ac8db0298c298c50945 updated successfully.
Ids to add: [ObjectId('6697b3b814a58502998491bd')]
Document with ID 669a3ac8db0298c298c50946 updated successfully.
Ids to add: [None]
Document with ID 669a3ac8db0298c298c50947 updated successfully.
Ids to add: [None, None, ObjectId('6697b3b814a585029984911d')]
Document with ID 669a3ac8db0298c298c50948 updated successfully.
Ids to add: [ObjectId('6697b3b814a5850299849267'), ObjectId('6697b3b814a5850299849170'), ObjectId('6697b3b814a585029984920e'), None, ObjectId('6697b3b814a58502998491e5'), ObjectId('6697b3b814a5850299849210')]
Document with ID 669a3ac8db0298c298c50949 updated successfully.
Ids to add: [ObjectId('6697b3b814a5850299849184'), None]
Document with ID 669a3ac8db0298c298c5094a updated successfully.
Ids to add: [None]
Document with ID 669a3ac8db0298c298c5094b updated successfully.
Ids to add: [ObjectId('6697b3b814a58502998491fa')]
Document with ID 669a3ac8db0298c298c5094c updated successf