In [None]:
import os
import time

import IPython
from pymongo import MongoClient

from alligator import Alligator

# Wipe whatever output this cell printed on a previous run; wait=True defers
# the clearing until new output is available to replace it.
IPython.display.clear_output(wait=True)

# Path of the input CSV table. Nothing is read here; the path is only
# handed to Alligator (as input_csv) further down in this cell.
file_path = "./tables/imdb_top_100.csv"

# Connect to MongoDB and select the database this cell cleans up.
client = MongoClient("mongodb://gator-mongodb:27017/")
db = client["alligator_db"]
# Disabled leftover: it targets "crocodile_db", not the "alligator_db"
# database selected above.
# client.drop_database("crocodile_db")

# Collections listed here survive the cleanup; every other collection in
# the database is dropped.
collections_to_keep = ["bow_cache", "literal_cache", "object_cache"]
all_collections = db.list_collection_names()

for collection in all_collections:
    if collection in collections_to_keep:
        # Preserved collection: leave it untouched.
        continue
    db[collection].drop()
    print(f"Dropped collection: {collection}")

print("All unwanted collections have been dropped.")

# Service endpoints and the access token are read from the environment.
# A missing variable raises KeyError before the Alligator object is built.
entity_endpoint = os.environ["ENTITY_RETRIEVAL_ENDPOINT"]
entity_token = os.environ["ENTITY_RETRIEVAL_TOKEN"]
object_endpoint = os.environ["OBJECT_RETRIEVAL_ENDPOINT"]
literal_endpoint = os.environ["LITERAL_RETRIEVAL_ENDPOINT"]

# Per-column configuration passed to Alligator as columns_type.
# Keys are strings that look like column positions in the CSV -- TODO confirm
# against the Alligator documentation.
# NOTE(review): column "7" is listed under both "NE" and "IGNORED"; which
# entry takes effect depends on Alligator internals -- confirm the intent.
column_roles = {
    "NE": {"0": "OTHER", "7": "OTHER"},
    "LIT": {"1": "NUMBER", "2": "NUMBER", "3": "STRING", "4": "NUMBER", "5": "STRING"},
    "IGNORED": ["6", "9", "10", "7", "8"],
}

# Build the Alligator instance for the table at file_path.
gator = Alligator(
    input_csv=file_path,
    entity_retrieval_endpoint=entity_endpoint,
    entity_retrieval_token=entity_token,
    object_retrieval_endpoint=object_endpoint,
    literal_retrieval_endpoint=literal_endpoint,
    max_workers=1,
    candidate_retrieval_limit=10,
    max_candidates_in_result=3,
    batch_size=256,
    columns_type=column_roles,
)

# Run the entity linking process and report how long the run took, using
# time.perf_counter() readings taken immediately before and after.
tic = time.perf_counter()
gator.run()
toc = time.perf_counter()
elapsed = toc - tic
print("Elapsed time:", elapsed)
print("Entity linking process completed.")

In [None]:
import os

from alligator.fetchers import ObjectFetcher, LiteralFetcher

object_retrieval_endpoint = os.environ["OBJECT_RETRIEVAL_ENDPOINT"]
literal_retrieval_endpoint = os.environ["LITERAL_RETRIEVAL_ENDPOINT"]
entity_retrieval_token = os.environ["ENTITY_RETRIEVAL_TOKEN"]

o = ObjectFetcher(object_retrieval_endpoint, entity_retrieval_token)
objs = await o.fetch_objects(["Q90", "Q60"])

l = LiteralFetcher(literal_retrieval_endpoint, entity_retrieval_token)
lits = await l.fetch_literals(["Q90"])

print(objs)
print(lits)