In [1]:
from vastdb.session import Session
from dotenv import load_dotenv
from colabfit.tools.schema import *
import os

# Read the VAST DB connection settings from the environment. load_dotenv()
# is called first so the os.getenv() lookups below run after it.
load_dotenv()
endpoint, access, secret = (
    os.getenv(key)
    for key in ("VAST_DB_ENDPOINT", "VAST_DB_ACCESS", "VAST_DB_SECRET")
)
# Single session object used by the transaction blocks in the cells below.
sess = Session(access=access, secret=secret, endpoint=endpoint)

In [None]:
date = "20240919"
table_names = ["ds", "cs", "co", "po"]

# Pass 1: move each current prod table aside under a date-stamped name.
# Every rename runs in its own transaction.
for name in table_names:
    with sess.transaction() as tx:
        tx.bucket("colabfit-prod").schema("prod").table(name).rename(f"{name}_{date}")

# Pass 2: give each "tmp_<name>" table the name that pass 1 just freed.
# NOTE(review): the prod projection cell in this notebook addresses tables as
# "<name>_tmp" (e.g. "co_tmp"), not "tmp_<name>" -- confirm which form exists
# before running this cell.
for name in table_names:
    old_name = f"tmp_{name}"
    with sess.transaction() as tx:
        tx.bucket("colabfit-prod").schema("prod").table(old_name).rename(name)

In [None]:
# See projections
import pyarrow as pa


def columns(self) -> pa.Schema:
    """Build an Arrow schema describing the columns of a projection.

    Pages through ``list_projection_columns`` starting at key 0. Paging stops
    at the first empty page or at the first page that is not flagged as
    truncated. The resulting schema is also stored on ``self.arrow_schema``.

    Args:
        self: Projection-like object. The code reads its ``tx``, ``bucket``,
            ``schema``, ``table`` and ``name`` attributes.

    Returns:
        A ``pa.Schema`` built from the first two items of every listed
        column entry.
    """
    collected = []
    cursor = 0
    truncated = True
    while truncated:
        page, cursor, truncated, _count = self.tx._rpc.api.list_projection_columns(
            self.bucket.name,
            self.schema.name,
            self.table.name,
            self.name,
            txid=self.table.tx.txid,
            next_key=cursor,
        )
        if not page:
            # An empty page ends the listing regardless of the truncation flag.
            break
        collected.extend(page)
    self.arrow_schema = pa.schema([(entry[0], entry[1]) for entry in collected])
    return self.arrow_schema


def _print_projections(table):
    """Print a header, then the name and column schema of each projection on ``table``."""
    print("projections\n")
    for projection in table.projections():
        print(projection.name)
        print(columns(projection))


# Dev namespace: projections first, then the table object itself.
with sess.transaction() as tx:
    table = tx.bucket("colabfit").schema("dev").table("co_wip")
    _print_projections(table)
    print("table\n", table)

# For prod namespace
with sess.transaction() as tx:
    table = tx.bucket("colabfit-prod").schema("prod").table("co")
    _print_projections(table)

In [None]:
# Drop projections

# with sess.transaction() as tx:
#     table = tx.bucket("colabfit-prod").schema("prod").table("co_tmp")
#     table.projection("co_dsid").drop()

In [None]:
# Make projections on the dev tables (bucket "colabfit", schema "dev").
#
# Our PO projections have been:
#   po-dataset_id        sorted: dataset_id (string)        unsorted: id (string)
#   po-configuration_id  sorted: configuration_id (string)  unsorted: id (string)
#   po-id-all            sorted: id (string)                unsorted: all the rest
#
# Each spec is (table name, projection name, sorted columns, candidate columns).
# The unsorted columns are the candidate columns minus the sorted ones, so an
# "-all" projection just passes the full field list of the matching schema.
# The whole list is built before the loop, so a missing schema name raises
# before any projection has been created.
#
# NOTE(review): the DS/CS projections here are named "ds_id" and "cs_id_dsid",
# while the prod cell in this notebook uses "ds-id-all" and "cs-id-all" --
# confirm whether the difference is intentional.
dev_projection_specs = [
    # COs
    ("co_ocall", "co-dataset_ids", ["dataset_ids"], ["id"]),
    ("co_ocall", "co-id-all", ["id"], config_schema.fieldNames()),
    # POs
    ("po_wip", "po-dataset_id", ["dataset_id"], ["id"]),
    ("po_wip", "po-configuration_id", ["configuration_id"], ["id"]),
    ("po_wip", "po-id-all", ["id"], property_object_schema.fieldNames()),
    # DSs
    ("ds_wip2", "ds_id", ["id"], dataset_schema.fieldNames()),
    # CSs
    ("cs_wip", "cs_id_dsid", ["id"], configuration_set_schema.fieldNames()),
]

for table_name, projection_name, sorted_columns, candidate_columns in dev_projection_specs:
    unsorted_columns = [col for col in candidate_columns if col not in sorted_columns]
    # One transaction per projection, as before.
    with sess.transaction() as tx:
        table = tx.bucket("colabfit").schema("dev").table(table_name)
        table.create_projection(
            projection_name=projection_name,
            sorted_columns=sorted_columns,
            unsorted_columns=unsorted_columns,
        )

In [None]:
# Make projections on the prod staging tables (bucket "colabfit-prod",
# schema "prod").
#
# Each spec is (table name, projection name, sorted columns, candidate columns).
# The unsorted columns are the candidate columns minus the sorted ones, so an
# "-all" projection just passes the full field list of the matching schema.
# The whole list is built before the loop, so a missing schema name raises
# before any projection has been created.
prod_projection_specs = [
    # COs
    ("co_tmp", "co-dataset_ids", ["dataset_ids"], ["id"]),
    ("co_tmp", "co-id-all", ["id"], config_schema.fieldNames()),
    # POs
    ("po_tmp", "po-dataset_id", ["dataset_id"], ["id"]),
    ("po_tmp", "po-configuration_id", ["configuration_id"], ["id"]),
    ("po_tmp", "po-id-all", ["id"], property_object_schema.fieldNames()),
    # DSs
    ("ds_tmp", "ds-id-all", ["id"], dataset_schema.fieldNames()),
    # CSs
    ("cs_tmp", "cs-id-all", ["id"], configuration_set_schema.fieldNames()),
]

for table_name, projection_name, sorted_columns, candidate_columns in prod_projection_specs:
    unsorted_columns = [col for col in candidate_columns if col not in sorted_columns]
    # One transaction per projection, as before.
    with sess.transaction() as tx:
        table = tx.bucket("colabfit-prod").schema("prod").table(table_name)
        table.create_projection(
            projection_name=projection_name,
            sorted_columns=sorted_columns,
            unsorted_columns=unsorted_columns,
        )

In [None]:
# Rename a single table in the dev namespace (bucket "colabfit", schema "dev").
# Fill in both names before running this cell.
old_name = ""
new_name = ""
if old_name and new_name:
    with sess.transaction() as tx:
        table = tx.bucket("colabfit").schema("dev").table(old_name)
        table.rename(new_name)
else:
    # The placeholders are still empty (e.g. during "Run All"): do not send a
    # table lookup or rename for "" to the database.
    print("Skipping dev rename: set old_name and new_name first.")

In [None]:
# Rename a single table in the prod namespace (bucket "colabfit-prod",
# schema "prod"). Fill in both names before running this cell.
old_name = ""
new_name = ""
if old_name and new_name:
    with sess.transaction() as tx:
        table = tx.bucket("colabfit-prod").schema("prod").table(old_name)
        table.rename(new_name)
else:
    # The placeholders are still empty (e.g. during "Run All"): do not send a
    # table lookup or rename for "" to the prod database.
    print("Skipping prod rename: set old_name and new_name first.")