### Imports

In [None]:
from vastdb.session import Session
from dotenv import load_dotenv
import os
import pyarrow as pa
import numpy as np

# Read the VAST DB credentials and endpoint from environment variables
# (load_dotenv() runs first so a .env file can supply them), then open the
# session that the cells below reuse.
load_dotenv()
vast_db_access, vast_db_secret, endpoint = (
    os.environ.get(var_name)
    for var_name in ("VAST_DB_ACCESS", "VAST_DB_SECRET", "VAST_DB_ENDPOINT")
)
sess = Session(access=vast_db_access, secret=vast_db_secret, endpoint=endpoint)

### Update row(s)

#### Update or zero multiplicity

In [None]:
# Reset ``multiplicity`` to 1 for the matching rows of one dataset in po_wip.
# To zero the column instead, change the threshold below to 0 and the
# replacement value to np.int32(0).
with sess.transaction() as tx:
    table = tx.bucket("colabfit").schema("dev").table("po_wip")
    in_dataset = table["dataset_id"] == "DS_otx1qc9f3pm4_0"
    above_threshold = table["multiplicity"] > 1
    # internal_row_id=True so each batch carries the "$row_id" column that is
    # handed back to update() below.
    reader = table.select(
        predicate=in_dataset & above_threshold,
        internal_row_id=True,
    )
    for batch in reader:
        frame = batch.to_pandas()
        frame["multiplicity"] = np.int32(1)
        changes = pa.table(frame[["multiplicity", "$row_id"]])
        print(changes.schema)
        table.update(changes, columns=["multiplicity"])

#### Change dataset names

In [None]:
# Rename a dataset: every ds_wip row whose ``name`` equals ``ds_name`` gets
# ``new_ds_name`` written into that column. Fill in both values before running.
ds_name = ""
new_ds_name = ""
with sess.transaction() as tx:
    table = tx.bucket("colabfit").schema("dev").table("ds_wip")
    name_matches = table["name"] == ds_name
    reader = table.select(predicate=name_matches, internal_row_id=True)
    for batch in reader:
        frame = batch.to_pandas()
        frame["name"] = new_ds_name
        # Only the renamed column and "$row_id" are shown and sent back.
        renamed = frame[["name", "$row_id"]]
        print(renamed)
        changes = pa.table(renamed)
        print(changes.schema)
        table.update(changes, columns=["name"])

#### Edit dataset description

In [None]:
# Replace the description of the ds_wip row(s) whose ``id`` equals
# ``dataset_id``. Fill in both placeholders before running.
dataset_id = ""
new_description = ""

with sess.transaction() as tx:
    table = tx.bucket("colabfit").schema("dev").table("ds_wip")
    reader = table.select(
        predicate=(table["id"] == dataset_id),
        internal_row_id=True,
    )
    for batch in reader:
        pbatch = batch.to_pandas()
        pbatch["description"] = new_description
        # Show the dataset name next to the new description before writing.
        print(pbatch.loc[:, ["name", "description", "$row_id"]])
        # Send back the edited column plus "$row_id". The previous version
        # sent and updated "name", so the description was never written.
        pa_table = pa.table(pbatch.loc[:, ["description", "$row_id"]])
        print(pa_table.schema)
        table.update(pa_table, columns=["description"])

#### Add dataset DOIs

In [None]:
# Mapping of dataset id -> DOI string. The following cell writes each DOI into
# the ds_wip row with the matching id. Replace the placeholder entry before
# running.
doi_map = {"": ""}
# NOTE(review): bare expression with no effect on doi_map -- possibly leftover
# notebook output; confirm whether it can be dropped.
[""]

In [None]:
# Write each DOI from ``doi_map`` into the ``doi`` column of the ds_wip row(s)
# whose ``id`` equals the mapping key.
# The loop variable is ``ds_id`` rather than ``id`` so the builtin id() is not
# shadowed for the rest of the notebook session.
for ds_id, doi in doi_map.items():
    # First transaction: fetch the matching row(s) together with "$row_id".
    with sess.transaction() as tx:
        table = tx.bucket("colabfit").schema("dev").table("ds_wip")
        row = table.select(predicate=table["id"] == ds_id, internal_row_id=True)
        row = row.read_all()

    if row.num_rows == 0:
        # Nothing matched: say so instead of silently sending an empty update.
        print(f"No ds_wip row found with id {ds_id!r}; skipping")
        continue

    row = row.to_pandas()
    row["doi"] = doi
    update_table = pa.table(row[["id", "doi", "$row_id"]])
    # Second transaction: write the new value; only "doi" is listed as an
    # updated column.
    with sess.transaction() as tx:
        table = tx.bucket("colabfit").schema("dev").table("ds_wip")
        table.update(rows=update_table, columns=["doi"])

In [None]:
# Set one column to a single value on the ds_wip row(s) whose ``id`` equals
# ``target_id``. The variable is not called ``id`` so the builtin id() stays
# usable for the rest of the notebook session.
target_id = "dataset_id"
col = "publication_year"
value = "2024"  # NOTE(review): a string -- confirm it matches the column type
# First transaction: fetch the matching row(s) together with "$row_id".
with sess.transaction() as tx:
    table = tx.bucket("colabfit").schema("dev").table("ds_wip")
    row = table.select(predicate=table["id"] == target_id, internal_row_id=True)
    row = row.read_all()
if row.num_rows == 0:
    # Nothing matched: say so instead of silently sending an empty update.
    print(f"No ds_wip row found with id {target_id!r}; nothing updated")
else:
    row = row.to_pandas()
    row[col] = value
    update_table = pa.table(row[["id", col, "$row_id"]])
    # Second transaction: write the new value; only ``col`` is listed as an
    # updated column.
    with sess.transaction() as tx:
        table = tx.bucket("colabfit").schema("dev").table("ds_wip")
        table.update(rows=update_table, columns=[col])

### Delete a row

In [None]:
# Delete the ds_wip row(s) whose ``id`` equals ``target_id``. The variable is
# not called ``id`` so the builtin id() stays usable in the notebook session.
target_id = "dataset_id"
with sess.transaction() as tx:
    table = tx.bucket("colabfit").schema("dev").table("ds_wip")
    reader = table.select(predicate=table["id"] == target_id, internal_row_id=True)
    # read_all() gathers every batch. read_next_batch() would return only the
    # first one and raises StopIteration when the reader has no batches.
    rows = reader.read_all()
    if rows.num_rows:
        table.delete(rows)
    else:
        print(f"No ds_wip row found with id {target_id!r}; nothing deleted")