A short post on using DomoLibrary to automate dataset tagging.

# Project Configuration


In [None]:
import domolibrary.client.DomoAuth as dmda
import os

# Build a token-based auth object for the "domo-community" instance.
# The access token is read from the environment; os.environ[...] raises
# KeyError if DOMO_DOJO_ACCESS_TOKEN is not set.
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community",
    domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"],
)

# NOTE(review): presumably this populates token_auth.token — confirm against the DomoLibrary docs
await token_auth.get_auth_token()

# sanity check: the auth object must expose a string token before the cells below use it
assert isinstance(token_auth.token, str)

# Get Datasets


In [None]:
import domolibrary.classes.DomoDatacenter as ddc
import pandas as pd

# Search the instance for datasets using the auth object created above.
# NOTE(review): no search filters are passed; presumably this returns every dataset
# visible to the token — confirm against the DomoLibrary docs
domo_datasets = await ddc.DomoDatacenter.search_datasets(auth=token_auth)

# preview the first five results
domo_datasets[0:5]

[DomoDataset(id='da552832-c04d-46ac-936a-f982d9d3f2e6', display_type='api', data_provider_type=None, name='SalesProject01_hello world_INT *', description=None, row_count=0, column_count=0, stream_id=None, owner={'id': '1893952720', 'name': 'Jae Wilson1', 'type': 'USER', 'group': False}, formula={}, schema=DomoDataset_Schema(dataset=..., columns=[]), tags=DomoDataset_Tags(dataset=..., tag_ls=['provider_type : api *', 'proj_phase : 01 *', 'proj_name : SalesProject *', 'ds_type : INT *'])),
 DomoDataset(id='a2f255fc-6c24-4e63-9b61-0c88ae7c33a5', display_type='webform', data_provider_type='webform', name='AJ | Waterfall Demo', description=None, row_count=48, column_count=4, stream_id=973, owner={'id': '1345737456', 'name': 'Aditya Jain', 'type': 'USER', 'group': False}, formula={'calculation_c4e9c63d-f4c9-4c13-a7bc-8baf83f55721': {'templateId': 2662, 'id': 'calculation_c4e9c63d-f4c9-4c13-a7bc-8baf83f55721', 'name': 'WF Values', 'formula': "SUM(CASE \n\tWHEN \n    \tDOMO_BEAST_MODE(2661) = 

In [None]:
import domolibrary.classes.DomoDataset as dmd


def process_ds(
    domo_dataset: dmd.DomoDataset,
) -> dict:  # dictionary where each attribute except _id and _ds_name becomes a tag
    """
    Derive tag attributes for a single DomoDataset from its name.

    The name is expected to look like ``<ProjectName><NN>_<free text>_<TYPE> *``,
    for example ``SalesProject01_hello world_INT *``. Every organization has its
    own naming convention, so the rules for how tags are extracted from the
    dataset must be adapted accordingly.

    Returns a dict in which the keys prefixed with ``_`` (``_id``, ``_ds_name``)
    identify the dataset and every other key is a value meant to become a tag.

    Raises IndexError when the dataset name contains no underscore.
    """
    full_name = domo_dataset.name
    name_parts = full_name.split("_", 1)

    # leading chunk = project name immediately followed by a two-character phase
    project_chunk = name_parts[0]

    # text after the first underscore; its last "_"-separated piece is the
    # dataset type, still carrying the trailing " *" marker
    remainder = name_parts[1]
    type_chunk = remainder.rsplit("_", 1)[-1]

    # fall back to data_provider_type only when display_type is empty/None
    provider_type = domo_dataset.display_type
    if not provider_type:
        provider_type = domo_dataset.data_provider_type

    tag_attrs = {"_id": domo_dataset.id, "_ds_name": full_name}
    tag_attrs["provider_type"] = provider_type
    tag_attrs["proj_name"] = project_chunk[:-2]
    tag_attrs["proj_phase"] = project_chunk[-2:]
    tag_attrs["ds_type"] = type_chunk.replace(" *", "")
    return tag_attrs

In [None]:
import pandas as pd

# Only datasets whose name ends with "*" are processed: for this project the
# trailing asterisk is assumed to mark a dataset as validated against the
# naming convention format.
dataset_tags_to_update = list(
    map(
        process_ds,
        filter(lambda ds: ds.name.endswith("*"), domo_datasets),
    )
)

pd.DataFrame(dataset_tags_to_update)

Unnamed: 0,_id,_ds_name,provider_type,proj_name,proj_phase,ds_type
0,da552832-c04d-46ac-936a-f982d9d3f2e6,SalesProject01_hello world_INT *,api,SalesProject,1,INT
1,063dd227-2a2d-4563-a372-6d08c950e86d,SalesProject02_summary_by_month_DASH *,api,SalesProject,2,DASH
2,6114155e-e6a5-4c9c-bec0-5f0b8e807579,SalesProject01_hello world3_INT *,api,SalesProject,1,INT
3,e5f44d26-4028-4282-88a9-239a0f909373,SalesProject02_summary_by_day_DASH *,api,SalesProject,2,DASH


In [None]:
def generate_tags(tag_dict):
    """
    Collapse a tag dictionary into the payload used for tagging.

    Keys that do not start with "_" become tag strings of the form
    "<key> : <value> *" (the trailing * marks tags generated by this process)
    and are collected under "tag_ls". Keys that start with "_" are passed
    through as informational fields, with every underscore removed from the
    key name (so "_ds_name" becomes "dsname").
    """
    payload = {"tag_ls": []}

    for key, value in tag_dict.items():
        if key.startswith("_"):
            # informational field, not a tag
            payload[key.replace("_", "")] = value
        else:
            payload["tag_ls"].append(f"{key} : {value} *")

    return payload

In [None]:
from pprint import pprint

# Build one payload per dataset to update; each payload's tag_ls holds the
# list of tags that will be added to that dataset.
dataset_tags_for_api = list(map(generate_tags, dataset_tags_to_update))

pprint(dataset_tags_for_api)

[{'dsname': 'SalesProject01_hello world_INT *',
  'id': 'da552832-c04d-46ac-936a-f982d9d3f2e6',
  'tag_ls': ['provider_type : api *',
             'proj_name : SalesProject *',
             'proj_phase : 01 *',
             'ds_type : INT *']},
 {'dsname': 'SalesProject02_summary_by_month_DASH *',
  'id': '063dd227-2a2d-4563-a372-6d08c950e86d',
  'tag_ls': ['provider_type : api *',
             'proj_name : SalesProject *',
             'proj_phase : 02 *',
             'ds_type : DASH *']},
 {'dsname': 'SalesProject01_hello world3_INT *',
  'id': '6114155e-e6a5-4c9c-bec0-5f0b8e807579',
  'tag_ls': ['provider_type : api *',
             'proj_name : SalesProject *',
             'proj_phase : 01 *',
             'ds_type : INT *']},
 {'dsname': 'SalesProject02_summary_by_day_DASH *',
  'id': 'e5f44d26-4028-4282-88a9-239a0f909373',
  'tag_ls': ['provider_type : api *',
             'proj_name : SalesProject *',
             'proj_phase : 02 *',
             'ds_type : DASH *']}]


In [None]:
import domolibrary.classes.DomoDataset as dmd
import asyncio


async def process_dataset_tags(dataset_id, tag_ls: [str], auth: dmda.DomoAuth):
    """
    Replace the process-managed tags on a single Domo dataset.

    Fetches the dataset by id, removes every existing tag that ends with " *"
    (the suffix used for tags previously added by this process), then adds
    the tags in ``tag_ls``.

    Returns a dict with the dataset's id, name, and the tag list read from
    the dataset object after the remove/add calls.
    """
    dataset = await dmd.DomoDataset.get_from_id(dataset_id=dataset_id, auth=auth)

    # tags created by an earlier run are recognised by their " *" suffix
    stale_tags = [
        existing for existing in dataset.tags.tag_ls if existing.endswith(" *")
    ]
    if stale_tags:
        await dataset.tags.remove(remove_tag_ls=stale_tags)

    await dataset.tags.add(add_tag_ls=tag_ls)

    return dict(
        id=dataset.id,
        name=dataset.name,
        tags=dataset.tags.tag_ls,
    )

In [None]:
import pandas as pd

res = await asyncio.gather(
    *[
        process_dataset_tags(
            dataset_id=row.get("id"), tag_ls=row.get("tag_ls"), auth=token_auth
        )
        for row in dataset_tags_for_api
    ]
)

pd.DataFrame(res)

Unnamed: 0,id,name,tags
0,da552832-c04d-46ac-936a-f982d9d3f2e6,SalesProject01_hello world_INT *,"[provider_type : api *, proj_phase : 01 *, pro..."
1,063dd227-2a2d-4563-a372-6d08c950e86d,SalesProject02_summary_by_month_DASH *,"[provider_type : api *, proj_phase : 02 *, ds_..."
2,6114155e-e6a5-4c9c-bec0-5f0b8e807579,SalesProject01_hello world3_INT *,"[provider_type : api *, proj_phase : 01 *, pro..."
3,e5f44d26-4028-4282-88a9-239a0f909373,SalesProject02_summary_by_day_DASH *,"[provider_type : api *, proj_phase : 02 *, ds_..."
