In [None]:
# | default_exp classes.DomoLineage

In [None]:
# | exporti

import asyncio
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, List

import httpx
from nbdev.showdoc import patch_to

import domolibrary.classes.DomoDatacenter as dmdc
import domolibrary.client.DomoAuth as dmda
import domolibrary.routes.datacenter as datacenter_routes
import domolibrary.utils.chunk_execution as ce

In [None]:
# | export
class DomoLineage_Type(Enum):
    """Lineage entity-type strings, keyed by the name of the Domo class they describe.

    `get_entity_lineage_upstream` looks a member up by the parent object's
    class name and sends the member's value as the `entity_type` of the
    lineage request.
    """

    DomoDataflow = "DATAFLOW"
    DomoDataset = "DATA_SOURCE"
    DomoPublication = "PUBLICATION"


@dataclass
class DomoLineage:
    """Lineage bookkeeping attached to a parent Domo object.

    Attributes:
        parent: the object whose lineage is tracked. It must expose `auth`
            when no `auth` is passed here, and `id` when
            `get_entity_lineage_upstream` is called without an `entity_id`.
        auth: credentials used for API calls; defaults to `parent.auth`.
        page_id_ls, card_id_ls, dataflow_id_ls, dataset_id_ls: ids collected
            for each kind of entity.
        entity_ls: entity objects loaded by `get_entity_lineage_upstream`.
    """

    # `Any` / `List[...]` replace the builtin function `any` and the list
    # literal `[str]`, neither of which is a type.
    parent: Any = field(repr=False)

    auth: dmda.DomoAuth = field(repr=False, default=None)

    page_id_ls: List[str] = field(default_factory=list)
    card_id_ls: List[str] = field(default_factory=list)

    dataflow_id_ls: List[str] = field(default_factory=list)
    dataset_id_ls: List[str] = field(default_factory=list)

    entity_ls: List[Any] = field(default_factory=list)

    def __post_init__(self):
        # An explicitly supplied auth wins; otherwise borrow the parent's.
        self.auth = self.auth or self.parent.auth

In [None]:
# | exporti


@patch_to(DomoLineage)
async def get_entity_lineage_upstream(
    self: DomoLineage,
    entity_id=None,
    entity_type=None,
    auth: dmda.DomoAuth = None,
    session: httpx.AsyncClient = None,
    debug_api: bool = False,
    debug_prn: bool = False,
    return_raw: bool = False,
):
    """Fetch the upstream lineage of an entity and load each dataset/dataflow in it.

    Args:
        entity_id: id to look up; defaults to `self.parent.id`.
        entity_type: lineage type string; defaults to the `DomoLineage_Type`
            member named after the parent's class.
        auth: credentials; defaults to `self.auth`.
        session: forwarded to the lineage route only.
        debug_api: forwarded to the lineage route.
        debug_prn: accepted but not used by this function.
        return_raw: when true, return the route response untouched.

    Returns:
        The route response when `return_raw` is true or its status is not 200;
        otherwise `self.entity_ls`, freshly rebuilt from the response.
    """
    import domolibrary.classes.DomoDataflow as dmdf
    import domolibrary.classes.DomoDataset as dmds

    # Fall back to the parent / the lineage for anything the caller left out.
    if not entity_id:
        entity_id = self.parent.id
    if not entity_type:
        entity_type = DomoLineage_Type[self.parent.__class__.__name__].value
    if not auth:
        auth = self.auth

    res = await datacenter_routes.get_lineage_upstream(
        auth=auth,
        entity_type=entity_type,
        entity_id=entity_id,
        session=session,
        debug_api=debug_api,
    )

    if return_raw or res.status != 200:
        return res

    # Start from a clean slate so repeated calls do not accumulate entries.
    self.entity_ls, self.dataset_id_ls, self.dataflow_id_ls = [], [], []

    for _, node in res.response.items():
        node_type = node["type"]
        node_id = node["id"]

        if node_type == "DATA_SOURCE":
            if node_id in self.dataset_id_ls:
                continue

            # The id is recorded before the fetch, as in the original flow.
            self.dataset_id_ls.append(node_id)
            domo_ds = await dmds.DomoDataset.get_from_id(
                auth=auth, dataset_id=node_id
            )
            self.entity_ls.append(domo_ds)

        elif node_type == "DATAFLOW":
            if node_id in self.dataflow_id_ls:
                continue

            self.dataflow_id_ls.append(node_id)
            domo_df = await dmdf.DomoDataflow.get_from_id(
                auth=auth, dataflow_id=node_id
            )
            self.entity_ls.append(domo_df)

    return self.entity_ls

In [None]:
import os
import pandas as pd
import domolibrary.classes.DomoDataflow as dmdf


# Authenticate against the `domo-community` instance; the access token is read
# from the environment rather than being hard-coded in the notebook.
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community",
    domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"],
)

# Dataflow object built from just an id and auth; it serves as the lineage parent.
domo_dataflow = dmdf.DomoDataflow(id=112, auth=token_auth)

domo_lineage = DomoLineage(auth=token_auth, parent=domo_dataflow)

# With return_raw=False a successful call returns the list of loaded entities.
await domo_lineage.get_entity_lineage_upstream(debug_api=False, return_raw=False)

[DomoDataflow(id=112, name='Time Zone Test', auth=DomoTokenAuth(domo_instance='domo-community', token_name='token_auth', is_valid_token=True, url_manual_login='https://domo-community.domo.com/auth/index?domoManualLogin=true'), owner=None, description=None, tags=None, actions=[DomoDataflow_Action(type='LoadFromVault', id='74e125a8-76fb-49e6-8825-c1d0ee121641', name='Time Zone Test', data_source_id='3929c2cd-6549-4f53-9539-0f3e34233ef0', sql=None), DomoDataflow_Action(type='ExpressionEvaluator', id='7a287510-67d9-40e2-9a6e-d8f247b8b005', name='Offset Prep', data_source_id=None, sql=None), DomoDataflow_Action(type='DateCalculator', id='be94da12-290e-41bc-a8f2-a076348a9b1a', name='Date Operations', data_source_id=None, sql=None), DomoDataflow_Action(type='ExpressionEvaluator', id='65e20743-2ca9-4bd7-ac1d-7b4a27ee9c5c', name='Time Conversions 1', data_source_id=None, sql=None), DomoDataflow_Action(type='PublishToVault', id='3e7dd3a3-9f6a-4a66-a709-4e9f2172bb36', name='Time Zone Test | Outpu

In [None]:
# | exporti


@patch_to(DomoLineage)
async def _get_page_card_ids(self: DomoLineage):
    """Collect the ids of the cards on the parent's content pages into `card_id_ls`.

    Cards are requested for every id in `self.parent.content_page_id_ls`, and
    each card id not already present is appended to `self.card_id_ls`.

    Returns:
        `self.card_id_ls`, or None when the parent lists no content pages or
        the page requests produced no results.
    """
    import domolibrary.classes.DomoPage as dmpg

    page_id_ls = self.parent.content_page_id_ls
    if not page_id_ls:
        return None

    # Use the lineage's own credentials (resolved in `__post_init__`) so an
    # explicitly supplied auth is honoured; fall back to the parent's.
    auth = self.auth or self.parent.auth

    page_card_ls = await ce.gather_with_concurrency(
        *[
            dmpg.DomoPage.get_cards(page_id=page_id, auth=auth)
            for page_id in page_id_ls
        ],
        n=60,
    )

    if not page_card_ls:
        return None

    if not self.card_id_ls:
        self.card_id_ls = []

    for page_cards in page_card_ls:
        # Skip pages whose card result is empty or None.
        if not page_cards:
            continue

        for card in page_cards:
            if card.id not in self.card_id_ls:
                self.card_id_ls.append(card.id)

    return self.card_id_ls

In [None]:
# async def _get_entity_ls_lineage(self,
#                                  domo_entity: dmdc.DomoEntity,
#                                  full_auth=None,
#                                  session: dmda.DomoFullAuth = None,
#                                  debug: bool = False, debug_prn: bool = False):
#     full_auth = full_auth or self.full_auth

#     entity_attribute = f"{domo_entity.name.lower()}_id_ls"

#     return await asyncio.gather(*[self.get_entity_lineage_upstream(full_auth=full_auth,
#                                                                    entity_id=entity_id,
#                                                                    entity_type=domo_entity.value,
#                                                                    session=session,
#                                                                    debug=debug, debug_prn=debug_prn)
#                                   for entity_id in getattr(self, entity_attribute)])




In [None]:
#| exporti

@patch_to(DomoLineage)
def _get_content_list_ls(self: DomoLineage, regex_pattern_ls=None):
    """Describe the non-empty content lists found on the parent object.

    An attribute of `self.parent` is selected when its name matches every
    pattern in `regex_pattern_ls` and its value is truthy.

    Args:
        regex_pattern_ls: patterns a name must match; defaults to
            `[".*_id_ls$", "^content_.*"]`.

    Returns:
        One dict per selected attribute with keys `list_name` (the attribute
        name), `entity_name` (the name with each pattern's literal part
        removed, e.g. `content_page_id_ls` -> `page` under the defaults) and
        `regex_pattern_ls` (the patterns used).
    """
    regex_pattern_ls = regex_pattern_ls or [".*_id_ls$", "^content_.*"]

    result = []
    for content_name in dir(self.parent):
        # Generator form lets `all` stop at the first pattern that fails.
        if not all(re.match(pattern, content_name) for pattern in regex_pattern_ls):
            continue

        # Look the attribute up once; empty or None lists are ignored.
        if not getattr(self.parent, content_name):
            continue

        # Strip the literal part of every pattern (the pattern minus its
        # ".*" wildcard) to leave the bare entity name.
        base_name = content_name
        for regex_pattern in regex_pattern_ls:
            base_name = re.sub(regex_pattern.replace(".*", ""), "", base_name)

        result.append(
            {
                "list_name": content_name,
                "entity_name": base_name,
                "regex_pattern_ls": regex_pattern_ls,
            }
        )

    return result

@patch_to(DomoLineage)
def _reset_lineage_and_sync_parent(self):
    """Copy the parent's populated content lists onto the lineage.

    For every entry reported by `_get_content_list_ls` (default patterns), the
    parent's list is copied to the lineage attribute `<entity_name>_id_ls`.

    Returns:
        self, to allow chaining.
    """
    for content_obj in self._get_content_list_ls():
        parent_content = getattr(self.parent, content_obj["list_name"])

        lineage_content_name = f"{content_obj['entity_name']}_id_ls"

        # Store a copy, not the parent's own list: `_get_page_card_ids`
        # appends to `card_id_ls`, which would otherwise silently mutate the
        # parent's list as well.
        setattr(self, lineage_content_name, list(parent_content))

    return self

@patch_to(DomoLineage)
async def get(
    self: DomoLineage,
    debug_prn: bool = False,
    debug_api: bool = False,
    auth: dmda.DomoAuth = None,
    session: httpx.AsyncClient = None,
):
    """Sync the lineage with its parent and collect card ids for its pages.

    Args:
        debug_prn: accepted but not used by the active code path.
        debug_api: accepted but not used by the active code path.
        auth: credentials; resolved against `self.parent.auth` but not
            forwarded to any call at present.
        session: accepted but not used by the active code path. The default
            is now None; previously the `httpx.AsyncClient` class itself was
            the default value because `=` was written in place of `:`.

    Returns:
        None.
    """
    auth = auth or self.parent.auth

    self._reset_lineage_and_sync_parent()

    if self.page_id_ls:
        await self._get_page_card_ids()

#     if self.card_id_ls and len(self.card_id_ls) > 0:
#         if debug_prn:
#             print(f'üèÅ getting card lineage for repo {self.id}')
#         await self._get_entity_ls_lineage(domo_entity=dmdc.DomoEntity.CARD,
#                                           full_auth=full_auth,
#                                           debug_prn=debug_prn, debug=debug, session=session)

#     if self.dataflow_id_ls and len(self.dataflow_id_ls) > 0:
#         if debug_prn:
#             print(f'üèÅ getting dataflow lineage for repo {self.id}')

#         await self._get_entity_ls_lineage(domo_entity=dmdc.DomoEntity.DATAFLOW,
#                                           full_auth=full_auth,
#                                           debug_prn=debug_prn, debug=debug, session=session)

#     if self.dataset_id_ls and len(self.dataset_id_ls) > 0:
#         if debug_prn:
#             print(f'üèÅ getting dataset lineage for repo {self.id}')

#         await self._get_entity_ls_lineage(domo_entity=dmdc.DomoEntity.DATASET,
#                                           full_auth=full_auth,
#                                           debug_prn=debug_prn, debug=debug, session=session)

#     return self

In [None]:
import os
import pandas as pd
import domolibrary.classes.DomoPublish as dmpb

# Authenticate against the `domo-community` instance; the access token is read
# from the environment rather than being hard-coded in the notebook.
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

publication_id = "701eb547-4244-43f6-bcda-003ed2c483c7"

# Load the publication that will act as the lineage parent.
domo_publication = await dmpb.DomoPublication.get_from_id(
    publication_id=publication_id, auth=token_auth
)

domo_lineage = DomoLineage(auth=token_auth, parent=domo_publication)

# Syncs the parent's content lists onto the lineage and, when it has pages,
# gathers their card ids. `get` has no return statement, so nothing is displayed.
await domo_lineage.get()

# domo_lineage


In [None]:
#| exporti
@patch_to(DomoLineage)
def _flatten_lineage(self):
    """Flatten the `*_id_ls` lists into one list of entity rows.

    Returns:
        A list of dicts with keys `entity_type` (the `dmdc.DomoEntity` value
        for the upper-cased entity name) and `entity_id`, one per id.
    """
    # Call the helper as a method. `patch_to` attaches it to the class, and the
    # module-level name is not bound to the function (fastcore returns any
    # pre-existing global of that name, normally None), so the former bare
    # call `_get_content_list_ls(self, ...)` raised TypeError.
    attribute_ls = self._get_content_list_ls(['.*_id_ls$'])

    # NOTE(review): the helper selects attribute names from `self.parent`,
    # while the ids below are read from `self` under the same name. Confirm
    # the two objects share these attribute names for the parents in use.
    output_ls = []

    for attribute in attribute_ls:
        ls_name = attribute['list_name']
        entity_type = dmdc.DomoEntity[attribute['entity_name'].upper()].value

        output_ls.extend(
            {'entity_type': entity_type, 'entity_id': entity_id}
            for entity_id in getattr(self, ls_name)
        )

    return output_ls

In [None]:
import nbdev

# Run nbdev's export step; presumably this writes the cells marked
# `# | export` / `# | exporti` to the module named by `default_exp` at the top
# of the notebook. Confirm against the project's nbdev settings.
nbdev.nbdev_export()