In [1]:
DATAFLOW_ID = 12

In [2]:
import domojupyter as dj
import domolibrary_execution.utils.domojupyter as dxdj

async def generate_auth():
    return await dxdj.generate_domo_auth(
        domo_instance = dxdj.which_environment(),
        domojupyter_fn = dj) 

auth = await generate_auth()

sdk_domo-community - no password stored in account
🎉 token_auth token retrieved from domo-community ⚙️


In [3]:
import domolibrary.client.DomoError as dmde
import domolibrary.client.DomoAuth as dmda

import routes.dataflow as dataflow_routes
import models.dataflow_model as dataflow_model
import pandas as pd

def get_dataflow_descriptive_data(dataflow_id : int ,
                                  auth : dmda.DomoAuth,
                                  debug_api: bool = False):
    
    res = dataflow_routes.get_dataflow_by_id_sync(dataflow_id = dataflow_id,
                                                  auth = auth,
                                                  debug_api = debug_api)
    
    if res.response.get('description') and "autogen" not in res.response.get('description') :
        raise dmde.DomoError(message = f"dataflow_id {dataflow_id} has a description already")
    
    return dataflow_model.llm_dataflow_process_definition(res)

try:
    descriptive_data = get_dataflow_descriptive_data(DATAFLOW_ID , auth = auth)
    print(descriptive_data)
except dmde.DomoError as e:
    print(e)


{'dataflow_id': 12, 'dataflow_name': 'Domo Governance Dataflow', 'dataflow_actions': [{'type': 'LoadFromVault', 'id': '0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40', 'name': 'Governance_Dataflow Details', 'settings': {}, 'gui': {'x': 156, 'y': 204, 'color': 3238043, 'colorSource': None, 'sampleJson': None}, 'dataSourceId': '80c4a085-2cb7-4917-a956-ee11e3d838bb', 'executeFlowWhenUpdated': False, 'pseudoDataSource': False, 'truncateTextColumns': False, 'truncateRows': False, 'onlyLoadNewVersions': False, 'recentVersionCutoffMs': 0}, {'type': 'GroupBy', 'id': '7f1443c9-5584-4980-8546-4434286c16dc', 'name': 'Group By', 'dependsOn': ['0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40'], 'settings': {}, 'gui': {'x': 300, 'y': 336, 'color': None, 'colorSource': None, 'sampleJson': None}, 'addLineNumber': False, 'giveBackRow': False, 'allRows': False, 'groups': [{'name': 'Name'}], 'fields': [{'name': 'Inputs', 'source': 'Input Dataset Name', 'type': 'CONCAT_COMMA', 'valuefield': None, 'expression': None, 'settings

In [4]:
import models.model as model
import datetime as dt

def generate_new_description(description : str) -> str:
    return f'{description}\n autogen via llm - {dt.datetime.now().strftime("%Y-%m-%d %H:%M")}'

def llm_describe_dataflow(descriptive_data, return_raw: bool = False, debug_api : bool = False):
    endpoint = model.EndpointHandler._from_creds_account(domo_instance = 'domo-community')

    messages = endpoint.llm_describe_dataflow(data = descriptive_data,debug_api = debug_api)
    
    if return_raw:
        return messages

    return generate_new_description( messages.messages[-1].message)

dataflow_description = llm_describe_dataflow(descriptive_data = descriptive_data)

dataflow_description

'This JSON describes a Domo ETL dataflow named "Domo Governance Dataflow" with ID 12. The dataflow consists of six actions:\n\n1. LoadFromVault: Loads data from a vault source with ID \'80c4a085-2cb7-4917-a956-ee11e3d838bb\' named "Governance_Dataflow Details".\n\n2. GroupBy: Groups the data by the "Name" field and performs concatenation operations on four fields: "Inputs", "Outputs", "Input IDs", and "Output IDs". Each of these fields uses the CONCAT_COMMA aggregation type.\n\n3. MergeJoin: Performs an inner join between the results of the LoadFromVault and GroupBy actions. The join is based on the "Name" field from both inputs. The "Name" field from the second input is renamed to "Name_1" in the output.\n\n4. Unique: Removes duplicate rows based on the "Name" field. The operation is case-sensitive.\n\n5. ReplaceString: Replaces commas with "<br>" in the "Inputs", "Outputs", "Input IDs", and "Output IDs" fields. This operation is not case-sensitive and does not use regex.\n\n6. Publis

In [5]:
def update_dataflow(dataflow_id, 
                    auth : dmda.DomoAuth,
                    new_description : str , 
                    debug_api : bool = False):
    
    res = dataflow_routes.get_dataflow_by_id_sync(dataflow_id = dataflow_id, auth = auth, debug_api = debug_api)
    
    update_dataflow_body = dataflow_routes.generate_update_dataflow_body(obj = res.response, description = new_description)

    return dataflow_routes.update_dataflow_by_id_sync(dataflow_id = dataflow_id, 
                                               auth = auth,
                                               dataflow_body = update_dataflow_body ,
                                               debug_api = debug_api)

In [6]:
def process_dataflow(dataflow_id,
                     auth : dmda.DomoAuth,
                     debug_prn: bool = False,
                     debug_api: bool = False, **kwargs):
    if debug_prn:
        index = kwargs.get('index')
        outOf = kwargs.get('outOf')
        end = None
        
        if index and outOf:
            end = f" {index} out of {outOf}"
        
        print(f'starting {dataflow_id}{end or ""}')
    
    try:
        dataflow_descriptive_data = get_dataflow_descriptive_data(dataflow_id , auth = auth, debug_api=debug_api)
         
        dataflow_description = llm_describe_dataflow(descriptive_data = descriptive_data, debug_api = debug_api)

        return update_dataflow(dataflow_id, 
                        auth = auth, 
                        new_description = dataflow_description,
                        debug_api = debug_api)
    
    except dmde.DomoError as e:
        print(f"{e} - on dataflow - {dataflow_id}")


process_dataflow(132, auth = auth)

ResponseGetData(status=200, response={'gui': {'version': '1.0', 'canvases': {'default': {'canvasSettings': {'coarserGrid': False, 'backgroundVariant': 'None'}, 'elements': [{'type': 'Tile', 'id': '265a26c7-61b4-4161-85b4-fae0ddeca575', 'x': 36, 'y': 180, 'color': 3238043, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '76093974-bee2-4bbc-9d00-fdd90cf99564', 'x': 216, 'y': 180, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': 'fef76f74-691d-42f9-98a7-23619323e15c', 'x': 420, 'y': 180, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '57525a53-399f-4f83-a88d-9a77c6873516', 'x': 552, 'y': 180, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': 'b2b86b35-a4b5-433d-80c4-90d6d6e3b332', 'x': 792, 'y': 192, 'color': None, 'colorSource': None, 'sampleJson': None}]}}}, 'id': 132, 'name': 'BeastModesGovernance_02_ChangeReport', 'description': 'The JSON describes a Domo Governance Dataflow 

In [None]:
import routes.datacenter as dmdc
import importlib
importlib.reload(dmdc)

query_body = {"entities":["DATAFLOW"],"filters":[
    # {"field":"name_sort","filterType":"wildcard","query":"*b*"},
    {"filterType":"term","field":"owned_by_id","value":"1893952720:USER","name":"Jae Wilson1","not":False}],
              "combineResults":True,"query":"*","count":30,"offset":0,
             }

res = dmdc.search_datacenter_sync(
    auth = auth,
    query_body= query_body,
    debug_api = False
)

for index, obj in enumerate(res.response['searchObjects']):
    process_dataflow(dataflow_id = obj['databaseId'],
                     auth = auth,
                     index = index,
                     outOf = len(res.response['searchObjects'])
    ) 