In [1]:
DATAFLOW_ID = 12

In [2]:
import domojupyter as dj
import domolibrary_execution.utils.domojupyter as dxdj

async def generate_auth():
    return await dxdj.generate_domo_auth(
        domo_instance = dxdj.which_environment(),
        domojupyter_fn = dj) 

auth = await generate_auth()

sdk_domo-community - no password stored in account
🎉 token_auth token retrieved from domo-community ⚙️


In [8]:
import domolibrary.client.DomoError as dmde
import domolibrary.client.DomoAuth as dmda

import routes.dataflow as dataflow_routes
import models.dataflow_model as dataflow_model
import pandas as pd

def get_dataflow_descriptive_data(dataflow_id : int ,
                                  auth : dmda.DomoAuth,
                                  debug_api: bool = False,
                                 return_raw: bool = False):
    
    """extract data from the dataflow definition that might be useful in producing a description of the dataflow"""
    
    res = dataflow_routes.get_dataflow_by_id_sync(dataflow_id = dataflow_id,
                                                  auth = auth,
                                                  debug_api = debug_api)
    
    if return_raw:
        return res
    
    if res.response.get('description') and "autogen" in res.response.get('description') :
        raise dmde.DomoError(message = f"dataflow_id {dataflow_id} description was already autogenerated")
    
    return dataflow_model.llm_dataflow_process_definition(res)

try:
    descriptive_data = get_dataflow_descriptive_data(DATAFLOW_ID , auth = auth, return_raw = False)
    print(descriptive_data)
except dmde.DomoError as e:
    print(e)


{'dataflow_id': 12, 'dataflow_name': 'Domo Governance Dataflow', 'dataflow_actions': [{'type': 'LoadFromVault', 'id': '0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40', 'name': 'Governance_Dataflow Details', 'settings': {}, 'gui': {'x': 156, 'y': 204, 'color': 3238043, 'colorSource': None, 'sampleJson': None}, 'dataSourceId': '80c4a085-2cb7-4917-a956-ee11e3d838bb', 'executeFlowWhenUpdated': False, 'pseudoDataSource': False, 'truncateTextColumns': False, 'truncateRows': False, 'onlyLoadNewVersions': False, 'recentVersionCutoffMs': 0}, {'type': 'GroupBy', 'id': '7f1443c9-5584-4980-8546-4434286c16dc', 'name': 'Group By', 'dependsOn': ['0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40'], 'settings': {}, 'gui': {'x': 300, 'y': 336, 'color': None, 'colorSource': None, 'sampleJson': None}, 'addLineNumber': False, 'giveBackRow': False, 'allRows': False, 'groups': [{'name': 'Name'}], 'fields': [{'name': 'Inputs', 'source': 'Input Dataset Name', 'type': 'CONCAT_COMMA', 'valuefield': None, 'expression': None, 'settings

In [10]:
import models.model as model
import datetime as dt

def generate_new_description(description : str) -> str:
    return f'{description}\n autogen via llm - {dt.datetime.now().strftime("%Y-%m-%d %H:%M")}'

def llm_describe_dataflow(descriptive_data, return_raw: bool = False, debug_api : bool = False):
    endpoint = model.EndpointHandler._from_creds_account(domo_instance = 'domo-community')
    
    messages = dataflow_model.generate_llm_messages()
    
    messages = endpoint.invoke_message(data = descriptive_data,
                                       debug_api = debug_api,
                                       messages = messages
                                      )
    
    if return_raw:
        return messages

    return generate_new_description( messages.messages[-1].message)

dataflow_description = llm_describe_dataflow(descriptive_data = descriptive_data)

dataflow_description

'This JSON describes a Domo ETL dataflow named "Domo Governance Dataflow" with ID 12. The dataflow consists of six actions:\n\n1. LoadFromVault: Loads data from a source named "Governance_Dataflow Details" with ID \'0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40\'.\n\n2. GroupBy: Groups the data by the "Name" field. It concatenates values from "Input Dataset Name", "Output Dataset Name", "Input Dataset ID", and "Output Dataset ID" fields, creating new fields named "Inputs", "Outputs", "Input IDs", and "Outputs IDs" respectively.\n\n3. MergeJoin: Performs an inner join between the results of the LoadFromVault and GroupBy steps. The join is based on the "Name" field from both steps. The "Name" field from the GroupBy result is renamed to "Name_1".\n\n4. Unique: Removes duplicate rows based on the "Name" field. Case sensitivity is maintained.\n\n5. ReplaceString: Replaces all occurrences of commas with "<br>" in the "Inputs", "Outputs", "Input IDs", and "Outputs IDs" fields. This operation is not ca

In [12]:
def update_dataflow(dataflow_id, 
                    auth : dmda.DomoAuth,
                    new_description : str , 
                    debug_api : bool = False):
    
    res = dataflow_routes.get_dataflow_by_id_sync(dataflow_id = dataflow_id, auth = auth, debug_api = debug_api)
    
    update_dataflow_body = dataflow_routes.generate_update_dataflow_body(obj = res.response, description = new_description)

    return dataflow_routes.update_dataflow_by_id_sync(dataflow_id = dataflow_id, 
                                               auth = auth,
                                               dataflow_body = update_dataflow_body ,
                                               debug_api = debug_api)

update_dataflow(
    dataflow_id = DATAFLOW_ID,
    auth = auth,
    new_description = dataflow_description, 
    debug_api = True
)


{'body': None,
 'headers': {'x-domo-developer-token': '7bfb5625711672a405cc3c79c32c943795e78e00ecad53f1'},
 'method': 'GET',
 'url': 'https://domo-community.domo.com/api/dataprocessing/v1/dataflows/12'}
{'body': {'abandoned': False,
          'actions': [{'dataSourceId': '80c4a085-2cb7-4917-a956-ee11e3d838bb',
                       'executeFlowWhenUpdated': False,
                       'gui': {'color': 3238043,
                               'colorSource': None,
                               'sampleJson': None,
                               'x': 156,
                               'y': 204},
                       'id': '0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40',
                       'name': 'Governance_Dataflow Details',
                       'onlyLoadNewVersions': False,
                       'pseudoDataSource': False,
                       'recentVersionCutoffMs': 0,
                       'settings': {},
                       'truncateRows': False,
                       'tru

ResponseGetData(status=200, response={'gui': {'version': '1.0', 'canvases': {'default': {'canvasSettings': {'coarserGrid': False, 'backgroundVariant': 'None'}, 'elements': [{'type': 'Tile', 'id': '0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40', 'x': 156, 'y': 204, 'color': 3238043, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '7f1443c9-5584-4980-8546-4434286c16dc', 'x': 300, 'y': 336, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '4bacccc7-8aa1-408d-a9c1-6572e4be63ee', 'x': 420, 'y': 204, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '6b311369-d32a-46a2-bb48-d0bba47257fd', 'x': 528, 'y': 204, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '18c95e6c-d0ca-4a1f-8cff-5c01b7e54d90', 'x': 780, 'y': 204, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': 'fc028731-4f58-4e98-a923-bdc83fca8d74', 'x': 924, 'y': 204, 'color': None, 'colorSource': None, 'samp

In [14]:
def process_dataflow(dataflow_id,
                     auth : dmda.DomoAuth,
                     debug_prn: bool = False,
                     debug_api: bool = False, **kwargs):
    if debug_prn:
        index = kwargs.get('index')
        outOf = kwargs.get('outOf')
        end = None
        
        if index and outOf:
            end = f" {index} out of {outOf}"
        
        print(f'starting {dataflow_id}{end or ""}')
    
    try:
        dataflow_descriptive_data = get_dataflow_descriptive_data(dataflow_id , auth = auth, debug_api=debug_api)
         
        dataflow_description = llm_describe_dataflow(descriptive_data = descriptive_data, debug_api = debug_api)

        return update_dataflow(dataflow_id, 
                        auth = auth, 
                        new_description = dataflow_description,
                        debug_api = debug_api)
    
    except dmde.DomoError as e:
        print(f"{e} - on dataflow - {dataflow_id}")


process_dataflow(12, auth = auth)

ResponseGetData(status=200, response={'gui': {'version': '1.0', 'canvases': {'default': {'canvasSettings': {'coarserGrid': False, 'backgroundVariant': 'None'}, 'elements': [{'type': 'Tile', 'id': '0fab5ca6-3d86-4e43-8daf-2a5fed0d4e40', 'x': 156, 'y': 204, 'color': 3238043, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '7f1443c9-5584-4980-8546-4434286c16dc', 'x': 300, 'y': 336, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '4bacccc7-8aa1-408d-a9c1-6572e4be63ee', 'x': 420, 'y': 204, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '6b311369-d32a-46a2-bb48-d0bba47257fd', 'x': 528, 'y': 204, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': '18c95e6c-d0ca-4a1f-8cff-5c01b7e54d90', 'x': 780, 'y': 204, 'color': None, 'colorSource': None, 'sampleJson': None}, {'type': 'Tile', 'id': 'fc028731-4f58-4e98-a923-bdc83fca8d74', 'x': 924, 'y': 204, 'color': None, 'colorSource': None, 'samp

In [22]:
import routes.datacenter as datacenter_routes
import importlib
importlib.reload(dmdc)

def main(auth, debug_api: bool = False, debug_prn: bool = False):
    
    query_body = {"entities":["DATAFLOW"],"filters":[
        {"filterType":"term","field":"owned_by_id","value":"1893952720:USER","name":"Jae Wilson1","not":False}],
                  "combineResults":True,"query":"*","count":30,"offset":0,
    }

    res = datacenter_routes.search_datacenter_sync(
        auth = auth,
        query_body= query_body,
        debug_api = debug_api
    )

    for index, obj in enumerate(res.response['searchObjects']):
        process_dataflow(dataflow_id = obj['databaseId'],
                         auth = auth,
                         index = index,
                         outOf = len(res.response['searchObjects']),
                         debug_prn = debug_prn
                        )
        

main(auth = auth, debug_prn = True, debug_api = False)

starting 227
🛑  DomoError 🛑 - function: dataflow_id 227 description was already autogenerated - on dataflow - 227
starting 60 1 out of 29
🛑  DomoError 🛑 - function: dataflow_id 60 description was already autogenerated - on dataflow - 60
starting 124 2 out of 29
🛑  DomoError 🛑 - function: dataflow_id 124 description was already autogenerated - on dataflow - 124
starting 86 3 out of 29
🛑  DomoError 🛑 - function: Forbidden || error - on dataflow - 86
starting 132 4 out of 29
🛑  DomoError 🛑 - function: dataflow_id 132 description was already autogenerated - on dataflow - 132
starting 344 5 out of 29
🛑  DomoError 🛑 - function: Forbidden || error - on dataflow - 344
starting 6 6 out of 29
🛑  DomoError 🛑 - function: dataflow_id 6 description was already autogenerated - on dataflow - 6
starting 131 7 out of 29
🛑  DomoError 🛑 - function: dataflow_id 131 description was already autogenerated - on dataflow - 131
starting 123 8 out of 29
🛑  DomoError 🛑 - function: dataflow_id 123 description was a