# Dataset Routes


In [None]:
# | default_exp routes.dataset

In [None]:
# | exporti
from typing import Optional

import io
import pandas as pd

import aiohttp

import domolibrary.client.get_data as gd
import domolibrary.client.ResponseGetData as rgd
import domolibrary.client.DomoAuth as dmda

## Query Datasets

In [None]:
#| export
class DatasetNotFoundError(Exception):
    """Raised when a dataset id cannot be found in the given Domo instance."""

    def __init__(self, dataset_id, domo_instance):
        # the formatted text is handed to Exception as its only argument
        super().__init__(f"dataset - {dataset_id} not found in {domo_instance}")

In [None]:
# | export

class QueryRequestError(Exception):
    """Raised when a dataset query comes back as a bad request; the message echoes the SQL that was sent."""

    def __init__(self, dataset_id, domo_instance, sql):
        # the SQL is placed on its own line of the message so it is easy to copy out
        super().__init__(
            f"dataset - {dataset_id} in {domo_instance} received a bad request.  Check your SQL \n {sql}"
        )

# typically do not use
async def query_dataset_public(
    dev_auth: dmda.DomoDeveloperAuth,
    dataset_id: str,
    sql: str,
    session: aiohttp.ClientSession,
    debug_api: bool = False,
):
    """Run a SQL statement against a dataset through the public api.domo.com route.

    Requires developer (client_id / secret) authentication.  The statement is
    POSTed as `{"sql": sql}` and the result of `gd.get_data` is returned unchanged.
    """

    return await gd.get_data(
        auth=dev_auth,
        # IncludeHeaders=true is part of the fixed query string for this endpoint
        url=f"https://api.domo.com/v1/datasets/query/execute/{dataset_id}?IncludeHeaders=true",
        method="POST",
        body={"sql": sql},
        session=session,
        debug_api=debug_api,
    )
        

        
async def query_dataset_private(
    auth: dmda.DomoAuth,  # DomoFullAuth or DomoTokenAuth
    dataset_id: str,
    sql: str,
    session: Optional[aiohttp.ClientSession] = None,
    loop_until_end: bool = False,  # retrieve all available rows
    limit: int = 100,  # maximum rows to return per request.  refers to PAGINATION
    skip: int = 0,  # starting offset handed to the looper
    maximum: int = 100,  # equivalent to the LIMIT or TOP clause in SQL, the number of rows to return total
    debug_api: bool = False,
    debug_loop: bool = False,
):
    """execute SQL queries against private APIs, requires DomoFullAuth or DomoTokenAuth

    Paging is delegated to `gd.looper`.  Each request body is the caller's `sql`
    with ` limit {limit} offset {skip}` appended, so `sql` should not carry its
    own LIMIT / OFFSET clause.

    Returns the object produced by `gd.looper`; each page is converted by
    `arr_fn` into a list of `{column_name: value}` dicts.

    Raises `DatasetNotFoundError` when the result is a 404 'Not Found' and
    `QueryRequestError` when it is a 400 'Bad Request'.
    """

    url = f"https://{auth.domo_instance}.domo.com/api/query/v1/execute/{dataset_id}"

    # passed straight to the looper; presumably the names of its paging fields -- confirm in gd.looper
    offset_params = {
        "offset": "offset",
        "limit": "limit",
    }

    def body_fn(skip, limit):
        # parameter names are left untouched in case the looper passes them by keyword
        return {"sql": f"{sql} limit {limit} offset {skip}"}

    def arr_fn(res) -> list[dict]:
        # pair each positional row value with its column name: one dict per row
        columns_ls = res.response.get("columns")
        return [
            {column: row[index] for index, column in enumerate(columns_ls)}
            for row in res.response.get("rows")
        ]

    res = await gd.looper(
        auth=auth,
        method="POST",
        url=url,
        arr_fn=arr_fn,
        offset_params=offset_params,
        limit=limit,
        skip=skip,
        maximum=maximum,
        session=session,
        body_fn=body_fn,
        debug_api=debug_api,
        debug_loop=debug_loop,
        loop_until_end=loop_until_end,
    )

    if res.status == 404 and res.response == 'Not Found':
        raise DatasetNotFoundError(dataset_id=dataset_id, domo_instance=auth.domo_instance)

    if res.status == 400 and res.response == 'Bad Request':
        raise QueryRequestError(dataset_id=dataset_id, domo_instance=auth.domo_instance, sql=sql)

    return res


In [None]:
import os
import pandas as pd

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo",
    domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

sql = f"SELECT * FROM TABLE"

ds_res = await query_dataset_private(dataset_id=os.environ['DOJO_DATASET_ID'],
                                     auth=token_auth,
                                     sql=sql,
                                     skip=42,
                                     maximum=5,
                                     loop_until_end=False)
pd.DataFrame(ds_res.response)


Unnamed: 0,objectID,url,Title,article,views,created_dt,published_dt
0,4790,https://domo-support.domo.com/s/article/360046...,"Starting, Stopping, and Restarting the Workben...",Important: Support for Workbench 4 ended on ...,39,2022-10-24T22:30:00,2022-10-24T22:41:00
1,4796,https://domo-support.domo.com/s/article/360047...,Understanding the Workbench 4 User Interface,Important: Support for Workbench 4 ended on ...,56,2022-10-24T22:30:00,2022-10-24T22:40:00
2,4773,https://domo-support.domo.com/s/article/360046...,Using the External Process File Provider in Wo...,Important: Support for Workbench 4 ended on ...,20,2022-10-24T22:30:00,2022-10-24T22:41:00
3,4798,https://domo-support.domo.com/s/article/360046...,Workbench 4 FAQs,Important: Support for Workbench 4 ended on ...,48,2022-10-24T22:30:00,2022-10-24T22:40:00
4,4800,https://domo-support.domo.com/s/article/360047...,Workbench 4 Overview,Important: Support for Workbench 4 ended on ...,40,2022-10-24T22:30:00,2022-10-24T22:41:00


## Dataset Properties

In [None]:
# | export
async def get_dataset_by_id(
    dataset_id: str, # dataset id from URL
    auth: Optional[dmda.DomoAuth] = None, # requires full authentication
    debug_api: bool = False, # for troubleshooting API request
    session: Optional[aiohttp.ClientSession] = None
) -> rgd.ResponseGetData: # returns metadata about a dataset
    """Fetch the metadata record of a single dataset.

    `auth` defaults to None in the signature, but the URL is built from
    `auth.domo_instance`, so an auth object has to be supplied.

    Raises `DatasetNotFoundError` when the API answers 404 'Not Found'.
    """

    dataset_res = await gd.get_data(
        auth=auth,
        url=f"https://{auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}",
        method="GET",
        debug_api=debug_api,
        session=session,
    )

    # both the status code and the literal body text must match
    is_missing = dataset_res.status == 404 and dataset_res.response == 'Not Found'
    if is_missing:
        raise DatasetNotFoundError(dataset_id=dataset_id, domo_instance=auth.domo_instance)

    return dataset_res

In [None]:
import os
import pandas as pd

# demonstrates the error path of get_dataset_by_id
try:
    token_auth = dmda.DomoTokenAuth(
        domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
    )

    # 123 is not a dataset in this instance, so the route raises DatasetNotFoundError
    await get_dataset_by_id(dataset_id=123, auth=token_auth)

except DatasetNotFoundError as e:
    # printing the exception shows the formatted "not found" message
    print(e) 

dataset - 123 not found in domo-dojo


In [None]:
import os
import pandas as pd

# token-based auth for the domo-dojo instance; the access token is read from the environment
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# the dataset id also comes from the environment
ds_res = await get_dataset_by_id(dataset_id=os.environ['DOJO_DATASET_ID'], auth=token_auth)
# wrap the single metadata record in a list so it renders as a one-row table
pd.DataFrame([ds_res.response])

Unnamed: 0,id,displayType,dataProviderType,type,name,owner,status,created,lastTouched,lastUpdated,...,transportType,adc,adcExternal,cloudId,cloudName,permissions,hidden,tags,scheduleActive,cryoStatus
0,04c1574e-c8be-4721-9846-c6ffa491144b,domo-jupyterdata,domo-jupyterdata,Jupyter,domo_kbs,"{'id': '1893952720', 'name': 'Jae Wilson', 'ty...",SUCCESS,1668379680000,1668385822000,1668385822045,...,API,False,False,domo,Domo,READ_WRITE_DELETE_SHARE_ADMIN,False,"[""Jan-24-2023 15:00"",""developer_documentation""...",True,ADRENALINE


In [None]:
# | export
async def get_schema(
    auth: dmda.DomoAuth,
    dataset_id: str,
    debug_api: bool = False,  # for troubleshooting API request
    session: Optional[aiohttp.ClientSession] = None,  # optional session to reuse, like the other routes in this module
) -> rgd.ResponseGetData:
    """retrieve the schema for a dataset

    Issues a GET against the indexed-schema endpoint with `includeHidden=false`
    and returns the `gd.get_data` result unchanged.  No status checks are
    performed here; callers inspect the returned response themselves.
    """

    url = f"https://{auth.domo_instance}.domo.com/api/query/v1/datasources/{dataset_id}/schema/indexed?includeHidden=false"

    return await gd.get_data(
        auth=auth, url=url, method="GET", debug_api=debug_api, session=session
    )


#### sample implementation of get_schema


In [None]:
import os
import pandas as pd

# token-based auth for the domo-dojo instance; the access token is read from the environment
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# ds_res is reused by the next cell to pull the column list out of the response
ds_res = await get_schema(dataset_id=os.environ['DOJO_DATASET_ID'], auth=token_auth)
pd.DataFrame(ds_res.response)

Unnamed: 0,name,tables,dataSourceId,url,queryEndpoint,progressEndpoint,indexEndpoint,deleteEndpoint,versionId
0,domo_kbs,"{'columns': [{'name': 'objectID', 'id': 'objec...",04c1574e-c8be-4721-9846-c6ffa491144b,/schemas/853832B128D75BCE,/query/mmmm-0012-0200/04c1574e-c8be-4721-9846-...,/index/mmmm-0012-0200/04c1574e-c8be-4721-9846-...,/index/mmmm-0012-0200/04c1574e-c8be-4721-9846-...,/delete/mmmm-0012-0200/04c1574e-c8be-4721-9846...,3


In [None]:
# retrieve schema from response: the column definitions sit under the first entry of "tables"
# (ds_res comes from the get_schema call in the previous cell)
pd.DataFrame(ds_res.response.get("tables")[0].get("columns"))

Unnamed: 0,name,id,type,visible,order
0,objectID,objectID,STRING,True,0
1,url,url,STRING,True,0
2,Title,Title,STRING,True,0
3,article,article,STRING,True,0
4,views,views,LONG,True,0
5,created_dt,created_dt,DATETIME,True,0
6,published_dt,published_dt,DATETIME,True,0


In [None]:
#| export
async def set_dataset_tags(auth: dmda.DomoFullAuth,
                           tag_ls: list[str], # complete list of tags for dataset
                           dataset_id: str,
                           debug_api: bool = False,
                           session: Optional[aiohttp.ClientSession] = None,
                           return_raw : bool = False
                           ):

    """REPLACE tags on this dataset with a new list

    The tag list is POSTed as the request body.  With `return_raw=True` the
    `gd.get_data` result is returned untouched.  Otherwise, on a 200 status,
    the response payload is replaced with a human-readable confirmation
    message; any other status is returned as received.
    """

    url = f"https://{auth.domo_instance}.domo.com/api/data/ui/v3/datasources/{dataset_id}/tags"

    res = await gd.get_data(
        auth=auth,
        url=url,
        method='POST',
        debug_api=debug_api,
        body=tag_ls,
        session=session,
        return_raw = return_raw
    )

    if return_raw:
        return res

    if res.status == 200:
        # swap the API payload for a summary of what was written
        res.set_response (response = f'Dataset {dataset_id} tags updated to [{ ", ".join(tag_ls) }]')

    return res


In [None]:
import os

# token-based auth for the domo-dojo instance; the access token is read from the environment
token_auth = dmda.DomoTokenAuth(
    domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"],
    domo_instance="domo-dojo",
)

# the complete tag list: set_dataset_tags REPLACES the dataset's tags with this list
tag_ls = ['hackercore', 'developer_documentation']

# return_raw=False, so a successful call returns the confirmation message as its response
await set_dataset_tags(auth=token_auth,
                       tag_ls=tag_ls,
                       dataset_id=os.environ['DOJO_DATASET_ID'],
                       debug_api = False,
                       return_raw = False
                       )


ResponseGetData(status=200, response='Dataset 04c1574e-c8be-4721-9846-c6ffa491144b tags updated to [hackercore, developer_documentation]', is_success=True)

## Upload Data

#### overview

In the URL, `parts` refers to the multi-part API and is unrelated to the partitions concept. The multi-part API was designed to allow sending multiple streams of data into a data_version simultaneously.

In stage 1, the values passed in the body will be superseded by the values in the COMMIT (stage 3), so the best practice is not to populate values here.

The response includes an uploadId, which must be stored and passed to the URL of the subsequent upload request (stages 2 and 3).

#### url params

The dataTag parameter allows users to UPDATE or REPLACE a datatag (partition)

NOTE: restateDataTag is largely deprecated // exists for backward compatibility

#### body params

The appendId parameter accepts "latest" or "None"

latest will APPEND the data version to the dataset


In [None]:
#| export
class UploadDataError(Exception):
    """Raised when one of the upload stages fails; the message names the stage, dataset and instance."""

    def __init__(self, stage_num: int, dataset_id: str, domo_instance: str):
        # the formatted text is handed to Exception as its only argument
        super().__init__(
            f"error uploading data to {dataset_id} during Stage { stage_num} in {domo_instance}"
        )

In [None]:
# | export
async def upload_dataset_stage_1(auth: dmda.DomoAuth,
                                 dataset_id: str,
                                 #  restate_data_tag: str = None, # deprecated
                                 partition_tag: str = None,  # synonymous with data_tag
                                 session: Optional[aiohttp.ClientSession] = None,
                                 debug_api: bool = False,
                                 ) -> rgd.ResponseGetData:
    """Open an upload session for a dataset (stage 1 of the upload flow).

    POSTs to the dataset's `uploads` endpoint and returns the response.
    Raises `UploadDataError` (stage 1) when the response is not successful.
    """

    # without a partition tag both body fields stay None and no query params are sent;
    # with one, the tag goes out as the dataTag query param and appendId becomes 'latest'
    body = {
        "action": None,
        "appendId": 'latest' if partition_tag else None,
    }
    params = {'dataTag': partition_tag} if partition_tag else None

    stage_res = await gd.get_data(
        auth=auth,
        url=f"https://{auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}/uploads",
        method='POST',
        body=body,
        session=session,
        debug_api=debug_api,
        params=params,
    )

    if stage_res.is_success:
        return stage_res

    raise UploadDataError(
        stage_num=1, dataset_id=dataset_id, domo_instance=auth.domo_instance)


In [None]:
# | export
async def upload_dataset_stage_2_file(
    auth: dmda.DomoAuth,
    dataset_id: str,
    upload_id: str,  # must originate from  a stage_1 upload response
    data_file: Optional[io.TextIOWrapper] = None,
    session: Optional[aiohttp.ClientSession] = None,
    # only necessary if streaming multiple files into the same partition (multi-part upload)
    part_id: str = 2,
    debug_api: bool = False,
) -> rgd.ResponseGetData:
    """Send a file as one part of an open upload session (stage 2 of the upload flow).

    PUTs `data_file` as text/csv to the `parts/{part_id}` endpoint of the upload.
    On success the response is tagged with `upload_id`, `dataset_id` and
    `part_id` before it is returned.  Raises `UploadDataError` (stage 2) otherwise.
    """

    part_res = await gd.get_data(
        url=f"https://{auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}/uploads/{upload_id}/parts/{part_id}",
        method="PUT",
        auth=auth,
        content_type="text/csv",
        body=data_file,
        session=session,
        debug_api=debug_api,
    )

    if not part_res.is_success:
        raise UploadDataError(stage_num=2, dataset_id=dataset_id, domo_instance=auth.domo_instance)

    # carry the identifiers along on the response object
    part_res.part_id = part_id
    part_res.dataset_id = dataset_id
    part_res.upload_id = upload_id

    return part_res

In [None]:
# | export


async def upload_dataset_stage_2_df(
    auth: dmda.DomoAuth,
    dataset_id: str,
    upload_id: str,  # must originate from  a stage_1 upload response
    upload_df: pd.DataFrame,
    session: Optional[aiohttp.ClientSession] = None,
    part_id: str = 2,  # only necessary if streaming multiple files into the same partition (multi-part upload)
    debug_api: bool = False,
) -> rgd.ResponseGetData:
    """Send a DataFrame as one part of an open upload session (stage 2 of the upload flow).

    The frame is serialised to CSV without a header row and without the index,
    then PUT as text/csv to the `parts/{part_id}` endpoint of the upload.
    On success the response is tagged with `upload_id`, `dataset_id` and
    `part_id` before it is returned.  Raises `UploadDataError` (stage 2) otherwise.
    """

    endpoint = f"https://{auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}/uploads/{upload_id}/parts/{part_id}"

    # only the data rows are sent: no column header, no index column
    csv_payload = upload_df.to_csv(header=False, index=False)

    part_res = await gd.get_data(
        url=endpoint,
        method="PUT",
        auth=auth,
        content_type="text/csv",
        body=csv_payload,
        session=session,
        debug_api=debug_api,
    )

    if not part_res.is_success:
        raise UploadDataError(stage_num=2, dataset_id=dataset_id, domo_instance=auth.domo_instance)

    # carry the identifiers along on the response object
    part_res.part_id = part_id
    part_res.dataset_id = dataset_id
    part_res.upload_id = upload_id

    return part_res

In [None]:
# | export
async def upload_dataset_stage_3(
    auth: dmda.DomoAuth,
    dataset_id: str,
    upload_id: str,  # must originate from  a stage_1 upload response
    session: Optional[aiohttp.ClientSession] = None,
    update_method: str = "REPLACE",  # accepts REPLACE or APPEND
    #  restate_data_tag: str = None, # deprecated
    partition_tag: str = None,  # synonymous with data_tag
    is_index: bool = False,  # index after uploading
    debug_api: bool = False,
) -> rgd.ResponseGetData:
    """Commit an upload session (stage 3 of the upload flow).

    PUTs to the upload's `commit` endpoint with the index flag and the update
    action.  When `partition_tag` is given, the action is forced to "APPEND"
    regardless of `update_method`, and the tag is sent as `dataTag` together
    with `appendId: "latest"`.

    On success the response is tagged with `upload_id` and `dataset_id`.
    Raises `UploadDataError` (stage 3) when the response is not successful.
    """

    commit_body = {"index": is_index, "action": update_method}

    if partition_tag:
        # partitioned commits always append, whatever update_method says
        commit_body["action"] = "APPEND"
        commit_body["dataTag"] = partition_tag
        commit_body["appendId"] = "latest"

    commit_res = await gd.get_data(
        auth=auth,
        method="PUT",
        url=f"https://{auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}/uploads/{upload_id}/commit",
        body=commit_body,
        session=session,
        debug_api=debug_api,
    )

    if not commit_res.is_success:
        raise UploadDataError(stage_num=3, dataset_id=dataset_id, domo_instance=auth.domo_instance)

    # carry the identifiers along on the response object
    commit_res.dataset_id = dataset_id
    commit_res.upload_id = upload_id

    return commit_res

In [None]:
# | export

async def index_dataset(
    auth: dmda.DomoAuth,
    dataset_id: str,
    session: Optional[aiohttp.ClientSession] = None,
    debug_api: bool = False,
) -> rgd.ResponseGetData:
    """Request an index run for a dataset.

    POSTs a body with an empty `dataIds` list to the dataset's `indexes`
    endpoint and returns the `gd.get_data` result unchanged.
    """

    return await gd.get_data(
        auth=auth,
        method="POST",
        body={"dataIds": []},
        url=f"https://{auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}/indexes",
        session=session,
        debug_api=debug_api,
    )

In [None]:
# | export
async def index_status(
    auth: dmda.DomoAuth,
    dataset_id: str,
    index_id: str,
    session: Optional[aiohttp.ClientSession] = None,
    debug_api: bool = False,
) -> rgd.ResponseGetData:
    """Look up the statuses of one index run of a dataset.

    GETs the `statuses` endpoint of the given index and returns the
    `gd.get_data` result unchanged.
    """

    status_url = f"https://{auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}/indexes/{index_id}/statuses"

    return await gd.get_data(
        auth=auth,
        method="GET",
        url=status_url,
        session=session,
        debug_api=debug_api,
    )


## Working with Partitions

In [None]:
#| export
def generate_list_partitions_body(limit=100, offset=0):
    """Build the request body for the partition-list endpoint.

    Partitions are requested sorted by `datecompleted`, newest first, with no
    MIN / MAX filter.  `limit` and `offset` are written into the body as the
    paging fields (earlier revisions ignored both arguments and always sent
    1000 / 0).  A fresh dict is returned on every call.
    """
    return {
        "paginationFields": [{
            "fieldName": "datecompleted",
            "sortOrder": "DESC",
            "filterValues": {
                "MIN": None,
                "MAX": None
            }
        }],
        "limit": limit,
        "offset": offset
    }


async def list_partitions(auth: dmda.DomoAuth,
                          dataset_id: str,
                          body: dict = None,
                          session: aiohttp.ClientSession = None,
                          debug_api: bool = False,
                          debug_loop: bool = False,
                          ):
    """List the partitions of a dataset.

    POSTs to the dataset's `partition/list` endpoint through `gd.looper` with
    `loop_until_end=True` and `offset_params_in_body=True`.  When `body` is
    empty or omitted, the one built by `generate_list_partitions_body()` is used.

    Raises `DatasetNotFoundError` when the result is a 404 'Not Found'.
    """

    if not body:
        body = generate_list_partitions_body()

    def arr_fn(res) -> list[dict]:
        # the response payload is handed back to the looper as-is
        return res.response

    partitions_res = await gd.looper(
        auth=auth,
        method='POST',
        url=f"https://{auth.domo_instance}.domo.com/api/query/v1/datasources/{dataset_id}/partition/list",
        arr_fn=arr_fn,
        body=body,
        offset_params_in_body=True,
        offset_params={'offset': 'offset', 'limit': 'limit'},
        loop_until_end=True,
        session=session,
        debug_loop=debug_loop,
        debug_api=debug_api,
    )

    is_missing = partitions_res.status == 404 and partitions_res.response == 'Not Found'
    if is_missing:
        raise DatasetNotFoundError(
            dataset_id=dataset_id, domo_instance=auth.domo_instance)

    return partitions_res


In [None]:
import os
import pandas as pd

# token-based auth for the domo-dojo instance; the access token is read from the environment
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# hard-coded dataset id for this example (the other examples read theirs from the environment)
dataset_id = 'd2b21660-4ba8-400c-badf-aeef5a9abae1'

# no body is passed, so list_partitions falls back to generate_list_partitions_body()
await list_partitions(auth=token_auth, dataset_id=dataset_id)



ResponseGetData(status=200, response=[{'dataId': 372, 'partitionId': '2013-07-02', 'dateCompleted': '2023-01-24T14:27:21.000+00:00', 'rowCount': 1}, {'dataId': 373, 'partitionId': '2013-07-01', 'dateCompleted': '2023-01-24T14:27:21.000+00:00', 'rowCount': 1}, {'dataId': 354, 'partitionId': '2013-07-20', 'dateCompleted': '2023-01-24T14:27:20.000+00:00', 'rowCount': 1}, {'dataId': 355, 'partitionId': '2013-07-19', 'dateCompleted': '2023-01-24T14:27:20.000+00:00', 'rowCount': 1}, {'dataId': 356, 'partitionId': '2013-07-18', 'dateCompleted': '2023-01-24T14:27:20.000+00:00', 'rowCount': 1}, {'dataId': 357, 'partitionId': '2013-07-17', 'dateCompleted': '2023-01-24T14:27:20.000+00:00', 'rowCount': 1}, {'dataId': 358, 'partitionId': '2013-07-16', 'dateCompleted': '2023-01-24T14:27:20.000+00:00', 'rowCount': 1}, {'dataId': 359, 'partitionId': '2013-07-15', 'dateCompleted': '2023-01-24T14:27:20.000+00:00', 'rowCount': 1}, {'dataId': 360, 'partitionId': '2013-07-14', 'dateCompleted': '2023-01-24T

In [None]:
# # Delete partition has 3 stages
# # Stage 1. This marks the data version associated with the partition tag as deleted.  It does not delete the partition tag or remove the association between the partition tag and data version.  There should be no need to upload an empty file – step #3 will remove the data from Adrenaline.
# #| export
# async def delete_partition_stage_1(full_auth: DomoFullAuth,
#                                    dataset_id: str,
#                                    dataset_partition_id: str,
#                                    session: aiohttp.ClientSession = None,
#                                    debug: bool = False):

#     #url = f'https://{full_auth.domo_instance}.domo.com/api/query/v1/datasources/{dataset_id}/partition/{dataset_partition_id}'
#     # update on 9/9/2022 based on the conversation with Greg Swensen
#     url = f'https://{full_auth.domo_instance}.domo.com/api/query/v1/datasources/{dataset_id}/tag/{dataset_partition_id}/data'

#     return await get_data(
#         auth=full_auth,
#         method="DELETE",
#         url=url,
#         session=session,
#         debug=debug
#     )
# # Stage 2. This will remove the partition association so that it doesn’t show up in the list call.  Technically, this is not required as a partition against a deleted data version will not count against the 400 partition limit, but as the current partitions api doesn’t make that clear, cleaning these up will make it much easier for you to manage.

In [None]:
# #| export
# async def delete_partition_stage_2(full_auth: DomoFullAuth,
#                                    dataset_id: str,
#                                    dataset_partition_id: str,
#                                    session: aiohttp.ClientSession = None,
#                                    debug: bool = False):

#     url = f'https://{full_auth.domo_instance}.domo.com/api/query/v1/datasources/{dataset_id}/partition/{dataset_partition_id}'

#     return await get_data(
#         auth=full_auth,
#         method="DELETE",
#         url=url,
#         session=session,
#         debug=debug
#     )

In [None]:
# #| export
# async def delete(full_auth: DomoFullAuth,
#                  dataset_id: str, session: aiohttp.ClientSession = None, debug: bool = False):
#     url = f"https://{full_auth.domo_instance}.domo.com/api/data/v3/datasources/{dataset_id}?deleteMethod=hard"

#     return await get_data(
#         auth=full_auth,
#         method="DELETE",
#         url=url,
#         session=session,
#         debug=debug
#     )