# Dataset

> a class-based approach for interacting with Domo Datasets


In [1]:
# | default_exp classes.DomoDataset

In [3]:
#| hide
from nbdev.showdoc import show_doc

In [2]:
# |export
from fastcore.basics import patch_to
import pandas as pd

In [1]:
# | exporti
from dataclasses import dataclass, field
from typing import List, Optional
from enum import Enum

import json
import io

import httpx
import asyncio


# import datetime as dt

# import importlib
# import json
# from pprint import pprint

# import pandas as pd


# from ..utils.Base import Base
# from ..utils.chunk_execution import chunk_list
# from . import DomoCertification as dmdc
# from . import DomoPDP as dmpdp
# from . import DomoTag as dmtg


import domolibrary.utils.DictDot as util_dd
import domolibrary.client.DomoAuth as dmda
import domolibrary.client.DomoError as de
import domolibrary.routes.dataset as dataset_routes
#import domolibrary.classes.DomoPDP as dmpdp

# Component Classes
## DatasetSchema

The `DomoDataset_Schema` class is a component of `DomoDataset` (composition, not inheritance): every `DomoDataset` instance exposes one as its `schema` attribute. It handles all of the methods for interacting with schemas.

- In execution, the schema is separate from the data that gets uploaded from Vault to Adrenaline. The domo schema defines how the data is loaded into Vault.
- Be cognizant to match dataset uploads with schema definitions. If the schema and uploaded data types do not match, the dataset may be unable to index in Adrenaline (and therefore not update).


In [4]:
#| exporti
async def _have_prereqs(self, auth, dataset_id, function_name):
    """Resolve the auth object and dataset id that an API call needs.

    Each value is taken from the first non-empty source, in this order:
    the explicit argument, the attribute on `self` (`self.auth` / `self.id`),
    then the attribute on the parent dataset (`self.dataset.auth` /
    `self.dataset.id`) when `self` has a `dataset`.

    Args:
        self: object the calling method is bound to; it may or may not carry
            `auth`, `id` and `dataset` attributes.
        auth: explicitly supplied auth object, or None.
        dataset_id: explicitly supplied dataset id, or None.
        function_name: name of the calling method, used in error messages.

    Returns:
        tuple of `(auth, dataset_id)`.

    Raises:
        de.AuthNotProvidedError: no auth object could be resolved, or the
            resolved auth object has no token after `get_auth_token()`.
        de.DatasetNotProvidedError: no dataset id could be resolved.
    """

    parent_dataset = getattr(self, "dataset", None)

    auth = (
        auth
        or getattr(self, "auth", None)
        or (getattr(parent_dataset, "auth", None) if parent_dataset else None)
    )

    dataset_id = (
        dataset_id
        or getattr(self, "id", None)
        or (getattr(parent_dataset, "id", None) if parent_dataset else None)
    )

    # check for a missing auth object *before* awaiting on it, otherwise the
    # caller sees an AttributeError on None instead of the intended error
    if not auth:
        raise de.AuthNotProvidedError(
            function_name=function_name, entity_id=dataset_id
        )

    await auth.get_auth_token()

    if not auth.token:
        # report the resolved id; `self.dataset` may not exist on this object
        raise de.AuthNotProvidedError(
            function_name=function_name, entity_id=dataset_id
        )

    if not dataset_id:
        raise de.DatasetNotProvidedError(
            function_name=function_name, domo_instance=auth.domo_instance
        )

    return auth, dataset_id

In [5]:
# | export

class DatasetSchema_Types(Enum):
    """Column type names; each member's value is its own name as a string."""

    STRING = "STRING"
    DOUBLE = "DOUBLE"
    LONG = "LONG"
    DATE = "DATE"
    DATETIME = "DATETIME"


@dataclass
class DomoDataset_Schema_Column:
    """One column of a dataset schema: display name, id and data type."""

    name: str
    id: str
    type: DatasetSchema_Types

    @classmethod
    def _from_json(cls, json_obj):
        """Build a column from one column entry of a schema response.

        The `name`, `id` and `type` values are copied through unchanged.
        """
        column = util_dd.DictDot(json_obj)
        return cls(
            name=column.name,
            id=column.id,
            type=column.type,
        )


@dataclass
class DomoDataset_Schema:
    """Schema helper for a dataset; fetches the column definitions and keeps them in `columns`."""

    dataset: any = None
    columns: List[DomoDataset_Schema_Column] = field(default_factory=list)

    async def get(
        self,
        auth: Optional[dmda.DomoAuth] = None,
        dataset_id: str = None,
        debug_api: bool = False,
        return_raw_res: bool = False,  # return the raw response
    ) -> List[DomoDataset_Schema_Column]:
        """Retrieve the schema of a dataset.

        `auth` and `dataset_id` fall back to the values on this object or on
        its parent `dataset` when they are not passed in.

        Returns the raw response body when `return_raw_res` is set. Otherwise
        returns the list of columns (also stored on `self.columns`) for a 200
        response, and None for any other status.
        """

        auth, dataset_id = await _have_prereqs(
            self=self,
            auth=auth,
            dataset_id=dataset_id,
            function_name="DomoDataset_Schema.get",
        )

        res = await dataset_routes.get_schema(
            auth=auth, dataset_id=dataset_id, debug_api=debug_api
        )

        if return_raw_res:
            return res.response

        if res.status != 200:
            return None

        # the column definitions are read from the first entry of "tables"
        raw_columns = res.response.get("tables")[0].get("columns")

        self.columns = [
            DomoDataset_Schema_Column._from_json(json_obj=raw_column)
            for raw_column in raw_columns
        ]

        return self.columns


In [6]:
# render the API documentation entry for DomoDataset_Schema.get
show_doc(DomoDataset_Schema.get)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L104){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset_Schema.get

>      DomoDataset_Schema.get
>                              (auth:Optional[domolibrary.client.DomoAuth.DomoAu
>                              th]=None, dataset_id:str=None,
>                              debug_api:bool=False, return_raw_res:bool=False)

method that retrieves schema for a dataset

#### Sample implementation of getting a dataset schema

Standard implementation will be to access the `DomoDataset_Schema` class as the `DomoDataset.schema` property


In [7]:
import os

# the access token is read from the environment rather than written into the notebook
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# standalone schema object with no parent dataset, so auth and dataset_id are passed explicitly
ds_schema = DomoDataset_Schema()

await ds_schema.get(auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"])

[DomoDataset_Schema_Column(name='Dataset ID', id='Dataset ID', type='STRING'),
 DomoDataset_Schema_Column(name='Name', id='Name', type='STRING'),
 DomoDataset_Schema_Column(name='Description', id='Description', type='STRING'),
 DomoDataset_Schema_Column(name='Row Count', id='Row Count', type='DOUBLE'),
 DomoDataset_Schema_Column(name='Column Count', id='Column Count', type='DOUBLE'),
 DomoDataset_Schema_Column(name='Owner ID', id='Owner ID', type='STRING'),
 DomoDataset_Schema_Column(name='Owner Name', id='Owner Name', type='STRING'),
 DomoDataset_Schema_Column(name='Dataset Created Date/Time', id='Dataset Created Date/Time', type='DATETIME'),
 DomoDataset_Schema_Column(name='DataSet Last Touched Date/Time', id='DataSet Last Touched Date/Time', type='DATETIME'),
 DomoDataset_Schema_Column(name='DataSet Last Updated Date/Time', id='DataSet Last Updated Date/Time', type='DATETIME'),
 DomoDataset_Schema_Column(name='Report Last Run', id='Report Last Run', type='DATETIME'),
 DomoDataset_Sc

## DatasetTags

In [8]:
# | export

class DatasetTags_SetTagsError(Exception):
    """Raised when a request to set the tags of a dataset is not successful."""

    def __init__(self, dataset_id, domo_instance):
        super().__init__(
            f"failed to set tags on dataset - {dataset_id} in {domo_instance}"
        )


@dataclass
class DomoDataset_Tags:
    """class for interacting with dataset tags"""

    dataset: any = None
    tag_ls: List[str] = field(default_factory=list)

    async def get(
        self,
        dataset_id: str = None,
        auth: Optional[dmda.DomoAuth] = None,
        debug_api: bool = False,
        session: Optional[httpx.AsyncClient] = None,
    ) -> List[str]:  # returns a list of tags
        """gets the existing list of dataset_tags

        `auth` and `dataset_id` fall back to the values on this object or on
        its parent `dataset`. On success the tags are stored on `self.tag_ls`
        and returned. When the request is not successful the response is
        printed and None is returned (`self.tag_ls` is left untouched).
        """

        auth, dataset_id = await _have_prereqs(
            self=self,
            auth=auth,
            dataset_id=dataset_id,
            function_name="DomoDataset_Tags.get",
        )

        res = await dataset_routes.get_dataset_by_id(
            dataset_id=dataset_id, auth=auth, debug_api=debug_api, session=session
        )

        if not res.is_success:
            print(res)
            return None

        # "tags" is decoded with json.loads; a missing or empty value means no tags
        raw_tags = res.response.get("tags")
        tag_ls = json.loads(raw_tags) if raw_tags else []

        self.tag_ls = tag_ls

        return tag_ls

    async def set(
        self,
        tag_ls: List[str],
        dataset_id: str = None,
        auth: Optional[dmda.DomoAuth] = None,
        debug_api: bool = False,
        session: Optional[httpx.AsyncClient] = None,
    ) -> List[str]:  # returns a list of tags
        """replaces all tags with a new list of dataset_tags

        Duplicates in `tag_ls` are removed before the request is sent. After a
        successful update the tags are re-read so that `self.tag_ls` reflects
        what is stored.

        Raises `DatasetTags_SetTagsError` when the response status is not 200.
        """

        auth, dataset_id = await _have_prereqs(
            self=self,
            auth=auth,
            dataset_id=dataset_id,
            function_name="DomoDataset_Tags.set",
        )

        res = await dataset_routes.set_dataset_tags(
            auth=auth,
            tag_ls=list(set(tag_ls)),
            dataset_id=dataset_id,
            debug_api=debug_api,
            session=session,
        )

        if res.status != 200:
            raise DatasetTags_SetTagsError(
                dataset_id=dataset_id, domo_instance=auth.domo_instance
            )

        # refresh with the caller's session and debug flag rather than the defaults
        await self.get(
            dataset_id=dataset_id, auth=auth, debug_api=debug_api, session=session
        )

        return self.tag_ls

In [9]:
import os

# the access token is read from the environment rather than written into the notebook
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# standalone tags object with no parent dataset, so auth and dataset_id are passed explicitly
ds_tag = DomoDataset_Tags()

await ds_tag.get(auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"])


['domostats']

Bad pipe message: %s [b'\x1dD\xe3\x8e\xc7M\x17OEYf\x91)\xf5\xe8\x83"R 8&\xa3(\x19\x83m\xdeTAQ\xa9\xcd\xbam\xb7\xef\xed\xd4\x1e<\x82\x17i\xe9\xfc\xbd\x07\x8f\xfe\x19\xcb\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00', b'\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18']
Bad pipe message: %s [b"\x06\t'j\xc9\xf2\xa58\x8f\x9e\x89\xfd\xa8\xe7\xd1@(b \xd4\x0b=Lz\xc8`]\xb5\x8b\xf3\xf7\x0e\xd0,\xbf&\xfb_E\xa6h\xac\xee\xa0x`ff\xa6*\xe3\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t1"]
Bad pipe message: %s [b'.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04', b'\x03\x06', b'\x07\x08']
Bad pipe message: %s [b'\t\x08\n\x08\x0b\x08\x04']
Bad pipe message: %s [b'\x08\x06\x04\x01\x05\x01\x06', b'']
Bad pipe message: %s [b'\x03\x02\x03\x04\x00-\x00\x02\

In [10]:
import os
import datetime as dt

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# timestamp tag, so each run writes a visibly different tag list
today = dt.datetime.now().strftime("%b-%d-%Y %H:%M")

ds_tag = DomoDataset_Tags()

# set() replaces every existing tag on the dataset with this list
await ds_tag.set(
    auth=token_auth,
    dataset_id=os.environ["DOJO_DATASET_ID"],
    tag_ls=["developer_documentation", "hackercore", today],
)

['Apr-27-2023 08:02', 'developer_documentation', 'hackercore']

In [None]:
#| export

@patch_to(DomoDataset_Tags)
async def add(
    self: DomoDataset_Tags,
    add_tag_ls: List[str],
    dataset_id: str = None,
    auth: Optional[dmda.DomoAuth] = None,
    debug_api: bool = False,
    session: Optional[httpx.AsyncClient] = None,
) -> List[str]:  # returns a list of tags
    """appends tags to the list of existing dataset_tags

    The current tags are fetched, merged with `add_tag_ls` (duplicates
    removed) and written back with `set`. The caller's `add_tag_ls` is not
    modified.
    """

    auth, dataset_id = await _have_prereqs(
        self=self,
        auth=auth,
        dataset_id=dataset_id,
        function_name="DomoDataset_Tags.add",
    )

    existing_tag_ls = (
        await self.get(
            dataset_id=dataset_id, auth=auth, debug_api=debug_api, session=session
        )
        or []
    )

    # build a new collection instead of `+=`, which extended the caller's list in place
    merged_tag_ls = list(set(add_tag_ls) | set(existing_tag_ls))

    return await self.set(
        auth=auth,
        dataset_id=dataset_id,
        tag_ls=merged_tag_ls,
        debug_api=debug_api,
        session=session,
    )

In [None]:
import os
import datetime as dt

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# the current year is used as the tag to add
today_year = dt.datetime.today().strftime("%Y")
ds_tag = DomoDataset_Tags()
# add() merges the new tag into the tags already on the dataset
await ds_tag.add(
    auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"], add_tag_ls=[today_year]
)

['2023', 'developer_documentation', 'hackercore', 'Apr-04-2023 22:22']

In [None]:
#| export
@patch_to(DomoDataset_Tags)
async def remove(
    self: DomoDataset_Tags,
    remove_tag_ls: List[str],
    dataset_id: str = None,
    auth: Optional[dmda.DomoAuth] = None,
    debug_api: bool = False,
    session: Optional[httpx.AsyncClient] = None,
) -> List[str]:  # returns a list of tags
    """removes tags from the existing list of dataset_tags

    The current tags are fetched, every tag listed in `remove_tag_ls` is
    dropped, and the remainder is written back with `set`.

    Raises `DatasetTags_SetTagsError` when the current tags cannot be read;
    writing back in that situation would replace the dataset's tags with an
    empty list.
    """

    auth, dataset_id = await _have_prereqs(
        self=self,
        auth=auth,
        dataset_id=dataset_id,
        function_name="DomoDataset_Tags.remove",
    )

    existing_tag_ls = await self.get(
        dataset_id=dataset_id, auth=auth, debug_api=debug_api, session=session
    )

    # get() returns None when its request fails; stop with a meaningful error
    # instead of failing on iteration over None
    if existing_tag_ls is None:
        raise DatasetTags_SetTagsError(
            dataset_id=dataset_id, domo_instance=auth.domo_instance
        )

    tags_to_drop = set(remove_tag_ls)
    remaining_tag_ls = [tag for tag in existing_tag_ls if tag not in tags_to_drop]

    return await self.set(
        auth=auth,
        dataset_id=dataset_id,
        tag_ls=list(set(remaining_tag_ls)),
        debug_api=debug_api,
        session=session,
    )


#### Sample implementation of remove tags

In [None]:
import os
import datetime as dt

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# the current year is used as the tag to remove
today_year = dt.datetime.today().strftime("%Y")

ds_tag = DomoDataset_Tags()

# remove() drops the listed tags and writes the remaining ones back
await ds_tag.remove(
    auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"], remove_tag_ls=[ today_year])

['developer_documentation', 'hackercore', 'Apr-04-2023 22:22']

# MAIN - Domo Dataset


In [12]:
# | export
@dataclass
class DomoDataset:
    """Represents one Domo dataset and gives access to its schema and tag helpers."""

    # kept out of the generated repr
    auth: dmda.DomoAuth = field(default=None, repr=False)

    id: str = ""
    display_type: str = ""
    data_provider_type: str = ""
    name: str = ""
    description: str = ""
    row_count: int = None
    column_count: int = None

    stream_id: int = None

    owner: dict = field(default_factory=dict)
    formula: dict = field(default_factory=dict)

    # both helpers are rebuilt in __post_init__, whatever is passed in
    schema: DomoDataset_Schema = None
    tags: DomoDataset_Tags = None

    # certification: dmdc.DomoCertification = None
    # PDPPolicies: dmpdp.Dataset_PDP_Policies = None

    def __post_init__(self):
        """Attach schema and tag helpers that point back at this dataset."""
        self.schema = DomoDataset_Schema(dataset=self)
        self.tags = DomoDataset_Tags(dataset=self)

        # self.PDPPolicies = dmpdp.Dataset_PDP_Policies(dataset=self)

    def display_url(self):
        """Return the URL of this dataset's overview page in its Domo instance."""
        instance = self.auth.domo_instance
        return f"https://{instance}.domo.com/datasources/{self.id}/details/overview"

#### sample class-based implementation of get schema.


In [None]:
# this sample returns raw response from the api

import os
import pandas as pd

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

ds = DomoDataset(auth=token_auth, id=os.environ["DOJO_DATASET_ID"])

# no auth / dataset_id arguments: schema.get falls back to the parent dataset's values
raw_res = await ds.schema.get(return_raw_res=True)

# the column definitions are read from the first entry of "tables"
pd.DataFrame(raw_res.get("tables")[0].get("columns"))


Unnamed: 0,name,id,type,visible,order
0,objectID,objectID,STRING,True,0
1,url,url,STRING,True,0
2,Title,Title,STRING,True,0
3,article,article,STRING,True,0
4,views,views,LONG,True,0
5,created_dt,created_dt,DATETIME,True,0
6,published_dt,published_dt,DATETIME,True,0


In [None]:
# this sample returns class-based response from the api
import os
import pandas as pd

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

ds = DomoDataset(auth=token_auth, id=os.environ["DOJO_DATASET_ID"])

# without return_raw_res the result is a list of DomoDataset_Schema_Column objects
await ds.schema.get()


[DomoDataset_Schema_Column(name='objectID', id='objectID', type='STRING'),
 DomoDataset_Schema_Column(name='url', id='url', type='STRING'),
 DomoDataset_Schema_Column(name='Title', id='Title', type='STRING'),
 DomoDataset_Schema_Column(name='article', id='article', type='STRING'),
 DomoDataset_Schema_Column(name='views', id='views', type='LONG'),
 DomoDataset_Schema_Column(name='created_dt', id='created_dt', type='DATETIME'),
 DomoDataset_Schema_Column(name='published_dt', id='published_dt', type='DATETIME')]

In [None]:
# | export


@patch_to(DomoDataset, cls_method=True)
async def get_from_id(
    cls: DomoDataset,
    dataset_id: str,
    auth: dmda.DomoAuth,
    debug_api: bool = False,
    return_raw_res: bool = False,
    session: httpx.AsyncClient = None,
):
    """retrieves dataset metadata

    Returns the raw response body when `return_raw_res` is set; otherwise a
    `DomoDataset` populated from the response, including its formulas and
    tags when the response contains them.
    """

    res = await dataset_routes.get_dataset_by_id(
        auth=auth, dataset_id=dataset_id, debug_api=debug_api, session=session
    )

    if return_raw_res:
        return res.response

    dd = util_dd.DictDot(res.response)

    # int(None) raises TypeError, so a missing count is kept as None
    row_count = dd.rowCount
    column_count = dd.columnCount

    ds = cls(
        auth=auth,
        id=dd.id,
        display_type=dd.displayType,
        data_provider_type=dd.dataProviderType,
        name=dd.name,
        description=dd.description,
        owner=res.response.get("owner"),
        stream_id=dd.streamId,
        row_count=int(row_count) if row_count is not None else None,
        column_count=int(column_count) if column_count is not None else None,
    )

    # walk the raw dict so a missing "properties" or "formulas" level is
    # skipped instead of failing on chained attribute access
    properties = res.response.get("properties") or {}
    formulas = (properties.get("formulas") or {}).get("formulas")
    if formulas:
        ds.formula = formulas

    if dd.tags:
        ds.tags.tag_ls = json.loads(dd.tags)

    # if dd.certification:
    #     # print('class def certification', dd.certification)
    #     ds.certification = dmdc.DomoCertification._from_json(
    #         dd.certification)

    return ds


#### sample implementation of get_from_id


In [None]:
import os

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# "123" is not a real dataset id; the error is caught and printed to show the failure message
try:
    await DomoDataset.get_from_id(auth=token_auth, dataset_id="123")
except Exception as e:
    print(e)

dataset - 123 not found in domo-community


In [None]:
import os
import pandas as pd

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)
dataset_id = os.environ["DOJO_DATASET_ID"]
# dataset_id = 'da552832-c04d-46ac-936a-f982d9d3f2e6'

res = await DomoDataset.get_from_id(auth=token_auth, dataset_id= dataset_id)

# no arguments: tags.get falls back to the auth and id of the dataset it belongs to
await res.tags.get()

from pprint import pprint
# pd.DataFrame([res])

# pretty-print the dataclass so the nested owner / formula dicts are readable
pprint(res)

DomoDataset(id='04c1574e-c8be-4721-9846-c6ffa491144b',
            display_type='domo-jupyterdata',
            data_provider_type='domo-jupyterdata',
            name='domo_kbs',
            description=None,
            row_count=1185,
            column_count=7,
            stream_id=825,
            owner={'group': False,
                   'id': '1893952720',
                   'name': 'Jae Wilson1',
                   'type': 'USER'},
            formula={'calculation_38846559-d190-4ab1-809b-bcd361db5670': {'bignumber': False,
                                                                          'columnPositions': [{'columnName': 'views',
                                                                                               'columnPosition': 4}],
                                                                          'dataType': 'LONG',
                                                                          'formula': 'max(views)',
                                

In [None]:
# | export

class QueryExecutionError(de.DomoError):
    """Raised by `DomoDataset.query_dataset_private` when the query request is not successful."""

    def __init__(self,
                 sql, dataset_id,
                 domo_instance,
                 status, message,
                 function_name=None ):
        
        # NOTE(review): this is assigned before DomoError.__init__ runs, and the
        # base class is handed the raw `message` (without the sql); whether the
        # base initializer overwrites `self.message` is not visible here - confirm
        self.message = f"error executing {sql}: {message}"

        # the dataset id is forwarded as the base error's entity_id
        super().__init__(entity_id=dataset_id,
                         function_name=function_name,
                         status=status,
                         message=message,
                         domo_instance=domo_instance)


@patch_to(DomoDataset, cls_method=True)
async def query_dataset_private(
    cls: DomoDataset,
    auth: dmda.DomoAuth,  # DomoFullAuth or DomoTokenAuth
    dataset_id: str,
    sql: str,
    session: Optional[httpx.AsyncClient] = None,
    loop_until_end: bool = False,  # retrieve all available rows
    limit=100,  # maximum rows to return per request.  refers to PAGINATION
    skip=0,
    maximum=100,  # equivalent to the LIMIT or TOP clause in SQL, the number of rows to return total
    debug_api: bool = False,
    debug_loop: bool = False,
    timeout=10,  # larger API requests may require a longer response time
) -> pd.DataFrame:
    """Run `sql` against a dataset and return the response as a DataFrame.

    Raises `QueryExecutionError` when the request is not successful.
    """

    res = await dataset_routes.query_dataset_private(
        auth=auth,
        dataset_id=dataset_id,
        sql=sql,
        limit=limit,
        skip=skip,
        maximum=maximum,
        loop_until_end=loop_until_end,
        timeout=timeout,
        session=session,
        debug_api=debug_api,
        debug_loop=debug_loop,
    )

    if res.is_success:
        return pd.DataFrame(res.response)

    raise QueryExecutionError(
        sql=sql,
        dataset_id=dataset_id,
        domo_instance=auth.domo_instance,
        status=res.status,
        message=res.response,
        function_name="query_dataset_private",
    )


In [None]:
# | export
class DomoDataset_DeleteDataset_Error(de.DomoError):
    """Raised by `DomoDataset.delete` when the delete request is not successful."""

    def __init__(self, dataset_id, status, reason, domo_instance, function_name):
        # `reason` is forwarded as the base error's message,
        # and the dataset id as its entity_id
        super().__init__(
            domo_instance=domo_instance,
            entity_id=dataset_id,
            function_name=function_name,
            status=status,
            message=reason,
        )


@patch_to(DomoDataset)
async def delete(
    self: DomoDataset,
    dataset_id=None,
    auth: dmda.DomoAuth = None,
    debug_api: bool = False,
    session: httpx.AsyncClient = None,
):
    """Delete a dataset and return the route's response.

    `dataset_id` and `auth` default to this dataset's own `id` and `auth`.
    Raises `DomoDataset_DeleteDataset_Error` when the request is not successful.
    """

    auth = auth or self.auth
    dataset_id = dataset_id or self.id

    res = await dataset_routes.delete(
        auth=auth, dataset_id=dataset_id, debug_api=debug_api, session=session
    )

    if res.is_success:
        return res

    raise DomoDataset_DeleteDataset_Error(
        dataset_id=dataset_id,
        status=res.status,
        reason=res.response,
        domo_instance=auth.domo_instance,
        function_name="DomoDataset.delete",
    )


In [None]:
#     @classmethod
#     async def query_dataset(cls,
#                             sql: str,
#                             dataset_id: str,
#                             dev_auth: DomoDeveloperAuth,
#                             debug_api: bool = False,
#                             session: httpx.AsyncClient = None) -> pd.DataFrame:

#         if debug_api:
#             print("query dataset class method")
#             print({'dataset_id': dataset_id,
#                    'dev_auth': dev_auth})

#         res = await dataset_routes.query_dataset_public(dev_auth=dev_auth, id=dataset_id, sql=sql, session=session,
#                                                         debug=debug)

#         if debug_api:
#             print(res.response)

#         if res.status == 200:
#             df = pd.DataFrame(data=res.response.get('rows'),
#                               columns=res.response.get('columns'))
#             return df
#         return None


## Upload Data

In [None]:
# | export
class DomoDataset_UploadData_Error(Exception):
    """Base error for the upload stages.

    The message names the stage, the error text, the API status and reason,
    the partition (only when `partition_key` is given), the dataset id and
    the instance.
    """

    def __init__(
        self,
        message_error: str,
        domo_instance: str,
        dataset_id: str,
        stage: int,
        status="",
        reason="",
        partition_key: str = None,
    ):
        partition_part = (
            f"for partition - '{partition_key}' " if partition_key else ""
        )

        super().__init__(
            f"Stage {stage}:: {message_error} :: API {status} - {reason} :: "
            f"{partition_part}in {dataset_id} in {domo_instance}"
        )


class DomoDataset_UploadData_DatasetUploadId_Error(DomoDataset_UploadData_Error):
    """Upload error for stage 1: no dataset_upload_id could be retrieved."""

    def __init__(
        self,
        domo_instance: str,
        dataset_id: str,
        stage: int = 1,
        status="",
        reason="",
        partition_key: str = None,
    ):
        super().__init__(
            message_error="unable to retrieve dataset_upload_id",
            stage=stage,
            status=status,
            reason=reason,
            partition_key=partition_key,
            dataset_id=dataset_id,
            domo_instance=domo_instance,
        )


class DomoDataset_UploadData_UploadData_Error(DomoDataset_UploadData_Error):
    """Upload error for stage 2: a data part failed to upload."""

    def __init__(
        self,
        domo_instance: str,
        dataset_id: str,
        stage: int = 2,
        status="",
        reason="",
        partition_key: str = None,
    ):
        super().__init__(
            message_error="while uploading data",
            stage=stage,
            status=status,
            reason=reason,
            partition_key=partition_key,
            dataset_id=dataset_id,
            domo_instance=domo_instance,
        )

class DomoDataset_UploadData_CommitDatasetUploadId_Error(DomoDataset_UploadData_Error):
    """Upload error for stage 3: the dataset_upload_id could not be committed."""

    def __init__(
        self,
        domo_instance: str,
        dataset_id: str,
        stage: int = 3,
        status="",
        reason="",
        partition_key: str = None,
    ):
        super().__init__(
            message_error="while commiting dataset_upload_id",
            stage=stage,
            status=status,
            reason=reason,
            partition_key=partition_key,
            dataset_id=dataset_id,
            domo_instance=domo_instance,
        )


In [None]:
#| export
@patch_to(DomoDataset)
async def index_dataset(
    self: DomoDataset,
    auth: dmda.DomoAuth = None,
    dataset_id: str = None,
    debug_api: bool = False,
    session: httpx.AsyncClient = None,
):
    """Call the index route for a dataset and return its response.

    `auth` and `dataset_id` default to this dataset's own `auth` and `id`.
    """

    resolved_auth = auth or self.auth
    resolved_dataset_id = dataset_id or self.id

    return await dataset_routes.index_dataset(
        auth=resolved_auth,
        dataset_id=resolved_dataset_id,
        debug_api=debug_api,
        session=session,
    )


In [None]:
#| export
@patch_to(DomoDataset)
async def upload_data(self: DomoDataset,
                      upload_df: pd.DataFrame = None,
                      upload_df_ls: list[pd.DataFrame] = None,
                      upload_file: io.TextIOWrapper = None,

                      upload_method: str = 'REPLACE',  # APPEND or REPLACE
                      partition_key: str = None,

                      is_index: bool = True,

                      dataset_id: str = None,
                      dataset_upload_id=None,

                      auth: dmda.DomoAuth = None,

                      session: httpx.AsyncClient = None,
                      debug_api: bool = False,
                      debug_prn: bool = False
                      ):
    """Upload data to a dataset in three stages, then optionally index it.

    Stage 1 requests an upload id (skipped when `dataset_upload_id` is
    supplied), stage 2 uploads the file or the dataframe parts, stage 3
    commits the upload. When `is_index` is set, `index_dataset` is called
    afterwards and its response is returned; otherwise the stage 3 response
    is returned.

    Data is taken from `upload_file` when given, else from `upload_df_ls`,
    else from `upload_df` as a single part.

    Raises:
        ValueError: none of `upload_df`, `upload_df_ls`, `upload_file` was given.
        DomoDataset_UploadData_DatasetUploadId_Error: stage 1 returned no upload id.
        DomoDataset_UploadData_UploadData_Error: a stage 2 upload was not successful.
        DomoDataset_UploadData_CommitDatasetUploadId_Error: stage 3 was not successful.
    """

    # fail before any request is made: without this check an upload session is
    # opened in stage 1 and stage 2 is then handed None as its dataframe
    if not upload_file and not upload_df_ls and upload_df is None:
        raise ValueError(
            "upload_data requires one of upload_df, upload_df_ls or upload_file")

    auth, dataset_id = await _have_prereqs(self = self, auth = auth, dataset_id=dataset_id, function_name= "upload_data")

    upload_df_ls = upload_df_ls or [upload_df]

    status_message = f"{dataset_id} {partition_key} | {auth.domo_instance}"

    # stage 1 get uploadId
    if not dataset_upload_id:
        if debug_prn:
            print(f"\n\n🎭 starting Stage 1 - {status_message}")

        stage_1_res = await dataset_routes.upload_dataset_stage_1(auth=auth,
                                                                  dataset_id=dataset_id,
                                                                  session=session,
                                                                  partition_tag=partition_key,
                                                                  debug_api=debug_api
                                                                  )
        if debug_prn:
            print(f"\n\n🎭 Stage 1 response -- {stage_1_res.status} for {status_message}")

        dataset_upload_id = stage_1_res.response.get('uploadId')

        # checked inside this branch, the only place stage_1_res exists
        if not dataset_upload_id:
            raise DomoDataset_UploadData_DatasetUploadId_Error(
                domo_instance=auth.domo_instance,  dataset_id=dataset_id, stage=1, partition_key=partition_key,
                status=stage_1_res.status, reason=stage_1_res.response)

    # stage 2 upload_dataset
    stage_2_res = None

    if upload_file:
        if debug_prn:
            print(f"\n\n🎭 starting Stage 2 - upload file for {status_message}")

        # a file is always sent as a single part
        stage_2_res = [await dataset_routes.upload_dataset_stage_2_file(auth=auth,
                                                                        dataset_id=dataset_id,
                                                                        upload_id=dataset_upload_id,
                                                                        part_id=1,
                                                                        data_file=upload_file,
                                                                        session=session, debug_api=debug_api)]

    else:
        if debug_prn:
            print(
                f"\n\n🎭 starting Stage 2 - {len(upload_df_ls)} - number of parts for {status_message}")

        # one request per dataframe, run concurrently; part ids start at 1
        stage_2_res = await asyncio.gather(*[dataset_routes.upload_dataset_stage_2_df(auth=auth,
                                                                                      dataset_id=dataset_id,
                                                                                      upload_id=dataset_upload_id,
                                                                                      part_id=index + 1,
                                                                                      upload_df=df,
                                                                                      session=session, debug_api=debug_api) for index, df in enumerate(upload_df_ls)])

    for res in stage_2_res:
        if not res.is_success:
            raise DomoDataset_UploadData_UploadData_Error(
                domo_instance=auth.domo_instance, dataset_id=dataset_id, stage=2, partition_key=partition_key,
                status=res.status, reason=res.response)

    if debug_prn:
        print(f"🎭 Stage 2 - upload data: complete for {status_message}")

    # stage 3 commit_data
    if debug_prn:
        print(f"\n\n🎭 starting Stage 3 - commit dataset_upload_id for {status_message}")

    await asyncio.sleep(10)  # wait for uploads to finish

    # is_index is always False here; indexing is done separately below
    stage3_res = await dataset_routes.upload_dataset_stage_3(auth=auth,
                                                             dataset_id=dataset_id,
                                                             upload_id=dataset_upload_id,
                                                             update_method=upload_method,
                                                             partition_tag=partition_key,
                                                             is_index=False,
                                                             session=session,
                                                             debug_api=debug_api)

    if not stage3_res.is_success:
        raise DomoDataset_UploadData_CommitDatasetUploadId_Error(
            domo_instance=auth.domo_instance, dataset_id=dataset_id, partition_key=partition_key, stage=3,
            status=stage3_res.status, reason=stage3_res.response)

    if debug_prn:
        print(f"\n🎭 stage 3 - commit dataset: complete for {status_message} ")

    if is_index:
        await asyncio.sleep(3)
        return await self.index_dataset(auth=auth,
                                        dataset_id=dataset_id,
                                        debug_api=debug_api,
                                        session=session)

    return stage3_res


## Partitions

In [None]:
#| export
@patch_to(DomoDataset)
async def list_partitions(
    self: DomoDataset,
    auth: dmda.DomoAuth = None,
    dataset_id: str = None,
    debug_api: bool = False,
    session: httpx.AsyncClient = None,
):
    """Return the partition list of a dataset, or None when the status is not 200.

    `auth` and `dataset_id` default to this dataset's own `auth` and `id`.
    """

    resolved_auth = auth or self.auth
    resolved_dataset_id = dataset_id or self.id

    res = await dataset_routes.list_partitions(
        auth=resolved_auth,
        dataset_id=resolved_dataset_id,
        debug_api=debug_api,
        session=session,
    )

    return res.response if res.status == 200 else None

In [None]:
import os
import pandas as pd

# Token-based auth for the demo instance; the access token is read from the
# environment so no secret is stored in the notebook.
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# id of the dataset whose partitions are listed below
dataset_id = 'd2b21660-4ba8-400c-badf-aeef5a9abae1'

# Build the DomoDataset instance, then call list_partitions with no arguments
# so that it falls back to the instance's own auth and id.
ds = await DomoDataset.get_from_id(auth=token_auth, dataset_id=dataset_id)
ds_partition_ls = await ds.list_partitions()

# Preview the first five partitions only.
# NOTE: list_partitions returns None on a non-200 status, in which case this slice fails.
pd.DataFrame(ds_partition_ls[0:5])

Unnamed: 0,dataId,partitionId,dateCompleted,rowCount
0,372,2013-07-02,2023-01-24T14:27:21.000+00:00,1
1,373,2013-07-01,2023-01-24T14:27:21.000+00:00,1
2,354,2013-07-20,2023-01-24T14:27:20.000+00:00,1
3,355,2013-07-19,2023-01-24T14:27:20.000+00:00,1
4,356,2013-07-18,2023-01-24T14:27:20.000+00:00,1


In [None]:
#| export
class DomoDataset_CreateDataset_Error(Exception):
    """Raised when creating a dataset in a Domo instance fails.

    The exception message is assembled from the dataset name, the instance
    name, and the status / reason supplied by the caller.
    """

    def __init__(self, domo_instance: str, dataset_name: str, status: int, reason: str):
        super().__init__(
            f"Failure to create dataset {dataset_name} in {domo_instance} :: {status} - {reason}"
        )


@patch_to(DomoDataset, cls_method=True)
async def create(cls: DomoDataset,
                 dataset_name: str,
                 dataset_type='api',
                 schema=None,
                 auth: dmda.DomoAuth = None,
                 debug_api: bool = False,
                 session: httpx.AsyncClient = None
                 ):
    """Create a new dataset and return it as a class instance.

    Args:
        dataset_name: name given to the new dataset.
        dataset_type: dataset type passed to the create route (default 'api').
        schema: schema dict for the new dataset; when falsy, a placeholder
            two-column schema (`col1` LONG, `col2` STRING) is used.
        auth: authentication object passed to the route and to `get_from_id`.
        debug_api: forwarded to the create route only.
        session: optional httpx client, forwarded to the create route only.

    Returns:
        Whatever `cls.get_from_id` returns for the id of the new dataset.

    Raises:
        DomoDataset_CreateDataset_Error: the create route response is not successful.
    """
    if not schema:
        # placeholder schema used when the caller does not supply one
        schema = {"columns": [
            {"name": 'col1', "type": 'LONG', "upsertKey": False},
            {"name": 'col2', "type": 'STRING', "upsertKey": False}
        ]}

    create_res = await dataset_routes.create(
        dataset_name=dataset_name,
        dataset_type=dataset_type,
        schema=schema,
        auth=auth,
        debug_api=debug_api,
        session=session,
    )

    if not create_res.is_success:
        raise DomoDataset_CreateDataset_Error(
            domo_instance=auth.domo_instance,
            dataset_name=dataset_name,
            status=create_res.status,
            reason=create_res.response)

    # the id of the new dataset is nested under 'dataSource' in the response
    data_source = create_res.response.get('dataSource')
    new_dataset_id = data_source.get('dataSourceId')

    return await cls.get_from_id(dataset_id=new_dataset_id, auth=auth)


#### sample implementation of create dataset

In [None]:
import os

# Token-based auth for the demo instance; the access token comes from the environment.
token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-community", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

# The call below is left commented out.
# NOTE(review): presumably so that running the notebook does not create a new
# dataset on every execution — confirm; uncomment to try it.
# await DomoDataset.create( dataset_name= 'Hello world_v2', dataset_type='API', auth = token_auth, debug_api = False)

In [None]:
#     async def delete_partition(self,
#                                dataset_partition_id: str,

#                                dataset_id: str = None,
#                                empty_df: pd.DataFrame = None,

#                                auth: DomoFullAuth = None,

#                                is_index: bool = True,
#                                debug_api: bool = False,
#                                session: httpx.AsyncClient = None):

#         is_close_session = True if not session else False

#         session = session or httpx.AsyncClient()
#         auth = auth or self.auth
#         dataset_id = dataset_id or self.id

# #        if empty_df is None:
# #            empty_df = await self.query_dataset_private(auth=auth,
# #                                                        dataset_id=dataset_id,
# #                                                        sql="SELECT * from table limit 1",
# #                                                        debug=False)
# #
# #        await self.upload_csv(upload_df=empty_df.head(0),
# #                              upload_method='REPLACE',
# #                              is_index=is_index,
# #                              partition_key=dataset_partition_id,
# #                              session=session,
# #                              debug=False)
#         if debug_api:
#             print(f"\n\n🎭 starting Stage 1")

#         res = await dataset_routes.delete_partition_stage_1(auth=auth,
#                                                             dataset_id=dataset_id,
#                                                             dataset_partition_id=dataset_partition_id,
#                                                             debug=debug, session=session)
#         if debug_api:
#             print(f"\n\n🎭 Stage 1 response -- {res.status}")
#             print(res)

#         stage_2_res = None
#         if debug_api:
#             print('starting Stage 2')
#         stage_2_res = await dataset_routes.delete_partition_stage_2(auth=auth,
#                                                                     dataset_id=dataset_id,
#                                                                     dataset_partition_id=dataset_partition_id,
#                                                                     debug=debug, session=session)
#         if debug_api:
#             print(f"\n\n🎭 Stage 2 response -- {stage_2_res.status}")

#         stage_3_res = None
#         if debug_api:
#             print('starting Stage 3')
#         stage_3_res = await dataset_routes.index_dataset(auth=auth,
#                                                          dataset_id=dataset_id,
#                                                          debug=debug, session=session)
#         if debug_api:
#             print(f"\n\n🎭 Stage 3 response -- {stage_3_res.status}")

#         if is_close_session:
#             await session.aclose()

#         if debug_api:
#             print(stage_3_res)

#         if stage_3_res.status == 200:
#             return res.response

In [None]:
#     async def reset_dataset(self,
#                             auth: DomoFullAuth = None,
#                             is_index: bool = True,
#                             debug_api: bool = False
#                             ):
#         execute_reset = input(
#             "This function will delete all rows.  Type BLOW_ME_AWAY to execute:")

#         if execute_reset != 'BLOW_ME_AWAY':
#             print("You didn't type BLOW_ME_AWAY, moving on.")
#             return None

#         auth = auth or self.auth
#         dataset_id = self.id

#         if not auth:
#             raise Exception("auth required")

#         session = httpx.AsyncClient()

#         # create empty dataset to retain schema
#         empty_df = await self.query_dataset_private(auth=auth,
#                                                     dataset_id=dataset_id,
#                                                     sql="SELECT * from table limit 1",
#                                                     session=session,
#                                                     debug=debug)
#         empty_df = empty_df.head(0)

#         # get partition list
# #         partition_list = await dataset_routes.list_partitions(auth=auth,
# #                                                               dataset_id=self.id,
# #                                                               debug=debug,
# #                                                               session=session)

# #         if len(partition_list) > 0:
# #             partition_list = chunk_list(partition_list, 100)

# #             for index, pl in enumerate(partition_list):
# #                 print(f'🥫 starting chunk {index + 1} of {len(partition_list)}')

# #                 await asyncio.gather(*[self.delete_partition(auth=auth,
# #                                                              dataset_partition_id=partition.get('partitionId'),
# #                                                              session=session,
# #                                                              empty_df=empty_df,
# #                                                              debug=False) for partition in pl])
# #                 if is_index:
# #                     await self.index_dataset(session=session)

#         res = await self.upload_csv(upload_df=empty_df,
#                                     upload_method='REPLACE',
#                                     is_index=is_index,
#                                     session=session,
#                                     debug=False)

#         await session.aclose()
#         return True


In [None]:
# | hide
import nbdev

# Run nbdev's export step, which writes notebook cells marked for export
# out to the library modules.
nbdev.nbdev_export()
