<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

## Component Classes

The [`DomoDataset_Schema`](https://jaewilson07.github.io/domo_library/classes/domodataset.html#domodataset_schema) class is a component class of [`DomoDataset`](https://jaewilson07.github.io/domo_library/classes/domodataset.html#domodataset), exposed as the `DomoDataset.schema` property. It handles all of the methods for interacting with schemas.

- In execution, the schema is separate from the data that gets uploaded from Vault to Adrenaline. The Domo schema defines how the data is loaded into Vault.
- Take care to match dataset uploads with schema definitions. If the data types in the schema and in the uploaded data do not match, the dataset may be unable to index in Adrenaline (and therefore will not update).

In [1]:
#| echo: false
#| output: asis
show_doc(DomoDataset_Schema)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L72){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset_Schema

>      DomoDataset_Schema (dataset:<built-infunctionany>=None,
>                          columns:List[__main__.DomoDataset_Schema_Column]=<fac
>                          tory>)

class for interacting with dataset schemas

In [2]:
#| echo: false
#| output: asis
show_doc(DomoDataset_Schema_Column)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L60){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset_Schema_Column

>      DomoDataset_Schema_Column (name:str, id:str, type:str)

In [3]:
#| echo: false
#| output: asis
show_doc(DatasetSchema_DatasetNotProvidedError)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L51){target="_blank" style="float:right; font-size:smaller"}

### DatasetSchema_DatasetNotProvidedError

>      DatasetSchema_DatasetNotProvidedError ()

raised if a DatasetSchema request does not have a dataset id

In [4]:
#| echo: false
#| output: asis
show_doc(DatasetSchema_AuthNotProvidedError)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L43){target="_blank" style="float:right; font-size:smaller"}

### DatasetSchema_AuthNotProvidedError

>      DatasetSchema_AuthNotProvidedError (dataset_id)

raised if a DatasetSchema request cannot access an auth object

In [5]:
#| echo: false
#| output: asis
show_doc(DomoDataset_Schema.get)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L78){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset_Schema.get

>      DomoDataset_Schema.get
>                              (auth:Optional[domolibrary.client.DomoAuth.DomoAu
>                              th]=None, dataset_id:str=None,
>                              debug_api:bool=False, return_raw_res:bool=False)

method that retrieves schema for a dataset

#### Sample implementation of getting a dataset schema

The standard implementation is to access the [`DomoDataset_Schema`](https://jaewilson07.github.io/domo_library/classes/domodataset.html#domodataset_schema) class through the `DomoDataset.schema` property.

In [6]:
import os

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

ds_schema = DomoDataset_Schema()

await ds_schema.get(auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"])

[DomoDataset_Schema_Column(name='objectID', id='objectID', type='STRING'),
 DomoDataset_Schema_Column(name='url', id='url', type='STRING'),
 DomoDataset_Schema_Column(name='Title', id='Title', type='STRING'),
 DomoDataset_Schema_Column(name='article', id='article', type='STRING'),
 DomoDataset_Schema_Column(name='views', id='views', type='LONG'),
 DomoDataset_Schema_Column(name='created_dt', id='created_dt', type='DATETIME'),
 DomoDataset_Schema_Column(name='published_dt', id='published_dt', type='DATETIME')]

In [7]:
#| echo: false
#| output: asis
show_doc(DomoDataset_Tags)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L133){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset_Tags

>      DomoDataset_Tags (dataset:<built-infunctionany>=None,
>                        tag_ls:List[str]=<factory>)

class for interacting with dataset tags

In [8]:
#| echo: false
#| output: asis
show_doc(DatasetTags_SetTagsError)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L124){target="_blank" style="float:right; font-size:smaller"}

### DatasetTags_SetTagsError

>      DatasetTags_SetTagsError (dataset_id, domo_instance)

raised if a DatasetTags request is not successful

In [9]:
#| echo: false
#| output: asis
show_doc(DatasetTags_AuthNotProvidedError)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L116){target="_blank" style="float:right; font-size:smaller"}

### DatasetTags_AuthNotProvidedError

>      DatasetTags_AuthNotProvidedError (id)

raised if a DatasetTags request cannot access an auth object

In [10]:
import os

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

ds_tag = DomoDataset_Tags()
await ds_tag.get(auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"])

['Jan-11-2023 13:17', '2023', 'developer_documentation', 'hackercore']

In [11]:
import os
import datetime as dt

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

today = dt.datetime.now().strftime("%b-%d-%Y %H:%M")
ds_tag = DomoDataset_Tags()
await ds_tag.set(
    auth=token_auth,
    dataset_id=os.environ["DOJO_DATASET_ID"],
    tag_ls=["developer_documentation", "hackercore", today],
)

['developer_documentation', 'hackercore']

In [12]:
#| echo: false
#| output: asis
show_doc(DomoDataset_Tags.add)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L210){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset_Tags.add

>      DomoDataset_Tags.add (add_tag_ls:[<class'str'>], dataset_id:str=None,
>                            auth:Optional[domolibrary.client.DomoAuth.DomoAuth]
>                            =None, debug_api:bool=False, session:Optional[aioht
>                            tp.client.ClientSession]=None)

appends tags to the list of existing dataset_tags

In [13]:
import os
import datetime as dt

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

today_year = dt.datetime.today().strftime("%Y")
ds_tag = DomoDataset_Tags()
await ds_tag.add(
    auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"], add_tag_ls=[today_year]
)

['2023', 'developer_documentation', 'hackercore']

In [14]:
#| echo: false
#| output: asis
show_doc(DomoDataset_Tags.remove)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L235){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset_Tags.remove

>      DomoDataset_Tags.remove (remove_tag_ls:[<class'str'>],
>                               dataset_id:str=None, auth:domolibrary.client.Dom
>                               oAuth.DomoFullAuth=None, debug_api:bool=False, s
>                               ession:Optional[aiohttp.client.ClientSession]=No
>                               ne)

removes tags from the existing list of dataset_tags

In [15]:
import os
import datetime as dt

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

today_year = dt.datetime.today().strftime("%Y")

ds_tag = DomoDataset_Tags()

await ds_tag.remove(
    auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"], remove_tag_ls=[ today_year])

['developer_documentation', 'hackercore']

# MAIN - Domo Dataset

In [16]:
#| echo: false
#| output: asis
show_doc(DomoDataset)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L259){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset

>      DomoDataset (auth:domolibrary.client.DomoAuth.DomoAuth=None, id:str='',
>                   display_type:str='', data_provider_type:str='', name:str='',
>                   description:str='', row_count:int=None,
>                   column_count:int=None, stream_id:int=None,
>                   owner:dict=<factory>, formula:dict=<factory>,
>                   schema:__main__.DomoDataset_Schema=None)

interacts with domo datasets

#### sample class-based implementation of get schema.

In [17]:
# this sample returns raw response from the api

import os
import pandas as pd

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

ds = DomoDataset(auth=token_auth, id=os.environ["DOJO_DATASET_ID"])

raw_res = await ds.schema.get(return_raw_res=True)

pd.DataFrame(raw_res.get("tables")[0].get("columns"))

Unnamed: 0,name,id,type,visible,order
0,objectID,objectID,STRING,True,0
1,url,url,STRING,True,0
2,Title,Title,STRING,True,0
3,article,article,STRING,True,0
4,views,views,LONG,True,0
5,created_dt,created_dt,DATETIME,True,0
6,published_dt,published_dt,DATETIME,True,0


In [18]:
# this sample returns class-based response from the api
import os
import pandas as pd

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

ds = DomoDataset(auth=token_auth, id=os.environ["DOJO_DATASET_ID"])

await ds.schema.get()

[DomoDataset_Schema_Column(name='objectID', id='objectID', type='STRING'),
 DomoDataset_Schema_Column(name='url', id='url', type='STRING'),
 DomoDataset_Schema_Column(name='Title', id='Title', type='STRING'),
 DomoDataset_Schema_Column(name='article', id='article', type='STRING'),
 DomoDataset_Schema_Column(name='views', id='views', type='LONG'),
 DomoDataset_Schema_Column(name='created_dt', id='created_dt', type='DATETIME'),
 DomoDataset_Schema_Column(name='published_dt', id='published_dt', type='DATETIME')]

In [19]:
#| echo: false
#| output: asis
show_doc(DomoDataset.get_from_id)

---

[source](https://github.com/jaewilson07/domo_library/blob/main/domolibrary/classes/DomoDataset.py#L294){target="_blank" style="float:right; font-size:smaller"}

### DomoDataset.get_from_id

>      DomoDataset.get_from_id (dataset_id:str,
>                               auth:domolibrary.client.DomoAuth.DomoAuth,
>                               debug_api:bool=False, return_raw_res:bool=False)

retrieves dataset metadata

#### sample implementation of get_from_id

In [None]:
import os

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

try:
    await DomoDataset.get_from_id(auth=token_auth, dataset_id="123")
except Exception as e:
    print(e)

dataset - 123 not found in domo-dojo


In [20]:
import os
import pandas as pd

token_auth = dmda.DomoTokenAuth(
    domo_instance="domo-dojo", domo_access_token=os.environ["DOMO_DOJO_ACCESS_TOKEN"]
)

await DomoDataset.get_from_id(auth=token_auth, dataset_id=os.environ["DOJO_DATASET_ID"])

DomoDataset(id='04c1574e-c8be-4721-9846-c6ffa491144b', display_type='domo-jupyterdata', data_provider_type='domo-jupyterdata', name='domo_kbs', description=None, row_count=1185, column_count=7, stream_id=825, owner=DictDot(id='1893952720', name='Jae Wilson', type='USER', group=False), formula=DictDot(), schema=DomoDataset_Schema(dataset=..., columns=[]))

In [None]:
#     @classmethod
#     async def query_dataset(cls,
#                             sql: str,
#                             dataset_id: str,
#                             dev_auth: DomoDeveloperAuth,
#                             debug: bool = False,
#                             session: aiohttp.ClientSession = None) -> pd.DataFrame:

#         if debug:
#             print("query dataset class method")
#             print({'dataset_id': dataset_id,
#                    'dev_auth': dev_auth})

#         res = await dataset_routes.query_dataset_public(dev_auth=dev_auth, id=dataset_id, sql=sql, session=session,
#                                                         debug=debug)

#         if debug:
#             print(res.response)

#         if res.status == 200:
#             df = pd.DataFrame(data=res.response.get('rows'),
#                               columns=res.response.get('columns'))
#             return df
#         return None

#     @classmethod
#     async def query_dataset_private(cls,
#                                     sql: str,
#                                     dataset_id: str,
#                                     full_auth: DomoFullAuth,
#                                     debug: bool = False,
#                                     session: aiohttp.ClientSession = None) -> pd.DataFrame:

#         if debug:
#             print("query dataset class method")
#             print({'dataset_id': dataset_id,
#                    'full_auth': full_auth})

#         res = await dataset_routes.query_dataset_private(full_auth=full_auth, id=dataset_id, sql=sql, session=session,
#                                                          debug=debug)

#         return pd.DataFrame(res)

#     async def upload_csv(self,
#                          upload_df: pd.DataFrame = None,
#                          upload_df_list: list[pd.DataFrame] = None,
#                          upload_file: io.TextIOWrapper = None,

#                          full_auth: DomoFullAuth = None,
#                          upload_method: str = 'REPLACE',
#                          dataset_id: str = None,
#                          dataset_upload_id=None,
#                          partition_key: str = None,
#                          is_index: bool = True,
#                          session: aiohttp.ClientSession = None,
#                          debug: bool = False):

#         full_auth = full_auth or self.full_auth
#         dataset_id = dataset_id or self.id

#         upload_df_list = upload_df_list or [upload_df]

#         # stage 1 get uploadId
#         if not dataset_upload_id:
#             if debug:
#                 print(f"\n\n🎭 starting Stage 1")

#             res = await dataset_routes.upload_dataset_stage_1(full_auth=full_auth,
#                                                               dataset_id=dataset_id,
#                                                               session=session,
#                                                               data_tag=partition_key,
#                                                               debug=debug
#                                                               )
#             if debug:
#                 print(f"\n\n🎭 Stage 1 response -- {res.status}")
#                 print(res)

#             dataset_upload_id = res.response.get('uploadId')

#         # stage 2 upload_dataset

#         if debug:
#             print(
#                 f"\n\n🎭 starting Stage 2 - {len(upload_df_list)} - number of parts")

#         stage_2_res = None

#         if upload_file:
#             if debug:
#                 print('stage 2 - file')
#             stage_2_res = await dataset_routes.upload_dataset_stage_2_file(full_auth=full_auth,
#                                                                            dataset_id=dataset_id,
#                                                                            upload_id=dataset_upload_id,
#                                                                            part_id=1,
#                                                                            file=upload_file,
#                                                                            session=session, debug=debug)
#             if debug:
#                 print(f"🎭 Stage 2 response -- {stage_2_res.status}")
#                 print(stage_2_res.print(is_pretty=True))

#         else:
#             if debug:
#                 print('stage 2 - df')
#             stage_2_res = await asyncio.gather(*[dataset_routes.upload_dataset_stage_2_df(full_auth=full_auth,
#                                                                                           dataset_id=dataset_id,
#                                                                                           upload_id=dataset_upload_id,
#                                                                                           part_id=index + 1,
#                                                                                           upload_df=df,
#                                                                                           session=session, debug=debug) for index, df in enumerate(upload_df_list)])

#             if debug:
#                 for res in stage_2_res:
#                     print(f"🎭 Stage 2 response -- {res.status}")
#                     res.print(is_pretty=True)

#         # return stage_2_res

# #         # stage 3 commit_data
#         if debug:
#             print(f"\n\n🎭 starting Stage 3")
#         await asyncio.sleep(10)

#         stage3_res = await dataset_routes.upload_dataset_stage_3(full_auth=full_auth,
#                                                                  dataset_id=dataset_id,
#                                                                  upload_id=dataset_upload_id,
#                                                                  update_method=upload_method,
#                                                                  data_tag=partition_key,
#                                                                  is_index=False,
#                                                                  session=session,
#                                                                  debug=debug)

#         if debug:
#             print(f"\n🎭 stage 3 res - {res.status}")
#             print(stage3_res)

#         if is_index:
#             await self.index_dataset(full_auth=full_auth,
#                                      dataset_id=dataset_id,
#                                      debug=debug,
#                                      session=session)

#         return stage3_res

#     async def index_dataset(self,
#                             full_auth: DomoFullAuth = None,
#                             dataset_id: str = None,
#                             debug: bool = False,
#                             session: aiohttp.ClientSession = None
#                             ):

#         full_auth = full_auth or self.full_auth
#         dataset_id = dataset_id or self.id
#         return await dataset_routes.index_dataset(full_auth=full_auth, dataset_id=dataset_id, debug=debug,
#                                                   session=session)

#     async def list_partitions(self,
#                               full_auth: DomoFullAuth = None,
#                               dataset_id: str = None,
#                               debug: bool = False,
#                               session: aiohttp.ClientSession = None
#                               ):

#         full_auth = full_auth or self.full_auth
#         dataset_id = dataset_id or self.id

#         res = await dataset_routes.list_partitions(full_auth=full_auth, dataset_id=dataset_id, debug=debug,
#                                                    session=session)
#         if res.status != 200:
#             return None
#         return res.response

#     async def delete_partition(self,
#                                dataset_partition_id: str,

#                                dataset_id: str = None,
#                                empty_df: pd.DataFrame = None,

#                                full_auth: DomoFullAuth = None,

#                                is_index: bool = True,
#                                debug: bool = False,
#                                session: aiohttp.ClientSession = None):

#         is_close_session = True if not session else False

#         session = session or aiohttp.ClientSession()
#         full_auth = full_auth or self.full_auth
#         dataset_id = dataset_id or self.id

# #        if empty_df is None:
# #            empty_df = await self.query_dataset_private(full_auth=full_auth,
# #                                                        dataset_id=dataset_id,
# #                                                        sql="SELECT * from table limit 1",
# #                                                        debug=False)
# #
# #        await self.upload_csv(upload_df=empty_df.head(0),
# #                              upload_method='REPLACE',
# #                              is_index=is_index,
# #                              partition_key=dataset_partition_id,
# #                              session=session,
# #                              debug=False)
#         if debug:
#             print(f"\n\n🎭 starting Stage 1")

#         res = await dataset_routes.delete_partition_stage_1(full_auth=full_auth,
#                                                             dataset_id=dataset_id,
#                                                             dataset_partition_id=dataset_partition_id,
#                                                             debug=debug, session=session)
#         if debug:
#             print(f"\n\n🎭 Stage 1 response -- {res.status}")
#             print(res)

#         stage_2_res = None
#         if debug:
#             print('starting Stage 2')
#         stage_2_res = await dataset_routes.delete_partition_stage_2(full_auth=full_auth,
#                                                                     dataset_id=dataset_id,
#                                                                     dataset_partition_id=dataset_partition_id,
#                                                                     debug=debug, session=session)
#         if debug:
#             print(f"\n\n🎭 Stage 2 response -- {stage_2_res.status}")

#         stage_3_res = None
#         if debug:
#             print('starting Stage 3')
#         stage_3_res = await dataset_routes.index_dataset(full_auth=full_auth,
#                                                          dataset_id=dataset_id,
#                                                          debug=debug, session=session)
#         if debug:
#             print(f"\n\n🎭 Stage 3 response -- {stage_3_res.status}")

#         if is_close_session:
#             await session.close()

#         if debug:
#             print(stage_3_res)

#         if stage_3_res.status == 200:
#             return res.response

#     async def reset_dataset(self,
#                             full_auth: DomoFullAuth = None,
#                             is_index: bool = True,
#                             debug: bool = False
#                             ):
#         execute_reset = input(
#             "This function will delete all rows.  Type BLOW_ME_AWAY to execute:")

#         if execute_reset != 'BLOW_ME_AWAY':
#             print("You didn't type BLOW_ME_AWAY, moving on.")
#             return None

#         full_auth = full_auth or self.full_auth
#         dataset_id = self.id

#         if not full_auth:
#             raise Exception("full_auth required")

#         session = aiohttp.ClientSession()

#         # create empty dataset to retain schema
#         empty_df = await self.query_dataset_private(full_auth=full_auth,
#                                                     dataset_id=dataset_id,
#                                                     sql="SELECT * from table limit 1",
#                                                     session=session,
#                                                     debug=debug)
#         empty_df = empty_df.head(0)

#         # get partition list
# #         partition_list = await dataset_routes.list_partitions(full_auth=full_auth,
# #                                                               dataset_id=self.id,
# #                                                               debug=debug,
# #                                                               session=session)

# #         if len(partition_list) > 0:
# #             partition_list = chunk_list(partition_list, 100)

# #             for index, pl in enumerate(partition_list):
# #                 print(f'🥫 starting chunk {index + 1} of {len(partition_list)}')

# #                 await asyncio.gather(*[self.delete_partition(full_auth=full_auth,
# #                                                              dataset_partition_id=partition.get('partitionId'),
# #                                                              session=session,
# #                                                              empty_df=empty_df,
# #                                                              debug=False) for partition in pl])
# #                 if is_index:
# #                     await self.index_dataset(session=session)

#         res = await self.upload_csv(upload_df=empty_df,
#                                     upload_method='REPLACE',
#                                     is_index=is_index,
#                                     session=session,
#                                     debug=False)

#         await session.close()
#         return True

#     async def delete(self,
#                      dataset_id=None,
#                      full_auth: DomoFullAuth = None,
#                      debug: bool = False,
#                      session: aiohttp.ClientSession = None):
#         try:
#             is_close_session = False

#             if not session:
#                 session = aiohttp.ClientSession()
#                 is_close_session = True

#             return await dataset_routes.delete(
#                 full_auth=full_auth or self.full_auth,
#                 dataset_id=dataset_id or self.id,
#                 debug=debug,
#                 session=session)

#         finally:
#             if is_close_session:
#                 await session.close()

#     # async def create(self,
#     #                   ds_name,
#     #                   ds_type ='api',
#     #                   schema = { "columns": [ {
#     #                       "name": 'col1',
#     #                       "type": 'LONG',
#     #                       "metadata": None,
#     #                       "upsertKey": False}
#     #                   ]},
#     #                   full_auth:DomoFullAuth = None,
#     #                   debug:bool = False)