#### AI Catalog API Demo

**Author:** Andrew Kruchko

**Label:** AI Catalog

**Scope**: This notebook shows how to create and share datasets and data sources in the AI Catalog, and how to use them to create projects and run batch predictions.

**Requirements:** Python 3.7 or higher; DataRobot API version 2.21 or higher

#### Import Libraries

In [None]:
import yaml
import requests
import pandas as pd
import datarobot as dr

#### Connect to DataRobot and read credentials

In [None]:
# Keep the config location in one place: the same YAML file configures the
# DataRobot client and supplies the extra settings read by the cells below.
config_path = 'config_path.yaml'

# set up the DataRobot client from the config file
dr.Client(config_path=config_path)

# load the raw settings as a dictionary; later cells read the 'token',
# 'base_url', 'jdbc_url', 'db_user' and 'db_pass' keys from it
with open(config_path, 'r', encoding='utf-8') as stream:
    creds = yaml.safe_load(stream)

In [None]:
def dr_rest_call(url, req_func, payload=None, timeout=None):
    """
    Run a DataRobot REST API call authenticated with the token from ``creds``.

    url: the API endpoint path, appended to ``creds['base_url']``
    req_func: a requests function e.g. requests.post
    payload[optional]: a dictionary with parameters, sent as the JSON body
    timeout[optional]: timeout in seconds forwarded to ``req_func``; when
        omitted no timeout argument is passed, so the behaviour of
        ``req_func`` is unchanged (requests applies no timeout by default)

    returns: whatever ``req_func`` returns (a response object when a
        requests function is used)
    """
    headers = {'Authorization': f"Token {creds['token']}",
               'Content-Type': 'application/json;charset=UTF-8'}
    kwargs = {'headers': headers, 'json': payload}
    if timeout is not None:
        # forward the timeout only on request so existing calls are unaffected
        kwargs['timeout'] = timeout
    return req_func(f"{creds['base_url']}{url}", **kwargs)

#### Creating a dataset or a data source

In [None]:
# location of the CSV file used by the dataset-creation examples below
path_to_data = "data.csv"

In [None]:
# create a dataset from a file on the local disk, given its path
dataset = dr.Dataset.create_from_file(
    file_path=path_to_data,
)

In [None]:
# create a dataset from an already opened binary file object
with open(path_to_data, mode="rb") as f:
    dataset = dr.Dataset.create_from_file(
        filelike=f,
    )

In [None]:
# load the CSV once and keep it both as a pandas data frame and as a list
# of per-row dictionaries for the in-memory examples below
df = pd.read_csv(path_to_data)
df_lst = df.to_dict(orient="records")

In [None]:
# create a dataset directly from the in-memory pandas data frame
dataset = dr.Dataset.create_from_in_memory_data(
    data_frame=df,
)

In [None]:
# create a dataset from a list of dictionaries, one dictionary per data row
dataset = dr.Dataset.create_from_in_memory_data(
    records=df_lst,
)

In [None]:
# create a dataset from CSV data located at a URL (placeholder address)
dataset = dr.Dataset.create_from_url(
    url="https://data.csv",
)

In [None]:
# from a DB
# getting a driver: collect the registered drivers with the MS SQL Server
# class name and fail with a clear message when there is none
ms_sql_class_name = 'com.microsoft.sqlserver.jdbc.SQLServerDriver'
ms_sql_drivers = [drv for drv in dr.DataDriver.list() if drv.class_name == ms_sql_class_name]
if not ms_sql_drivers:
    raise LookupError(f"No data driver with class name {ms_sql_class_name!r} is available")
# when several drivers match, the last one in the listing is used
ms_sql_driver = ms_sql_drivers[-1]

# creating a datastore
datastore = dr.DataStore.create(data_store_type='jdbc',
                                canonical_name='Demo DB',
                                driver_id=ms_sql_driver.id,
                                jdbc_url=creds['jdbc_url'])

# creating a datasource based on a query
query = "select * from db.schema.table"
params = dr.DataSourceParameters(data_store_id=datastore.id,
                                 query=query)

datasource = dr.DataSource.create(data_source_type='jdbc',
                                  canonical_name='datasource_query',
                                  params=params)

# creating a datasource based on a table
# NOTE: `params` and `datasource` are reassigned here, so from this point on
# `datasource` refers to the table-based data source
params = dr.DataSourceParameters(data_store_id=datastore.id,
                                 schema='schema',
                                 table='table')

datasource = dr.DataSource.create(data_source_type='jdbc',
                                  canonical_name='datasource_table',
                                  params=params)

#### Sharing a dataset and a data source

In [None]:
# who to share with, and the access level granted to each of them
users = ["user@domain.com"]
role = dr.enums.SHARING_ROLE.READ_ONLY

In [None]:
# sharing a dataset through the API call: one access entry per user
data = {'data': [{'username': user, 'role': role} for user in users]}
sharing_resp = dr_rest_call(f'/api/v2/datasets/{dataset.id}/accessControl', requests.patch, payload=data)
# raise on an HTTP error status instead of silently ignoring a rejected request
sharing_resp.raise_for_status()

In [None]:
# share the data source through the Python client,
# building one SharingAccess entry per user
access_lst = [
    dr.SharingAccess(username=username, role=role)
    for username in users
]
datasource.share(access_lst)

#### Creating a project

In [None]:
# start a project from the dataset, reusing the dataset name as the project name
dr.Project.create_from_dataset(
    dataset_id=dataset.id,
    project_name=dataset.name,
)

In [None]:
# start a project from the data source; the database user and password are
# taken from the loaded credentials, and the project is named after the
# data source
dr.Project.create_from_data_source(
    data_source_id=datasource.id,
    username=creds["db_user"],
    password=creds["db_pass"],
    project_name=datasource.canonical_name,
)

#### Using a dataset to run a batch prediction job

In [None]:
# placeholders: replace with the ids of the deployment and the dataset to score
deployment_id = "deployment id"
dataset_id = "dataset id"

# request body for the batch prediction job: the intake is the dataset
# identified by `dataset_id`, the output type is a local file
data = {
    "deploymentId": deployment_id,
    "passthroughColumnsSet": "all",
    "intakeSettings": {"type": "dataset", "datasetId": dataset_id},
    "outputSettings": {"type": "localFile"},
}

In [None]:
import os

# running a batch prediction job
batch_pred_resp = dr_rest_call('/api/v2/batchPredictions', requests.post, payload=data)
# raise on an HTTP error status right away; otherwise a rejected request
# would only surface later as a less helpful error when reading 'id'
batch_pred_resp.raise_for_status()

# getting its id and the object based on it
batch_pred_job_id = batch_pred_resp.json()['id']
batch_pred_job = dr.BatchPredictionJob.get(batch_pred_job_id)

# waiting for completion and writing the results
batch_pred_job.wait_for_completion()
predictions_path = 'data/predictions.csv'
# make sure the output folder exists so that opening the file cannot fail
# with FileNotFoundError after the job has already finished
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
with open(predictions_path, 'wb') as f:
    batch_pred_job.download(f)