In [None]:
"""
DataRobot Batch Prediction API: Snowflake query scoring job

v1.0 Mike Taveirne (doyouevendata) 3/21/2020
"""

In [1]:
import pandas as pd
import requests
import time
from pandas.io.json import json_normalize

import my_creds
#from imp import reload
#reload(my_creds)

In [2]:
# DataRobot settings; account-specific values are read from the local my_creds module.
API_KEY = my_creds.API_KEY
USERNAME = my_creds.USERNAME
DEPLOYMENT_ID = my_creds.DEPLOYMENT_ID
DATAROBOT_KEY = my_creds.DATAROBOT_KEY
# Point this at the load balancer for your prediction instance(s).
DR_PREDICTION_HOST = my_creds.DR_PREDICTION_HOST
DR_APP_HOST = 'https://app.datarobot.com'

# Headers sent with the /api/v2 requests made against DR_APP_HOST.
DR_MODELING_HEADERS = {
    'Content-Type': 'application/json',
    'Authorization': 'token %s' % API_KEY,
}

# Plain-text headers carrying the datarobot-key.
headers = {
    'Content-Type': 'text/plain; charset=UTF-8',
    'datarobot-key': DATAROBOT_KEY,
}

# Predictions endpoint of the deployment on the prediction host.
url = '{dr_prediction_host}/predApi/v1.0/deployments/{deployment_id}/predictions'.format(
    dr_prediction_host=DR_PREDICTION_HOST,
    deployment_id=DEPLOYMENT_ID,
)

In [3]:
# Snowflake login, read from the local my_creds module.
SNOW_USER, SNOW_PASS = my_creds.SNOW_USER, my_creds.SNOW_PASS

### Retrieve Existing Data Connection

In [4]:
# Canonical name of the existing DataRobot external data store to look up.
DATA_CONNECTION = 'snow_3_12_0_titanic'

# List the external data stores visible to this API key.
response = requests.get(
    DR_APP_HOST + '/api/v2/externalDataStores/',
    headers=DR_MODELING_HEADERS,
)
# Surface auth/HTTP failures here rather than as a KeyError on 'data' below.
response.raise_for_status()

# pd.json_normalize replaces the deprecated pd.io.json.json_normalize;
# reindex keeps both columns present even when the listing is empty.
# NOTE(review): only the returned 'data' list is inspected -- confirm whether
# this endpoint paginates for accounts with many data stores.
data_stores_df = pd.json_normalize(response.json()['data']).reindex(
    columns=['id', 'canonicalName']
)
matching_store_ids = data_stores_df.loc[
    data_stores_df['canonicalName'] == DATA_CONNECTION, 'id'
]
if matching_store_ids.empty:
    # Give a readable error instead of an opaque IndexError from .iloc[0].
    raise LookupError(
        'no external data store named {!r} was found'.format(DATA_CONNECTION)
    )
DATA_CONNECTION_ID = matching_store_ids.iloc[0]

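### Save Credentials for Connection

One-time setup: this stores the Snowflake login in DataRobot under the name `snow_community_credentials`. Once it has been saved, later runs can skip this cell and use the lookup in the next section instead.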

In [36]:
# Request body for storing the Snowflake login as a named DataRobot credential.
# Named credentials_payload rather than `json` so the cell does not take the
# name of the standard-library json module.
credentials_payload = {
    "credentialType": "basic",
    "user": SNOW_USER,
    "password": SNOW_PASS,
    "name": "snow_community_credentials"
}

response = requests.post(
    url=DR_APP_HOST + '/api/v2/credentials/',  # the missing comma here was a SyntaxError
    headers=DR_MODELING_HEADERS,
    json=credentials_payload,
)
# Stop on an HTTP error instead of failing with a KeyError on 'credentialId'.
response.raise_for_status()

CREDENTIALS_ID = response.json()['credentialId']

### Retrieve Existing Credentials

In [5]:
# Look up the id of previously saved credentials by their name.
CREDENTIALS = 'snow_community_credentials'

response = requests.get(
    DR_APP_HOST + '/api/v2/credentials/',
    headers=DR_MODELING_HEADERS,
)
# Surface auth/HTTP failures here rather than as a KeyError on 'data' below.
response.raise_for_status()

# pd.json_normalize replaces the deprecated pd.io.json.json_normalize;
# reindex keeps both columns present even when the listing is empty.
# NOTE(review): only the returned 'data' list is inspected -- confirm whether
# this endpoint paginates for accounts with many stored credentials.
credentials_df = pd.json_normalize(response.json()['data']).reindex(
    columns=['credentialId', 'name']
)
matching_credential_ids = credentials_df.loc[
    credentials_df['name'] == CREDENTIALS, 'credentialId'
]
if matching_credential_ids.empty:
    # Give a readable error instead of an opaque IndexError from .iloc[0].
    raise LookupError(
        'no stored credentials named {!r} were found'.format(CREDENTIALS)
    )
CREDENTIALS_ID = matching_credential_ids.iloc[0]

### Creating Session and Running Batch Prediction API Job

In [6]:
# One shared session for the batch prediction calls, authenticated with the API key.
session = requests.Session()
# update() adds the auth header while keeping the session's default headers;
# assigning a plain dict to session.headers would discard them.
session.headers.update({
    'Authorization': 'Bearer {}'.format(API_KEY),
})

In [7]:
# Prediction server that scores the rows.
prediction_instance = {
    'hostName': DR_PREDICTION_HOST,
    'datarobotKey': DATAROBOT_KEY,
}

# Input: rows are read over JDBC using the data store and credentials looked up earlier.
intake_settings = {
    'type': 'jdbc',
    'fetchSize': 100000,
    'dataStoreId': DATA_CONNECTION_ID,
    'credentialId': CREDENTIALS_ID,
    # Disabled table/schema settings, kept from the original cell
    # (presumably an alternative to "query" -- confirm before enabling):
    #"table": "PASSENGERS_500K",
    #"schema": "PUBLIC",
    'query': 'select * from PASSENGERS',
}

# Output: results are inserted into a table through the same data store and credentials.
output_settings = {
    'type': 'jdbc',
    'table': 'PASSENGERS_SCORED_BATCH_API',
    'schema': 'PUBLIC',
    'statementType': 'insert',
    'dataStoreId': DATA_CONNECTION_ID,
    'credentialId': CREDENTIALS_ID,
}

# Complete request body for the batch prediction job.
job_details = {
    'deploymentId': DEPLOYMENT_ID,
    'numConcurrent': 4,
    'passthroughColumns': ['PASSENGERID'],
    'includeProbabilities': True,
    'predictionInstance': prediction_instance,
    'intakeSettings': intake_settings,
    'outputSettings': output_settings,
}

In [8]:
# Submit the batch prediction job described by job_details.
response = session.post(
    DR_APP_HOST + '/api/v2/batchPredictions',
    json=job_details,
)
# A rejected submission should show its HTTP error here, not a KeyError
# on job['links'] in the print below.
response.raise_for_status()

job = response.json()
print('queued batch job: {}'.format(job['links']['self']))

queued batch job: https://app.datarobot.com/api/v2/batchPredictions/1234567891234567890/


In [9]:
# Poll the queued job's self link until it has left the in-progress states.
POLL_INTERVAL_SECONDS = 1

if response.status_code == 202:
    # One wait loop per in-progress state, in the order the original cell
    # checked them, so the per-phase progress messages stay the same.
    for phase in ('INITIALIZING', 'RUNNING'):
        while job['status'] == phase:
            time.sleep(POLL_INTERVAL_SECONDS)
            # A separate name keeps `response` as the submission result, so
            # the 202 guard above still holds if this cell is re-run.
            poll_response = session.get(job['links']['self'])
            poll_response.raise_for_status()
            job = poll_response.json()
        print('completed {phase}'.format(phase=phase))

    print('status is now {status}'.format(status=job['status']))
else:
    # Previously a non-202 submission made this cell do nothing, silently.
    print('batch job was not accepted (HTTP {code}); nothing to poll'.format(
        code=response.status_code))

completed INITIALIZING
completed RUNNING
status is now COMPLETED
