In [None]:
"""
DataRobot Batch Prediction API: scoring job driven by a Snowflake query

v1.0 Mike Taveirne (doyouevendata) 3/21/2020
"""

In [1]:
import pandas as pd
import requests
import time
from pandas.io.json import json_normalize
import json

import my_creds
#from imp import reload
#reload(my_creds)
from datetime import datetime

In [2]:
# DataRobot settings -- keys and deployment identifiers are read from my_creds
API_KEY = my_creds.API_KEY
USERNAME = my_creds.USERNAME
DEPLOYMENT_ID = my_creds.DEPLOYMENT_ID
DATAROBOT_KEY = my_creds.DATAROBOT_KEY
# point this at the load balancer for your prediction instance(s)
DR_PREDICTION_HOST = my_creds.DR_PREDICTION_HOST
DR_APP_HOST = 'https://app.datarobot.com'

# headers sent with the /api/v2 calls made through DR_APP_HOST
DR_MODELING_HEADERS = {
    'Content-Type': 'application/json',
    'Authorization': 'token %s' % API_KEY,
}

# headers and endpoint for the deployment's predApi predictions route
headers = {
    'Content-Type': 'text/plain; charset=UTF-8',
    'datarobot-key': DATAROBOT_KEY,
}

url = (
    '{dr_prediction_host}/predApi/v1.0/deployments/{deployment_id}/'
    'predictions'
).format(dr_prediction_host=DR_PREDICTION_HOST, deployment_id=DEPLOYMENT_ID)

In [3]:
# snowflake parameters
# user name and password are read from the local my_creds module rather than
# being written into the notebook
SNOW_USER = my_creds.SNOW_USER
SNOW_PASS = my_creds.SNOW_PASS

### Retrieve Existing Data Connection

In [4]:
"""
    get a data connection by name, return None if not found
"""
def dr_get_data_connection(name):
    """
    Look up an external data store (data connection) id by canonical name.

    Sends a GET to '/api/v2/externalDataStores/' on DR_APP_HOST with
    DR_MODELING_HEADERS and scans the 'data' list of the JSON response for
    the first entry whose 'canonicalName' equals ``name``.

    Parameters
    ----------
    name : str
        canonical name of the data connection to find

    Returns
    -------
    The 'id' of the first matching entry, or None when no entry matches,
    the list is empty, or the request does not return HTTP 200 (in which
    case the status code and body are printed).
    """
    response = requests.get(
        DR_APP_HOST + '/api/v2/externalDataStores/',
        headers=DR_MODELING_HEADERS,
    )

    if response.status_code != 200:
        print('Request failed; http error {code}: {content}'.format(code=response.status_code, content=response.content))
        return None

    # Walk the JSON list directly instead of building a DataFrame: an empty
    # list then simply yields None instead of failing on missing columns.
    # NOTE(review): only the entries in this one response are inspected; if the
    # endpoint paginates, later pages are not searched -- TODO confirm.
    for data_store in response.json().get('data') or []:
        if data_store.get('canonicalName') == name:
            return data_store.get('id')

    return None

In [5]:
# look up the existing data connection by its canonical name
data_connection_id = dr_get_data_connection('snow_3_12_0_titanic')

# stop here with a clear message instead of submitting a job whose
# dataStoreId would be None
if data_connection_id is None:
    raise RuntimeError("data connection 'snow_3_12_0_titanic' was not found; cannot build the batch prediction job")

### Retrieve or Create Database Credentials

In [6]:
def dr_get_catalog_credentials(name, cred_type):
    """
    Get the id of a saved credential set, or None if it is not found.

    Sends a GET to '/api/v2/credentials/' on DR_APP_HOST with
    DR_MODELING_HEADERS and scans the 'data' list of the JSON response for
    the first entry whose 'name' and 'credentialType' both match.

    Parameters
    ----------
    name : str
        name of the stored credential set
    cred_type : str
        'basic' or 's3'; any other value prints a message and returns None
        without issuing a request

    Returns
    -------
    The 'credentialId' of the first matching entry, or None when nothing
    matches, the list is empty, ``cred_type`` is not accepted, or the
    request does not return HTTP 200 (the status and body are printed).
    """
    if cred_type not in ('basic', 's3'):
        print('credentials type must be: basic, s3 - value passed was {ct}'.format(ct=cred_type))
        return None

    response = requests.get(
        DR_APP_HOST + '/api/v2/credentials/',
        headers=DR_MODELING_HEADERS,
    )

    if response.status_code != 200:
        print('Request failed; http error {code}: {content}'.format(code=response.status_code, content=response.content))
        return None

    # Walk the JSON list directly instead of building a DataFrame: an empty
    # list then simply yields None instead of failing on missing columns.
    # NOTE(review): only the entries in this one response are inspected; if the
    # endpoint paginates, later pages are not searched -- TODO confirm.
    for credential in response.json().get('data') or []:
        if credential.get('name') == name and credential.get('credentialType') == cred_type:
            return credential.get('credentialId')

    return None

In [7]:
def dr_create_catalog_credentials(name, cred_type, user, password, token=None):
    """
    Create a stored credential set and return its id.

    Sends a POST to '/api/v2/credentials/' on DR_APP_HOST with
    DR_MODELING_HEADERS and a JSON body built from the arguments.

    Parameters
    ----------
    name : str
        name to store the credential set under
    cred_type : str
        'basic' or 's3'; any other value prints a message and returns None
        without issuing a request
    user : str
        sent as 'user' for basic credentials, 'awsAccessKeyId' for s3
    password : str
        sent as 'password' for basic credentials, 'awsSecretAccessKey' for s3
    token : str, optional
        sent as 'awsSessionToken' for s3 credentials when not None;
        ignored for basic credentials

    Returns
    -------
    The 'credentialId' from the response when the request returns HTTP 201;
    otherwise None, after printing the status code and body.
    """
    if cred_type not in ('basic', 's3'):
        print('credentials type must be: basic, s3 - value passed was {ct}'.format(ct=cred_type))
        return None

    # the request body is named `payload` so it does not shadow the json module
    if cred_type == 'basic':
        payload = {
            "credentialType": cred_type,
            "user": user,
            "password": password,
            "name": name,
        }
    else:
        # cred_type == 's3'; the session token is only sent when one was given
        payload = {
            "credentialType": cred_type,
            "awsAccessKeyId": user,
            "awsSecretAccessKey": password,
        }
        if token is not None:
            payload["awsSessionToken"] = token
        payload["name"] = name

    response = requests.post(
        url=DR_APP_HOST + '/api/v2/credentials/',
        headers=DR_MODELING_HEADERS,
        json=payload,
    )

    if response.status_code == 201:
        return response.json()['credentialId']

    print('Request failed; http error {code}: {content}'.format(code=response.status_code, content=response.content))
    return None


In [8]:
def dr_get_or_create_catalog_credentials(name, cred_type, user, password, token=None):
    """
    Return the id of the named credential set, creating it if it is missing.

    Calls dr_get_catalog_credentials(name, cred_type) first; when that
    returns None, falls back to dr_create_catalog_credentials with the
    supplied user, password and token.

    Parameters
    ----------
    name : str
        name of the credential set
    cred_type : str
        credential type, passed through to both helpers
    user, password : str
        only used when the credential set has to be created
    token : str, optional
        only used when the credential set has to be created

    Returns
    -------
    The existing or newly created credential id, or whatever
    dr_create_catalog_credentials returns when creation is attempted.
    """
    cred_id = dr_get_catalog_credentials(name, cred_type)

    if cred_id is not None:
        return cred_id

    # forward the caller's token; it was previously hard-coded to None here,
    # which silently dropped the token when creating a new credential set
    return dr_create_catalog_credentials(name, cred_type, user, password, token=token)

In [9]:
# reuse the SNOW_USER / SNOW_PASS constants set in the snowflake parameters cell
credentials_id = dr_get_or_create_catalog_credentials(
    'snow_community_credentials', 'basic', SNOW_USER, SNOW_PASS
)

# stop here with a clear message instead of submitting a job whose
# credentialId would be None
if credentials_id is None:
    raise RuntimeError("credential set 'snow_community_credentials' could not be found or created")

### Creating Session and Running Batch Prediction API Job

In [10]:
# one shared session so every batch prediction call carries the API token
session = requests.Session()
# update() adds the Authorization header to the session's default headers;
# assigning a plain dict would replace those defaults entirely
session.headers.update({
    'Authorization': 'Bearer {}'.format(API_KEY)
})

In [11]:
# job definition posted to the batchPredictions endpoint: JDBC intake and
# JDBC output both go through the same data connection and credential set
job_details = {
    "deploymentId": DEPLOYMENT_ID,
    "numConcurrent": 4,
    "passthroughColumns": ["PASSENGERID"],
    "includeProbabilities": True,
    "predictionInstance": {
        "hostName": DR_PREDICTION_HOST,
        "datarobotKey": DATAROBOT_KEY,
    },
    # intake is driven by a query; the commented keys are the table/schema
    # alternative left in place for reference
    "intakeSettings": {
        "type": "jdbc",
        "fetchSize": 100000,
        "dataStoreId": data_connection_id,
        "credentialId": credentials_id,
        #"table": "PASSENGERS_500K",
        #"schema": "PUBLIC",
        "query": "select * from PASSENGERS",
    },
    # results are written with an insert statement into PUBLIC.PASSENGERS_SCORED_BATCH_API
    "outputSettings": {
        "type": "jdbc",
        "table": "PASSENGERS_SCORED_BATCH_API",
        "schema": "PUBLIC",
        "statementType": "insert",
        "dataStoreId": data_connection_id,
        "credentialId": credentials_id,
    },
}

In [12]:
# submit the job definition; the next cell treats HTTP 202 as "job queued"
batch_predictions_url = DR_APP_HOST + '/api/v2/batchPredictions'
response = session.post(batch_predictions_url, json=job_details)

In [13]:
# seconds to wait between status checks
POLL_INTERVAL_SECONDS = 3


def _wait_while_status(job, status):
    """Re-fetch the job from its 'self' link until it leaves ``status``; return the latest job dict.

    Raises requests.HTTPError (via raise_for_status) if a status check fails.
    """
    while job['status'] == status:
        time.sleep(POLL_INTERVAL_SECONDS)
        # a separate name keeps the submission `response` intact, so this
        # cell can be re-run without reporting a false submission failure
        poll_response = session.get(job['links']['self'])
        poll_response.raise_for_status()
        job = poll_response.json()
    return job


if response.status_code == 202:

    job = response.json()
    print('queued batch job: {}'.format(job['links']['self']))

    job = _wait_while_status(job, 'INITIALIZING')
    print('completed INITIALIZING')

    # returns immediately when the job is not (or is no longer) RUNNING
    job = _wait_while_status(job, 'RUNNING')
    print('completed RUNNING')
    print('status is now {status}'.format(status=job['status']))

    if job['status'] != 'COMPLETED':
        # show the job's log entries; tolerate a missing or empty 'logs' field
        for log_line in job.get('logs') or []:
            print(log_line)

else:

    print('Job submission failed; http error {code}: {content}'.format(code=response.status_code, content=response.content))

queued batch job: https://app.datarobot.com/api/v2/batchPredictions/123456789012345678903/
completed INITIALIZING
completed RUNNING
status is now COMPLETED
