In [None]:
# Sample code demonstrating how to use the Batch Prediction API to score data stored in a Google Cloud Storage bucket and write the results back to the same bucket

In [None]:
import os

import datarobot as dr

In [None]:
# Set DataRobot connection info here.
# Secrets should not be saved inside the notebook: the API token and endpoint
# are read from the DATAROBOT_API_TOKEN / DATAROBOT_ENDPOINT environment
# variables when they are set, and fall back to the placeholders below otherwise.
API_KEY = os.environ.get("DATAROBOT_API_TOKEN", "YOUR API KEY")
BATCH_PREDICTIONS_URL = os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2")
DEPLOYMENT_ID = 'YOUR DEPLOYMENT ID'

# Name under which the GCP credential is stored in DataRobot
DR_CREDENTIAL_NAME = "NAME FOR THE GCP CREDENTIAL TO BE STORED"

# Google Cloud Storage locations: the bucket, the object holding the rows to
# score, and the object the results are written to (both in the same bucket).
GCP_BUCKET_NAME = "YOUR GCS BUCKET NAME"
GCP_INPUT_SCORING_FILE = "YOUR INPUT SCORING FILE NAME"
GCP_OUTPUT_RESULTS_FILE = "YOUR OUTPUT RESULTS FILE NAME"

In [None]:
# Configure the DataRobot client with the endpoint and API token set above;
# the calls to `dr` in the following cells rely on this having been run.
dr.Client(
    endpoint=BATCH_PREDICTIONS_URL,
    token=API_KEY,
)

In [None]:
# Create a GCP-specific Credential
# NOTE: This cannot be done from the UI
# Format of the service-account key expected by the credential:
"""
{
"type": "service_account",
"project_id": "[PROJECT-ID]",
"private_key_id": "[KEY-ID]",
"private_key": "-----BEGIN PRIVATE KEY-----\n[PRIVATE-KEY]\n-----END PRIVATE KEY-----\n",
"client_email": "[SERVICE-ACCOUNT-EMAIL]",
"client_id": "[CLIENT-ID]",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://accounts.google.com/o/oauth2/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/[SERVICE-ACCOUNT-EMAIL]"
}
"""
# The key can be generated and downloaded, ready to drop in, from within GCP:
# 1. Go to IAM & Admin -> Service Accounts
# 2. Search for the Service Account you want to use (or create a new one)
# 3. Go to Keys
# 4. Click Add Key -> Create Key
# 5. Select the JSON key type
# 6. Copy the contents of the downloaded JSON file into the `key` dict below
key = {
    "type": "service_account",
    "project_id": "********",
    "private_key_id": "************",
    "private_key": "-----BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY-----\n",
    "client_email": "************",
    "client_id": "**********",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/********",
}

# Store the service-account key in DataRobot under DR_CREDENTIAL_NAME.
# NOTE(review): re-running this cell issues another create request with the
# same name — confirm how DataRobot handles a name that already exists.
credential = dr.Credential.create_gcp(
    name=DR_CREDENTIAL_NAME,
    gcp_key=key,
)

# Last expression in the cell, so the notebook displays the created credential.
credential

In [None]:
# Look up the ID of the stored credential whose name is DR_CREDENTIAL_NAME.
# If several credentials share that name, the last one listed is used.
credential_id = None
for cred in dr.Credential.list():
    if cred.name == DR_CREDENTIAL_NAME:
        credential_id = cred.credential_id

# Fail fast: without this check a missing credential would leave
# credential_id as None and only surface later, when the batch job is submitted.
if credential_id is None:
    raise LookupError(
        "No DataRobot credential named {!r} was found; create it before "
        "submitting the batch prediction job.".format(DR_CREDENTIAL_NAME)
    )

print(credential_id)

In [None]:
# Submit the batch prediction job against the deployment.
# Both intake (rows to score) and output (results) are objects in the same
# Google Cloud Storage bucket and are accessed with the same stored credential.
job = dr.BatchPredictionJob.score(
    deployment=DEPLOYMENT_ID,
    intake_settings={
        "type": "gcp",
        "url": f"gs://{GCP_BUCKET_NAME}/{GCP_INPUT_SCORING_FILE}",
        "credential_id": credential_id,
    },
    output_settings={
        "type": "gcp",
        "url": f"gs://{GCP_BUCKET_NAME}/{GCP_OUTPUT_RESULTS_FILE}",
        "credential_id": credential_id,
    },
    # Prediction explanations are requested here; delete this argument if
    # explanations are not required.
    max_explanations=5,
    # Passthrough columns are requested here; 'column1'/'column2' look like
    # placeholders — replace them with real column names from the scoring
    # file, or delete this argument if no passthrough columns are required.
    passthrough_columns=["column1", "column2"],
)

In [None]:
# Wait for the submitted batch prediction job to finish before checking on it.
# NOTE(review): wait_for_completion may enforce a default timeout — confirm in
# the DataRobot client docs and pass a larger limit for long-running jobs.
job.wait_for_completion()
# Last expression in the cell, so the notebook displays the returned status.
job.get_status()