In [1]:
"""
Snowflake + DataRobot Python SDK example code.

1. Data extracted via Snowflake python connector
2. Python feature engineering completed
3. Data pushed into DataRobot to initiate a new project

v1.0 Mike Taveirne (doyouevendata) 1/17/2020
"""

'\nSnowflake + DataRobot Python SDK example code.\n\n1. Data extracted via Snowflake python connector\n2. Python feature engineering completed\n3. Data pushed into DataRobot to initiate a new project\n\nv1.0 Mike Taveirne (doyouevendata) 1/17/2020\n'

In [2]:
# uncomment and run if the packages are not installed yet
# (%pip installs into the environment of the running kernel)
#%pip install --user "snowflake-connector-python[pandas]"
#%pip install --user --upgrade datarobot

In [3]:
import snowflake.connector
import datetime
import datarobot as dr
import pandas as pd

import my_creds

In [4]:
# Snowflake credentials come from the local my_creds module so that no
# secrets are stored in the notebook itself.
SNOW_ACCOUNT, SNOW_USER, SNOW_PASS = (
    my_creds.SNOW_ACCOUNT,
    my_creds.SNOW_USER,
    my_creds.SNOW_PASS,
)

# Database and schema used as the session defaults for the connection.
SNOW_DB, SNOW_SCHEMA = 'TITANIC', 'PUBLIC'

In [5]:
# DataRobot settings: the API token is read from the local my_creds module.
DR_API_TOKEN = my_creds.DR_API_TOKEN

# For an on-premise install, swap app.datarobot.com for your cluster's
# application host.
DR_MODELING_ENDPOINT = 'https://app.datarobot.com/api/v2'

# Headers for direct REST calls against the modeling API.
DR_MODELING_HEADERS = {
    'Content-Type': 'application/json',
    'Authorization': 'token {}'.format(DR_API_TOKEN),
}

In [6]:
# Open a Snowflake session using the credentials and defaults defined above.
ctx = snowflake.connector.connect(
    user=SNOW_USER,
    password=SNOW_PASS,
    account=SNOW_ACCOUNT,
    database=SNOW_DB,
    schema=SNOW_SCHEMA,
    protocol='https'
)

# Always release the cursor and the connection, even if the query or the
# fetch raises; otherwise the session stays open for the kernel's lifetime.
try:
    cur = ctx.cursor()
    try:
        # execute sql
        sql = "select * from titanic.public.passengers_training"
        cur.execute(sql)

        # fetch the full result set into a pandas dataframe
        df = cur.fetch_pandas_all()
    finally:
        cur.close()
finally:
    ctx.close()

# preview the first rows as the cell output
df.head()

Unnamed: 0,PASSENGERID,SURVIVED,PCLASS,NAME,SEX,AGE,SIBSP,PARCH,TICKET,FARE,CABIN,EMBARKED
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [7]:
# Engineer a total family size feature: siblings/spouses (SIBSP) plus
# parents/children (PARCH) plus one.
family_size = df['SIBSP'].add(df['PARCH']).add(1)
df['TOTAL_FAMILY_SIZE'] = family_size
df.head()

Unnamed: 0,PASSENGERID,SURVIVED,PCLASS,NAME,SEX,AGE,SIBSP,PARCH,TICKET,FARE,CABIN,EMBARKED,TOTAL_FAMILY_SIZE
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,2
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,2
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,1


In [8]:
# Configure the DataRobot client with the API token and endpoint.
dr.Client(token=DR_API_TOKEN, endpoint=DR_MODELING_ENDPOINT)

# Create the project from the engineered dataframe; the project name carries
# a timestamp formatted to the minute.
now = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M')
project_name = 'Titanic_Survival_{}'.format(now)
proj = dr.Project.create(sourcedata=df,
    project_name=project_name)

# Build the web UI link from the API endpoint by removing the '/api/v2'
# suffix explicitly. A fixed-width slice would silently produce a wrong URL
# if the endpoint were edited, for example given a trailing slash.
api_suffix = '/api/v2'
app_base = DR_MODELING_ENDPOINT.rstrip('/')
if app_base.endswith(api_suffix):
    app_base = app_base[:-len(api_suffix)]

# further work with project via the python API, or work in GUI (link to project printed below)
print('{}/projects/{}'.format(app_base, proj.id))

https://app.datarobot.com/projects/5e49cdac3ef439010e7da5c8
