#1. Install Dependencies
First install the libraries needed to execute recipes. This only needs to be done once; then click play.


In [ ]:
# Shell escape (leading "!"): pip-installs StarThinker directly from its GitHub repository.
!pip install git+https://github.com/google/starthinker


#2. Get Cloud Project ID
Running this recipe [requires a Google Cloud Project](https://github.com/google/starthinker/blob/master/tutorials/cloud_project.md). This only needs to be done once; then click play.


In [ ]:
# Google Cloud project ID used by this notebook; replace the placeholder text.
CLOUD_PROJECT = 'PASTE PROJECT ID HERE'

# Echo the value back so a forgotten placeholder is easy to spot.
print("Cloud Project Set To: %s" % (CLOUD_PROJECT,))


#3. Get Client Credentials
Reading from and writing to various endpoints requires [downloading client credentials](https://github.com/google/starthinker/blob/master/tutorials/cloud_client_installed.md). This only needs to be done once; then click play.


In [ ]:
# Client credentials used by this notebook; replace the placeholder text.
CLIENT_CREDENTIALS = 'PASTE CREDENTIALS HERE'

# Echo the value back so a forgotten placeholder is easy to spot.
print("Client Credentials Set To: %s" % (CLIENT_CREDENTIALS,))


#4. Enter DT To Table Parameters
Move data from a DT bucket into a BigQuery table.
 1. Ensure your user has <a href='https://developers.google.com/doubleclick-advertisers/dtv2/getting-started' target='_blank'>access to the bucket</a>.
 1. Provide the DT bucket name to read from.
 1. Provide the path of the files to read.
 1. Each file is synchronized to a unique table.  Use a view or aggregate select.

Modify the values below for your use case. This can be done multiple times; then click play.


In [ ]:
# Recipe inputs keyed by field name; edit the values to fit your use case.
FIELDS = {
  # Name of bucket where DT files are stored.
  "bucket": "",
  # List of prefixes to pull specific DT files.
  "paths": [],
  # Number of days back to synchronize.
  "days": 2,
  # Number of hours back to synchronize.
  "hours": 0,
  # Existing dataset in BigQuery.
  "dataset": "",
}

# Echo the parameters back for a quick visual check.
print("Parameters Set To: %s" % (FIELDS,))


#5. Execute DT To Table
This does NOT need to be modified unless you are changing the recipe; click play.


In [ ]:
from starthinker.util.project import project
from starthinker.script.parse import json_set_fields

# Location of the user credentials JSON handed to project.initialize() as _user.
USER_CREDENTIALS = '/content/user.json'

# Recipe definition: a single "dt" task reading "from" a bucket and writing
# "to" a dataset. Each {"field": {...}} entry carries a "name" matching a key
# in FIELDS; presumably json_set_fields() swaps the entry for that value.
# NOTE(review): "order" is 3 for days, hours and dataset -- confirm the
# repeated value is intended.
TASKS = [{
  "dt": {
    "auth": "user",
    "from": {
      "bucket": {"field": {
        "name": "bucket", "kind": "string", "order": 1, "default": "",
        "description": "Name of bucket where DT files are stored.",
      }},
      "paths": {"field": {
        "name": "paths", "kind": "string_list", "order": 2, "default": [],
        "description": "List of prefixes to pull specific DT files.",
      }},
      "days": {"field": {
        "name": "days", "kind": "integer", "order": 3, "default": 2,
        "description": "Number of days back to synchronize.",
      }},
      "hours": {"field": {
        "name": "hours", "kind": "integer", "order": 3, "default": 0,
        "description": "Number of hours back to synchronize.",
      }},
    },
    "to": {
      "auth": "service",
      "dataset": {"field": {
        "name": "dataset", "kind": "string", "order": 3, "default": "",
        "description": "Existing dataset in BigQuery.",
      }},
    },
  },
}]

# Apply the user-supplied FIELDS to the recipe before it is run.
json_set_fields(TASKS, FIELDS)

# Configure the project with the recipe and credentials, then run it.
project.initialize(
  _recipe={'tasks': TASKS},
  _project=CLOUD_PROJECT,
  _user=USER_CREDENTIALS,
  _client=CLIENT_CREDENTIALS,
  _verbose=True,
)
project.execute()
