#1. Install Dependencies
First, install the libraries needed to execute recipes. This only needs to be done once; then click play.


In [ ]:
# Notebook shell escape: install the StarThinker package directly from its GitHub repository.
!pip install git+https://github.com/google/starthinker


#2. Get Cloud Project ID
Running this recipe [requires a Google Cloud Project](https://github.com/google/starthinker/blob/master/tutorials/cloud_project.md). This only needs to be done once; then click play.


In [ ]:
# Google Cloud project ID; the execute cell hands this to project.initialize
# as `_project`. Replace the placeholder before running the recipe.
CLOUD_PROJECT = 'PASTE PROJECT ID HERE'

# Echo the value so the cell output confirms what was set.
print("Cloud Project Set To: %s" % (CLOUD_PROJECT,))


#3. Get Client Credentials
Reading and writing to various endpoints requires [downloading client credentials](https://github.com/google/starthinker/blob/master/tutorials/cloud_client_installed.md). This only needs to be done once; then click play.


In [ ]:
# Client credentials; the execute cell hands this to project.initialize as
# `_client`. Replace the placeholder before running the recipe.
CLIENT_CREDENTIALS = 'PASTE CREDENTIALS HERE'

# Echo the value so the cell output confirms what was set.
print("Client Credentials Set To: %s" % (CLIENT_CREDENTIALS,))


#4. Enter Census Data Correlation Parameters
Correlate another table with US Census data. Expands a data set's dimensions by finding population segments that correlate with the master table.
 1. Pre-requisite is Census Normalize, run that at least once.
 1. Specify JOIN, PASS, SUM, and CORRELATE columns to build the correlation query.
 1. Define the DATASET and TABLE for the joinable source. Can be a view.
 1. Choose the significance level.  More significance usually means more NULL results, balance quantity and quality using this value.
 1. Specify where to write the results.
 1. <b>IMPORTANT:</b> If you use VIEWS, you will have to delete them manually if the recipe changes.
Modify the values below for your use case. This can be done multiple times; then click play.


In [ ]:
# User-editable recipe parameters, passed to json_set_fields together with
# TASKS in the execute cell. Key order is kept because the dict is printed
# verbatim below.
FIELDS = {
  # Credentials used for writing data.
  # NOTE(review): TASKS in the execute cell hard-codes 'auth': 'user' and
  # declares no field named 'auth', so this entry may have no effect - confirm.
  'auth': 'service',

  # Name of column to join on, must match Census Geo_Id column.
  'join': '',

  # Comma separated list of columns to pass through.
  'pass': [],

  # Comma separated list of columns to sum, optional.
  'sum': [],

  # Comma separated list of percentage columns to correlate.
  'correlate': [],

  # Existing BigQuery dataset holding the join table.
  'from_dataset': '',

  # Table to use as join data.
  'from_table': '',

  # Level of significance to test; the matching field in TASKS offers
  # '80', '90', '98', '99', '99.5' and '99.95'.
  'significance': '80',

  # Existing BigQuery dataset to write to.
  'to_dataset': '',

  # Write Census_Percent as 'table' or 'view'.
  'type': 'table',
}

# Echo the parameters so the cell output confirms what was set.
print("Parameters Set To: %s" % (FIELDS,))


#5. Execute Census Data Correlation
This does NOT need to be modified unless you are changing the recipe. Click play.


In [ ]:
from starthinker.util.project import project
from starthinker.script.parse import json_set_fields

# Location of the user credentials file handed to project.initialize as `_user`.
USER_CREDENTIALS = '/content/user.json'

# Recipe definition: a single 'census' task. Each {'field': {...}} entry
# declares a parameter whose 'name' matches a key in FIELDS.
TASKS = [
  {
    'census': {
      'auth': 'user',
      # Destination of the results.
      'to': {
        'type': {
          'field': {
            'choices': ['table', 'view'],
            'description': 'Write Census_Percent as table or view.',
            'order': 10,
            'name': 'type',
            'default': 'table',
            'kind': 'choice',
          },
        },
        'dataset': {
          'field': {
            'description': 'Existing BigQuery dataset.',
            'name': 'to_dataset',
            'default': '',
            'order': 9,
            'kind': 'string',
          },
        },
      },
      # Source table and the columns used to build the correlation.
      'correlate': {
        'table': {
          'field': {
            'description': 'Table to use as join data.',
            'name': 'from_table',
            'default': '',
            'order': 6,
            'kind': 'string',
          },
        },
        'correlate': {
          'field': {
            'description': 'Comma seperated list of percentage columns to correlate.',
            'name': 'correlate',
            'default': [],
            'order': 4,
            'kind': 'string_list',
          },
        },
        'sum': {
          'field': {
            'description': 'Comma seperated list of columns to sum, optional.',
            'name': 'sum',
            'default': [],
            'order': 3,
            'kind': 'string_list',
          },
        },
        'significance': {
          'field': {
            'choices': ['80', '90', '98', '99', '99.5', '99.95'],
            'description': 'Select level of significance to test.',
            'order': 7,
            'name': 'significance',
            'default': '80',
            'kind': 'choice',
          },
        },
        'join': {
          'field': {
            'description': 'Name of column to join on, must match Census Geo_Id column.',
            'name': 'join',
            'default': '',
            'order': 1,
            'kind': 'string',
          },
        },
        'dataset': {
          'field': {
            'description': 'Existing BigQuery dataset.',
            'name': 'from_dataset',
            'default': '',
            'order': 5,
            'kind': 'string',
          },
        },
        'pass': {
          'field': {
            'description': 'Comma seperated list of columns to pass through.',
            'name': 'pass',
            'default': [],
            'order': 2,
            'kind': 'string_list',
          },
        },
      },
    },
  },
]

# Presumably replaces each 'field' placeholder in TASKS with the matching
# FIELDS value, in place - confirm against starthinker.script.parse.
json_set_fields(TASKS, FIELDS)

# Configure the project with the recipe and credentials, then run it.
project.initialize(
  _recipe={'tasks': TASKS},
  _project=CLOUD_PROJECT,
  _user=USER_CREDENTIALS,
  _client=CLIENT_CREDENTIALS,
  _verbose=True,
  _force=True,
)
project.execute(_force=True)
