# Crisp basics

This page shows you how to get started with the Crisp datasets using the Python client library.

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/gocrisp/analytics-blueprints-public/blob/main/notebooks/crisp_basics.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2Fgocrisp%2Fanalytics-blueprints-public%2Fmain%2Fnotebooks%2Fcrisp_basics.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td> 
  <td style="text-align: center">                                                                             
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/gocrisp/analytics-blueprints-public/main/notebooks/crisp_basics.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/gocrisp/analytics-blueprints-public/blob/main/notebooks/crisp_basics.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

### Install common dependencies

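This cell is just a placeholder: the `noop` package is not used anywhere in this notebook. Replace it with any packages your environment is missing.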

In [None]:
!pip install noop

### Restart Python

In [None]:
import sys

# Nothing happens outside Colab: the restart is attempted only when the
# google.colab module has already been loaded into this process.
if "google.colab" in sys.modules:
    import IPython

    # do_shutdown(True) asks the kernel to shut down with restart enabled
    # (presumably so packages installed above are picked up -- confirm).
    app = IPython.Application.instance()
    kernel = app.kernel
    kernel.do_shutdown(True)

### Set your account and connector ID

In [None]:
# Account ID; it is embedded in the source dataset/schema name in every environment.
ACCOUNT_ID = "80548"
# Connector ID; only read on Databricks, where it is required to build the schema name.
CONNECTOR_ID = "3378" # optional unless you run on Databricks

### Detect the environment you are running in

In [None]:
# The runtime is identified from the string form of the active IPython shell object.
ipython_env = str(get_ipython())

# (marker, environment) pairs, checked in this order; the first marker that
# occurs in the shell's string form decides the environment.
_ENV_MARKERS = (
    ("google.colab", "colab"),
    ("Databricks", "databricks"),
    ("ipykernel", "local"),
)
_detected = [env for marker, env in _ENV_MARKERS if marker in ipython_env]
if not _detected:
    raise ValueError("Unsupported environment")
environment_type = _detected[0]

print("Environment type: {}".format(environment_type))

### Import the libraries used in this tutorial and authenticate if needed

In [None]:
import os

# Colab and local runs both use the BigQuery client; Colab additionally
# needs an interactive user login. Databricks uses Spark instead.
if environment_type in ("colab", "local"):
    from google.cloud import bigquery, exceptions

    if environment_type == "colab":
        from google.colab import auth

        auth.authenticate_user()
elif environment_type == "databricks":
    from pyspark.sql import SparkSession
else:
    print("No extra imports")


### Define source dataset

In [None]:

# Fail fast if the account ID is empty or still the template placeholder.
if not ACCOUNT_ID or ACCOUNT_ID == "[your-account-id]":
    raise ValueError("Please set your ACCOUNT_ID")

if environment_type in ("colab", "local"):
    # Colab and local runs read from the same project and dataset.
    src_project = 'crisp-frontier-dev'
    src_dataset = f"analytics_blueprints_{ACCOUNT_ID}"
elif environment_type == "databricks":
    src_project = 'prod'
    # The Databricks schema name also embeds the connector ID, so it is required here.
    if not CONNECTOR_ID or CONNECTOR_ID == "[your-connector-id]":
        raise ValueError("Please set your CONNECTOR_ID")
    src_dataset = f"schema_{ACCOUNT_ID}_{CONNECTOR_ID}"
else:
    # Without this, src_project/src_dataset would stay undefined and the
    # query cell would fail later with a less helpful error.
    raise ValueError("Unsupported environment")


### Define the `load_data` cell magic, which loads query results into a DataFrame for the current environment

In [None]:
from IPython.core.magic import register_cell_magic


@register_cell_magic
def load_data(line, cell):
    """Run the cell body as a SQL query and load the result into a DataFrame.

    Usage: ``%%load_data [variable_name]`` on the first line of a cell,
    followed by the query text.

    ``{name}`` placeholders in the query are filled in with ``str.format``
    from the notebook's global variables (for example ``{src_project}``), so
    any literal brace in the query has to be doubled.

    Args:
        line: Text after the magic name. When non-empty, it is stripped and
            used as the name of the global variable that receives the result.
        cell: The cell body, used as the query template.

    Returns:
        The DataFrame when ``line`` is empty; otherwise ``None`` (the result
        is stored in the named global instead).

    Raises:
        ValueError: If ``environment_type`` is not "colab", "local" or
            "databricks".
    """
    namespace = globals()
    sql = cell.format(**namespace)

    if environment_type in ("colab", "local"):
        # Only the Colab client is pinned to src_project; the local client is
        # created without an explicit project.
        if environment_type == "colab":
            bq_client = bigquery.Client(project=src_project)
        else:
            bq_client = bigquery.Client()
        result = bq_client.query(sql).result().to_dataframe()
    elif environment_type == "databricks":
        session = SparkSession.builder.getOrCreate()
        result = session.sql(sql).toPandas()
    else:
        raise ValueError("Unsupported environment")

    if not line:
        return result
    namespace[line.strip()] = result

### Load data into a dataframe

In [None]:
# Table to preview; substituted for {src_table} in the %%load_data query cell.
src_table = 'normalized_unfi_fact_sales'

In [None]:
%%load_data df
SELECT * FROM `{src_project}`.`{src_dataset}`.`{src_table}` LIMIT 10

In [None]:
# Render the DataFrame that the %%load_data cell stored in the global `df`.
display(df)