In [None]:
!pip install cognite-sdk-core
from cognite.client import CogniteClient
import requests
import json

In [None]:
# Connection settings: the Hasura GraphQL endpoint that the queries below are
# posted to, and the OAuth2 client credentials / token endpoint used to log in.
from getpass import getpass  # local import: only needed for the secret prompt

hasuraEndpoint = "https://datapop.greenfield.cognite.ai/v1/graphql"
CLIENT_ID = input("Type in your CLIENT_ID")
# Read the secret with getpass instead of input() so the typed value is not
# echoed into the cell output (and therefore not saved with the notebook).
CLIENT_SECRET = getpass("Type in your CLIENT_SECRET")
CDF_URL = "https://api.cognitedata.com"
TOKEN_SCOPES = ["https://api.cognitedata.com/.default"]
TOKEN_URL = "https://login.microsoftonline.com/806128be-974c-452a-a25c-f98d78eb24ea/oauth2/v2.0/token"

In [None]:
# Log in to CDF with the client credentials, then exchange the CDF token for a
# Hasura token. `headers` built at the end is reused by every GraphQL request
# in the cells below.
CDF_CLUSTER = "api"  # api, westeurope-1 etc
COGNITE_PROJECT = "itg-testing"

SCOPES = [f"https://{CDF_CLUSTER}.cognitedata.com/.default"]
cdf_client = CogniteClient(
    token_url=TOKEN_URL,
    token_client_id=CLIENT_ID,
    token_client_secret=CLIENT_SECRET,
    token_scopes=SCOPES,
    project=COGNITE_PROJECT,
    base_url=f"https://{CDF_CLUSTER}.cognitedata.com",
    client_name="client_secret_test_script",
    debug=True,
)
cognite_token = cdf_client.config.token()

# Send the same project and cluster the client was configured with, rather
# than repeating the literals, so changing the constants above is enough.
hasura_token_http_response = requests.get(
    'https://datapop-auth.greenfield.cognite.ai/token',
    headers={
        "Authorization": f"Bearer {cognite_token}",
        "x-project": COGNITE_PROJECT,
        "x-cluster": CDF_CLUSTER,
    },
)
# Surface a rejected token exchange as an HTTP error instead of a JSON decode
# error or a KeyError on 'token' further down.
hasura_token_http_response.raise_for_status()
hasura_token_response = hasura_token_http_response.json()
hasura_token = hasura_token_response['token']

headers = {"Authorization": f"Bearer {hasura_token}", "Content-Type": "application/json"}
# Only a short prefix of the header is printed as a sanity check.
print("Authorization:", headers["Authorization"][0:30], "...")

In [None]:
# Parameters passed to the Prefect flow run.

# --- Target: where the transformed records are written ---
# Tenant name.
tenant = "itg-testing"
# ITG project ID.
# NOTE(review): the first group has 9 characters, so this is not a canonical
# 8-4-4-4-12 UUID — confirm the ID is correct.
projectId = "a14032b91-107e-44db-a627-087f8edd7ca2"
# ITG schema type.
schemaName = "AreaPython"

# --- Source: the CDF RAW table that is read ---
# CDF RAW database name.
rawDatabase = "usecase-workshop-s2s"
# CDF RAW table name.
rawTable = "c_d"

# --- Batch sizes ---
# RAW batch size (default in Prefect is 10000).
rawBatchSize = 100
# ITG batch size (default in Prefect is 2500).
itgBatchSize = 10

# --- Transformation ---
# JSONata expression applied to the RAW data; it produces objects with an
# "id" and a "name" field.
jsonata = """
    {
        "id": DMSuniqueID,
        "name": Discipline
    }
"""

In [None]:
# GraphQL query that selects the id of the flow named "raw-to-schema" that is
# not archived; at most one row is requested (limit: 1).
queryGetFlowId = """query GetRawToSchemaFlowId{
 flow(
   where: { name: { _eq: "raw-to-schema" }, archived: { _eq: false } }
   limit: 1
 ) {
   id
 }
}"""

In [None]:
# Look up the id of the "raw-to-schema" flow; later cells read `flowId`.
r = requests.post(hasuraEndpoint, json={"query": queryGetFlowId}, headers=headers)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

flowIdPayload = r.json()
# A GraphQL endpoint can answer HTTP 200 and still report a failure in the
# "errors" field of the body, so the status code alone is not enough.
if flowIdPayload.get("errors"):
    raise Exception(f"Query returned GraphQL errors: {json.dumps(flowIdPayload['errors'], indent=2)}")

# An empty result would otherwise show up as a bare IndexError on [0].
matchingFlows = (flowIdPayload.get("data") or {}).get("flow") or []
if not matchingFlows:
    raise Exception('No non-archived flow named "raw-to-schema" was found.')
flowId = matchingFlows[0]['id']

In [None]:
# GraphQL mutation that creates a run of the flow identified by $flow_id,
# passing $parameters to it, and returns the id of the created run.
mutationRunFlow = """mutation RunFlow($flow_id: UUID!, $parameters: JSON) {
    create_flow_run(input: {
        flow_id: $flow_id,
        parameters: $parameters
    }) {
        id
    }
}"""

# Variables for the mutation above: the flow to run and the parameters it
# receives (credentials, CDF location, source RAW table, target schema,
# transformation and batch sizes).
# NOTE(review): the client secret is sent as a plain flow parameter —
# confirm that whatever stores the run parameters is allowed to hold it.
parametersRunFlow = {
    "flow_id": flowId,
    "parameters": {
        "clientId": CLIENT_ID,
        "clientSecret": CLIENT_SECRET,
        "cdfUrl": CDF_URL,
        "scopes": TOKEN_SCOPES,
        "tokenUrl": TOKEN_URL,
        # Reuse the cluster constant the CDF client was created with instead
        # of repeating the "api" literal.
        "cdfCluster": CDF_CLUSTER,
        "cdfProject": tenant,
        "targetProjectId": projectId,
        "targetSchemaType": schemaName,
        "jsonataTransformation": jsonata,
        "rawDb": rawDatabase,
        "rawTable": rawTable,
        "rawBatchSize": rawBatchSize,
        "itgBatchSize": itgBatchSize,
        "forceReload": True
    }
}

In [None]:
# Submit the mutation that starts the flow run and print the server's answer.
r = requests.post(hasuraEndpoint, json={"query": mutationRunFlow, "variables": parametersRunFlow}, headers=headers)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

runFlowPayload = r.json()
# A GraphQL endpoint can answer HTTP 200 and still report a failure in the
# "errors" field; do not report the flow as running in that case.
if runFlowPayload.get("errors"):
    raise Exception(f"Mutation returned GraphQL errors: {json.dumps(runFlowPayload['errors'], indent=2)}")
print(f"Running flow = {json.dumps(runFlowPayload, indent=2)}")

In [None]:
# GraphQL query that lists up to five runs of the flow identified by $flow_id,
# ordered by start_time descending, with basic metadata for each run.
queryGetLastTasks = """query GetLastTasks($flow_id: uuid) {
  flow_run(
    where: { flow_id: { _eq: $flow_id }}
    limit: 5,
    order_by: {start_time: desc}
  ) {
    id
    name
    created
    agent_id
    flow_id
  }
}"""

# Variables for the query above: restrict the listing to the flow that was
# looked up earlier.
parametersGetLastTasks = {
    "flow_id": flowId,
}


In [None]:
# Fetch the latest runs of the flow. `result` keeps the whole payload for
# display, and `lastFlowRunId` is the id of the first run in the ordered list.
r = requests.post(hasuraEndpoint, json={"query": queryGetLastTasks, "variables": parametersGetLastTasks}, headers=headers)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

# Parse the body once and reuse it.
result = r.json()
# A GraphQL endpoint can answer HTTP 200 and still report a failure in the
# "errors" field of the body.
if result.get("errors"):
    raise Exception(f"Query returned GraphQL errors: {json.dumps(result['errors'], indent=2)}")

# An empty list would otherwise show up as a bare IndexError on [0].
recentFlowRuns = (result.get("data") or {}).get("flow_run") or []
if not recentFlowRuns:
    raise Exception(f"No flow runs were found for flow {flowId}.")
lastFlowRunId = recentFlowRuns[0]['id']

In [None]:
!pip install pandas
import pandas as pd
# Remove pandas' row limit so the tables shown below are not truncated.
pd.set_option('display.max_rows', None)

In [None]:
# Render the flow runs from the last query as a table, one row per run.
pd.json_normalize(data=result['data']['flow_run'])

In [None]:
import time
# Wait 30 seconds before asking for the status of the most recent flow run;
# presumably this gives the run time to start and write some logs.
time.sleep(30)

In [None]:
# GraphQL query that fetches, for the flow run whose id equals $flowRunId, its
# end_time and heartbeat plus its log entries (id, created, message, level).
getStatusQuery = """query GetStatusQuery($flowRunId: uuid){
  flow_run(
    where: {
      id: {
        _eq: $flowRunId
      }
    })
    {
      end_time
      heartbeat
      logs {
        id
        created
        message
        level
      }
  }
}"""

# Variables for the query above: the flow run selected from the latest-runs
# listing.
parametersGetStatusQuery = {
    "flowRunId": lastFlowRunId,
}

In [None]:
# Fetch status and logs of the selected flow run. The response stays in `r`
# for the next cell; the parsed body is also kept in `statusPayload`.
r = requests.post(hasuraEndpoint, json={"query": getStatusQuery, "variables": parametersGetStatusQuery}, headers=headers)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

# The original parsed the body here and discarded the result; keep it and use
# it to detect failures that a GraphQL endpoint reports with HTTP 200.
statusPayload = r.json()
if statusPayload.get("errors"):
    raise Exception(f"Query returned GraphQL errors: {json.dumps(statusPayload['errors'], indent=2)}")

In [None]:
# Render the log entries of the first returned flow run as a table.
pd.json_normalize(data=r.json()['data']['flow_run'][0]['logs'])
