In [1593]:
import json
import os

import requests

In [1594]:
# Address of the Prefect GraphQL API and the bearer-token header that is
# attached to every request sent to it.
prefectEndpoint = "http://34.77.49.169:4200"
accessToken = "no-token-yet"
headers = {"Authorization": "Bearer {}".format(accessToken)}

In [1595]:
# Authentication for the Cognite client.
# The client id and secret are read from the environment so that real
# credentials never have to be saved inside the notebook. When a variable is
# not set, the original "*****" placeholder is used.
CLIENT_ID = os.environ.get("COGNITE_CLIENT_ID", "*****")
CLIENT_SECRET = os.environ.get("COGNITE_CLIENT_SECRET", "*****")
# Base URL of the CDF API and the OAuth scope requested for it.
CDF_URL = "https://api.cognitedata.com"
TOKEN_SCOPES = ["https://api.cognitedata.com/.default"]
# OAuth 2.0 token endpoint used to obtain the access token.
TOKEN_URL = "https://login.microsoftonline.com/806128be-974c-452a-a25c-f98d78eb24ea/oauth2/v2.0/token"

In [1596]:
# Prefect flow parameters (these values are sent as the flow run's parameters)
# Tenant name (sent as the "cdfProject" parameter)
tenant = "itg-testing"
# ITG project ID
# NOTE(review): the first group has 9 hex characters where a standard UUID
# has 8 — confirm this ID is correct.
projectId = "a14032b91-107e-44db-a627-087f8edd7ca2"
# ITG schema type
schemaName = "AreaPython"
# JSONata transformation code
jsonata = """
    { 
        "id": DMSuniqueID, 
        "name": Discipline
    }
"""
# CDF raw database name
rawDatabase = "usecase-workshop-s2s"
# CDF raw table name
rawTable = "c_d"
# Raw batch size (default in Prefect is 10000)
rawBatchSize = 100
# ITG batch size (default in Prefect is 2500)
itgBatchSize = 10

In [1597]:
# GraphQL query that selects the id of at most one flow named "raw-to-schema"
# that is not archived.
queryGetFlowId = """query GetRawToSchemaFlowId{
 flow(
   where: { name: { _eq: "raw-to-schema" }, archived: { _eq: false } }
   limit: 1
 ) {
   id
 }
}"""

In [1598]:
# Look up the id of the "raw-to-schema" flow; later cells need it as `flowId`.
r = requests.post(
    prefectEndpoint,
    json={"query": queryGetFlowId},
    headers=headers,
    timeout=30,  # seconds; fail instead of hanging if the server does not answer
)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

flowLookup = r.json()
# GraphQL reports query-level failures in an "errors" list, typically with
# HTTP 200, so the status code alone does not prove the query succeeded.
if flowLookup.get("errors"):
    raise Exception(f"Query returned GraphQL errors: {flowLookup['errors']}")

matchingFlows = (flowLookup.get("data") or {}).get("flow") or []
if not matchingFlows:
    # Without this guard an empty result surfaces as a bare IndexError.
    raise Exception('No unarchived flow named "raw-to-schema" was found.')
flowId = matchingFlows[0]["id"]

In [1599]:
# GraphQL mutation that creates a flow run for $flow_id with $parameters and
# returns the id of the new run.
mutationRunFlow = """mutation RunFlow($flow_id: UUID!, $parameters: JSON) {
    create_flow_run(input: { 
        flow_id: $flow_id,
        parameters: $parameters
    }) {
        id
    }
}"""

# Values handed to the flow itself, kept apart from the GraphQL envelope below.
# NOTE(review): the client secret travels as an ordinary flow parameter —
# confirm that this is acceptable for the target server.
flowRunInputs = {
    "clientId": CLIENT_ID,
    "clientSecret": CLIENT_SECRET,
    "cdfUrl": CDF_URL,
    "scopes": TOKEN_SCOPES,
    "tokenUrl": TOKEN_URL,
    "cdfCluster": "api",
    "cdfProject": tenant,
    "targetProjectId": projectId,
    "targetSchemaType": schemaName,
    "jsonataTransformation": jsonata,
    "rawDb": rawDatabase,
    "rawTable": rawTable,
    "rawBatchSize": rawBatchSize,
    "itgBatchSize": itgBatchSize,
    "forceReload": True,
}

# GraphQL variables for mutationRunFlow ($flow_id and $parameters).
parametersRunFlow = {"flow_id": flowId, "parameters": flowRunInputs}

In [None]:
# Submit the flow run and print the server's answer.
r = requests.post(
    prefectEndpoint,
    json={"query": mutationRunFlow, "variables": parametersRunFlow},
    headers=headers,
    timeout=30,  # seconds; fail instead of hanging if the server does not answer
)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

runFlowResult = r.json()
# GraphQL reports mutation failures in an "errors" list, typically with
# HTTP 200. Raise here so a rejected mutation is not reported as a running flow.
if runFlowResult.get("errors"):
    raise Exception(f"Mutation returned GraphQL errors: {runFlowResult['errors']}")

print(f"Running flow = {json.dumps(runFlowResult, indent=2)}")

In [1601]:
# GraphQL query returning up to five runs of the given flow, ordered by
# start_time descending.
# NOTE(review): the ordering is by start_time, not created — confirm that a
# run which has not started yet still sorts first.
queryGetLastTasks = """query GetLastTasks($flow_id: uuid) {
  flow_run(
    where: { flow_id: { _eq: $flow_id }}
    limit: 5, 
    order_by: {start_time: desc}
  ) {
    id
    name
    created
    agent_id
    flow_id
  }
}"""

# GraphQL variables for queryGetLastTasks ($flow_id).
parametersGetLastTasks = dict(flow_id=flowId)


In [1602]:
# Fetch the most recent runs of the flow. `result` keeps the whole response
# for display, and `lastFlowRunId` is the id of the first run returned.
r = requests.post(
    prefectEndpoint,
    json={"query": queryGetLastTasks, "variables": parametersGetLastTasks},
    headers=headers,
    timeout=30,  # seconds; fail instead of hanging if the server does not answer
)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

lastRunsPayload = r.json()  # parse the body once and reuse it below
# GraphQL reports query-level failures in an "errors" list, typically with
# HTTP 200, so the status code alone does not prove the query succeeded.
if lastRunsPayload.get("errors"):
    raise Exception(f"Query returned GraphQL errors: {lastRunsPayload['errors']}")

recentFlowRuns = (lastRunsPayload.get("data") or {}).get("flow_run") or []
if not recentFlowRuns:
    # Without this guard an empty result surfaces as a bare IndexError.
    raise Exception(f"No flow runs were found for flow {flowId}.")

lastFlowRunId = recentFlowRuns[0]["id"]
# Only publish `result` once the response is known to be usable.
result = lastRunsPayload

In [1604]:
import pandas as pd

# Remove the cap on displayed rows so DataFrames are shown in full.
pd.options.display.max_rows = None

In [None]:
# Flatten the flow runs returned by the last-runs query into a DataFrame.
recentRunRecords = result['data']['flow_run']
pd.json_normalize(recentRunRecords)

In [1606]:
import time

# Pause before asking for the status of the last run, so that the run has
# had some time to make progress.
statusWaitSeconds = 30
time.sleep(statusWaitSeconds)

In [1607]:
# GraphQL query that selects end_time, heartbeat and the log entries
# (id, created, message, level) of the flow run with the given id.
getStatusQuery = """query GetStatusQuery($flowRunId: uuid){
  flow_run(
    where: {
      id: {
        _eq: $flowRunId
      }
    }) 
    {
      end_time
      heartbeat
      logs {
        id
        created
        message
        level
      }
  }
}"""

# GraphQL variables for getStatusQuery ($flowRunId).
parametersGetStatusQuery = dict(flowRunId=lastFlowRunId)

In [1608]:
# Request status and logs of the last flow run. The response stays in `r`
# for the display cell, and the parsed body is kept in `statusResult`.
r = requests.post(
    prefectEndpoint,
    json={"query": getStatusQuery, "variables": parametersGetStatusQuery},
    headers=headers,
    timeout=30,  # seconds; fail instead of hanging if the server does not answer
)
if r.status_code != 200:
    raise Exception(f"Query failed to run with a {r.status_code}.")

# The original bare `r.json()` inside the `if` discarded its value. Keep the
# parsed body under a name so it can be checked and inspected.
statusResult = r.json()
# GraphQL reports query-level failures in an "errors" list, typically with
# HTTP 200, so the status code alone does not prove the query succeeded.
if statusResult.get("errors"):
    raise Exception(f"Query returned GraphQL errors: {statusResult['errors']}")

In [None]:
# Flatten the log entries of the first flow run in the status response into
# a DataFrame.
statusPayload = r.json()
flowRunLogs = statusPayload['data']['flow_run'][0]['logs']
pd.json_normalize(flowRunLogs)
