In [7]:
# dbt Cloud API Integration Notebook
# ================================
# This notebook demonstrates how to work with both the dbt Cloud Admin API
# and the Discovery API for metadata exploration

import os
import requests
import json
import pandas as pd
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import networkx as nx

# Run python-dotenv's loader before the variables below are read
load_dotenv()

# Credentials and endpoint settings, each looked up in the process environment.
# Only the Discovery API URL has a fallback; the others are None when unset.
DBT_CLOUD_TOKEN = os.environ.get("DBT_CLOUD_TOKEN")
DBT_CLOUD_ACCOUNT_ID = os.environ.get("DBT_CLOUD_ACCOUNT_ID")
DBT_ENVIRONMENT_ID = os.environ.get("DBT_ENVIRONMENT_ID")
DISCOVERY_API_URL = os.environ.get(
    "DISCOVERY_API_URL",
    "https://metadata.cloud.getdbt.com/graphql",
)

In [10]:
# GraphQL document listing the models applied in a single environment
query = """
query ($environmentId: BigInt!, $first: Int!) {
  environment(id: $environmentId) {
    applied {
      models(first: $first) {
        edges {
          node {
            name
            description
            resourceType
            uniqueId
            database
            schema
            alias
          }
        }
      }
    }
  }
}
"""

# Values bound to the $environmentId / $first placeholders declared above
variables = dict(
    environmentId=DBT_ENVIRONMENT_ID,
    first=5,  # Request at most 5 models
)

# JSON payload posted to the GraphQL endpoint
request_body = dict(query=query, variables=variables)

# HTTP headers: JSON content type plus bearer-token authorization
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {DBT_CLOUD_TOKEN}",
}

# Make the request
def test_discovery_api(timeout=30):
    """Smoke-test the Discovery API by posting the module-level sample query.

    Posts ``request_body`` to ``DISCOVERY_API_URL`` with ``headers`` and prints a
    short report: the model names on success, or the HTTP / GraphQL / transport
    error otherwise. The function never raises; every failure is printed.

    Args:
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the cell indefinitely.

    Returns:
        The decoded JSON response when the request returns HTTP 200 and the
        payload has no ``errors`` key; ``None`` in every other case.
    """
    try:
        response = requests.post(
            DISCOVERY_API_URL,
            headers=headers,
            json=request_body,
            timeout=timeout,
        )

        # Anything other than 200 is reported verbatim and treated as a failure
        if response.status_code != 200:
            print(f"❌ Failed to query Discovery API: {response.status_code}")
            print(f"Response: {response.text}")
            return None

        result = response.json()

        # GraphQL reports query-level problems in the body, not via the status code
        if "errors" in result:
            print(f"GraphQL Error: {json.dumps(result['errors'], indent=2)}")
            return None

        # Walk data -> environment -> applied -> models null-safely: a key that is
        # present with a null value would defeat a `.get(key, {})` default.
        node = result
        for key in ("data", "environment", "applied", "models"):
            node = (node or {}).get(key)
        models = (node or {}).get("edges") or []

        print("✅ Successfully queried Discovery API")
        print(f"Found {len(models)} models")

        # Print model names as a sample
        for edge in models:
            model = (edge or {}).get("node") or {}
            print(f"  - {model.get('name')} ({model.get('uniqueId')})")

        return result

    except Exception as e:
        # Deliberately broad: this is a best-effort connectivity check, so any
        # transport or decoding failure is reported instead of raised.
        print(f"❌ Error: {str(e)}")
        return None

# Run the Discovery API check only when this code executes as __main__;
# the guard skips the call if the code is imported as a module instead.
if __name__ == "__main__":
    test_discovery_api()

✅ Successfully queried Discovery API
Found 5 models
  - dim_dbt__current_models (model.dbt_artifacts.dim_dbt__current_models)
  - dim_dbt__exposures (model.dbt_artifacts.dim_dbt__exposures)
  - dim_dbt__models (model.dbt_artifacts.dim_dbt__models)
  - dim_dbt__seeds (model.dbt_artifacts.dim_dbt__seeds)
  - dim_dbt__snapshots (model.dbt_artifacts.dim_dbt__snapshots)


In [21]:
def get_models_discovery(limit=10):
    """Get models from the dbt Cloud Discovery API as a DataFrame.

    Sends a GraphQL query through ``run_graphql_query`` for the environment
    named by ``DBT_ENVIRONMENT_ID``, prints a formatted summary of each model,
    and returns the model nodes as rows.

    Args:
        limit: Maximum number of models to request (bound to the query's
            ``$first`` variable).

    Returns:
        A ``pandas.DataFrame`` with one row per model node (empty when the
        response contains no models), or ``None`` when ``DBT_ENVIRONMENT_ID``
        is not a valid integer or ``run_graphql_query`` returns a falsy result.
    """
    query = """
    query ($environmentId: BigInt!, $first: Int!) {
      environment(id: $environmentId) {
        applied {
          models(first: $first) {
            edges {
              node {
                name
                description
                resourceType
                uniqueId
                database
                schema
                alias
                tags
                rawCode
                materializedType
              }
            }
          }
        }
      }
    }
    """

    # Report an unset or malformed environment ID the same way as other
    # failures here (message + None) rather than leaking a bare TypeError.
    try:
        environment_id = int(DBT_ENVIRONMENT_ID)
    except (TypeError, ValueError):
        print("❌ DBT_ENVIRONMENT_ID is missing or not a valid integer")
        return None

    variables = {
        "environmentId": environment_id,
        "first": limit
    }

    result = run_graphql_query(query, variables)

    if not result:
        print("❌ No models found or error occurred")
        return None

    # Walk data -> environment -> applied -> models null-safely: a key that is
    # present with a null value would defeat a `.get(key, {})` default.
    node = result
    for key in ("data", "environment", "applied", "models"):
        node = (node or {}).get(key)
    edges = (node or {}).get("edges") or []

    # Skip edges without a node so every DataFrame row is a real model dict
    models = [edge["node"] for edge in edges if edge and edge.get("node")]
    df = pd.DataFrame(models)

    # Print a summary header
    print(f"\n📊 Found {len(models)} dbt models\n")

    # Print each model with formatted details. `or 'N/A'` is used because the
    # requested fields come back as keys with null values, which a
    # `.get(key, default)` fallback would not replace.
    for model in models:
        print(f"📝 Model: {model.get('name')}")
        print(f"   • Type: {model.get('materializedType') or 'N/A'}")
        print(f"   • Location: {model.get('database') or 'N/A'}.{model.get('schema') or 'N/A'}")
        if model.get('description'):
            print(f"   • Description: {model.get('description')}")
        if model.get('tags'):
            print(f"   • Tags: {', '.join(model.get('tags'))}")
        print()  # Add blank line between models

    return df