# Overview

This notebook builds an inverted index that maps pipeline tags to pipeline IDs. The index is stored in a Delta table and enables efficient discovery of pipelines by tags without having to query the Databricks API for every pipeline.

# Parameters
- `monitoring_catalog` - the catalog for the index table
- `monitoring_schema` - the schema for the index table
- `pipeline_tags_index_table_name` - the name of the index table (defaults to `pipeline_tags_index`)

In [None]:
import logging
import sys

sys.path.append("../lib")

from databricks_ingestion_monitoring.common import PipelineTagsIndexBuilder

# Declare the notebook widgets as (widget name, default value) pairs.
# The catalog and schema widgets default to an empty string; only the
# table-name widget ships with a non-empty default.
for widget_name, widget_default in (
    ("monitoring_catalog", ""),
    ("monitoring_schema", ""),
    ("pipeline_tags_index_table_name", "pipeline_tags_index"),
):
    dbutils.widgets.text(widget_name, widget_default)

# Configure root logging at INFO: timestamp, level, logger name, message.
log_format = "%(asctime)s [%(levelname)s] (%(name)s) %(message)s"
logging.basicConfig(level=logging.INFO, format=log_format)

# Read the current widget values and map them onto the builder's keyword
# arguments. Note that the table-name widget feeds `index_table_name`.
builder_kwargs = {
    "monitoring_catalog": dbutils.widgets.get("monitoring_catalog"),
    "monitoring_schema": dbutils.widgets.get("monitoring_schema"),
    "index_table_name": dbutils.widgets.get("pipeline_tags_index_table_name"),
}

# Build the index using the notebook's Spark session.
builder = PipelineTagsIndexBuilder(**builder_kwargs)
builder.build_index(spark)