# Mapping Pipeline
This notebook deploys and starts the pipeline that maps sensor data tables to timestamped RDF triples. Representing the telemetry as triples makes it easy to combine with the model graph defined later.

In [0]:
%run ./0-Parameters

In [0]:
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors.platform import ResourceConflict
from databricks.sdk.service.pipelines import (
    PipelineLibrary,
    FileLibrary,
    PipelinesEnvironment,
)

# Workspace API client, reused by the cells below to create and start the pipeline.
w = WorkspaceClient()

# Resolve this notebook's own workspace path, then strip the final path segment
# to get the containing folder. The pipeline source file is addressed relative
# to that folder.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
notebook_path = notebook_context.notebookPath().get()
cwd = notebook_path.rsplit("/", 1)[0]

# Source file the pipeline executes, plus the extra package it depends on.
mapping_source = FileLibrary(path=cwd + "/mapping_pipeline/src/dt_mapping.py")
pipeline_environment = PipelinesEnvironment(
    dependencies=["git+https://github.com/aktungmak/spark-r2r.git"],
)

# Keyword arguments shared by `w.pipelines.create` and `w.pipelines.update`.
# The upper-case names are not defined in this notebook; presumably they come
# from the `0-Parameters` notebook run at the top — verify there.
pipeline_conf = {
    "name": MAPPING_PIPELINE_NAME,
    "catalog": TRIPLE_CATALOG,
    "schema": TRIPLE_SCHEMA,
    "development": True,
    "serverless": True,
    "environment": pipeline_environment,
    "libraries": [PipelineLibrary(file=mapping_source)],
    # Key/value settings handed to the pipeline as its configuration.
    "configuration": {
        "bundle.sourcePath": "./src",
        "triple_table": TRIPLE_TABLE,
        "bronze_table": BRONZE_TABLE,
    },
}

In [0]:
# Create the mapping pipeline, or update the existing one in place when a
# pipeline with the same name is already deployed, so this cell can be re-run.
try:
    pipeline = w.pipelines.create(allow_duplicate_names=False, **pipeline_conf)
    print(f"Pipeline created with ID: {pipeline.pipeline_id}")
except ResourceConflict as conflict:
    # The list filter is a LIKE pattern, in which `_` and `%` act as wildcards,
    # so it can return pipelines whose names only resemble ours. Keep only the
    # exact name match so a different pipeline is never overwritten.
    candidates = w.pipelines.list_pipelines(
        filter=f"name LIKE '{MAPPING_PIPELINE_NAME}'"
    )
    pipeline = next(
        (p for p in candidates if p.name == MAPPING_PIPELINE_NAME),
        None,
    )
    if pipeline is None:
        # Fail with an actionable message instead of a bare StopIteration.
        raise RuntimeError(
            f"Pipeline creation reported a conflict, but no existing pipeline "
            f"named {MAPPING_PIPELINE_NAME!r} was found to update."
        ) from conflict
    w.pipelines.update(
        pipeline_id=pipeline.pipeline_id,
        allow_duplicate_names=False,
        **pipeline_conf,
    )
    print(f"Updated existing pipeline with ID: {pipeline.pipeline_id}")

In [0]:
# Start an update of the pipeline that the previous cell created or updated.
target_pipeline_id = pipeline.pipeline_id
w.pipelines.start_update(pipeline_id=target_pipeline_id)