In [0]:
%pip install /Volumes/catalog_dlt_meta/default/dlt-meta-volume/ab_cancel_translator_stubs/ab_cancel_translator_stubs-1.0.0-py3-none-any.whl
# The %pip line above installs the ab_cancel_translator_stubs 1.0.0 wheel from the
# Unity Catalog volume path into this notebook's Python environment.

In [0]:
# Restart the notebook's Python process; presumably needed so the wheel installed
# in the previous cell becomes importable -- confirm against the Databricks docs.
dbutils.library.restartPython()

In [0]:
# The .whl file provides these imports:
from ab_cancel_translator_stubs import create_translator, Msg, TranslationResult
from ab_cancel_translator_stubs.integration import IntegrationHelper

# Your integration file uses those imports to create DLT-Meta specific functionality:
class ABCancelTranslatorPipeline:
    """Skeleton that wires the AB Cancel translator into a DLT-Meta pipeline.

    Attributes:
        spark: The object passed as ``spark`` (presumably the active
            SparkSession -- confirm with callers).
        dataflow_spec: The object passed as ``dataflow_spec``.
        translator: Whatever ``create_translator()`` from the installed wheel
            returns.
    """

    def __init__(self, spark, dataflow_spec):
        """Store the constructor arguments and build the translator.

        Args:
            spark: Kept on the instance as ``self.spark``.
            dataflow_spec: Kept on the instance as ``self.dataflow_spec``.
        """
        # Keep both arguments on the instance: they used to be dropped, which
        # left any later pipeline logic with no way to reach them.
        self.spark = spark
        self.dataflow_spec = dataflow_spec
        self.translator = create_translator()  # From .whl
        # DLT-Meta specific logic here

In [0]:
import subprocess
import sys
import os

# Location of the ab_cancel_translator_stubs wheel on the Unity Catalog volume.
wheel_path = '/Volumes/catalog_dlt_meta/default/dlt-meta-volume/ab_cancel_translator_stubs/ab_cancel_translator_stubs-1.0.0-py3-none-any.whl'

# Install the wheel with the interpreter that runs this notebook.
# NOTE(review): the first cell already installs this same wheel via %pip;
# confirm whether both installs are needed.
try:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", wheel_path, "--quiet"],
        check=True,                  # still raises CalledProcessError on failure
        stdout=subprocess.DEVNULL,   # keep successful installs silent
        stderr=subprocess.PIPE,      # keep pip's error text instead of discarding it
        text=True,
    )
except subprocess.CalledProcessError as install_error:
    # Surface pip's diagnostics before propagating the original exception.
    print(install_error.stderr, file=sys.stderr)
    raise

In [0]:
def validate_ab_translator_availability():
    """Report whether the ``ab_cancel_translator_stubs`` package can be imported.

    Returns:
        tuple: ``(True, version)`` when the package imports, where ``version``
        is its ``__version__`` attribute or ``None`` if the package does not
        define one; ``(False, None)`` when the import raises ``ImportError``.
    """
    try:
        import ab_cancel_translator_stubs
    except ImportError:
        return False, None
    # Look the version up separately: a package without ``__version__`` is
    # still available, and must not be reported as missing.
    return True, getattr(ab_cancel_translator_stubs, "__version__", None)

In [0]:
# Run the availability check; as the cell's last expression, the returned
# (available, version) tuple is shown as the cell output.
validate_ab_translator_availability()

In [0]:
def __get_onboarding_file_dataframe(onboarding_file_path, env, uc_catalog_name, bronze_schema, silver_schema, uc_volume_path):
    """Load an onboarding JSON/template file and resolve its placeholders.

    The file is read as multiline JSON through the notebook-global ``spark``
    session. Columns whose names carry the literal ``{env}`` suffix are filled
    in while they still have their templated names, and every column name then
    has ``{env}`` replaced by ``env``.

    Args:
        onboarding_file_path: Path to the onboarding file; it must end in
            ``.json`` or ``.template`` (case-insensitive).
        env: Value substituted for the ``{env}`` placeholder in column names.
        uc_catalog_name: Catalog part of the bronze/silver database names.
        bronze_schema: Schema part written to ``bronze_database_{env}``.
        silver_schema: Schema part written to ``silver_database_{env}``.
        uc_volume_path: Text that replaces the ``{uc_volume_path}`` placeholder
            inside ``bronze_data_quality_expectations_json_{env}``.

    Returns:
        The onboarding DataFrame with resolved values and column names.

    Raises:
        Exception: If the file extension is not supported, or if any
            ``data_flow_id`` occurs more than once in the file.
    """
    # Reject unsupported files before touching Spark.
    if not onboarding_file_path.lower().endswith((".json", ".template")):
        raise Exception(
            "Onboarding file format not supported! Please provide json file format"
        )

    from pyspark.sql.functions import lit, regexp_replace

    onboarding_df = spark.read.option("multiline", "true").json(onboarding_file_path)

    # regexp_replace reads `$` and `\` in the replacement as group references
    # and escapes, so quote them to insert the volume path verbatim.
    volume_path_replacement = uc_volume_path.replace("\\", "\\\\").replace("$", "\\$")

    # The "{env}" in these column names is intentionally literal: it matches the
    # templated names in the file and is resolved by the rename at the end.
    # lit() is used rather than a string-built SQL expression so catalog/schema
    # names containing quotes cannot break the expression.
    onboarding_df = (
        onboarding_df
        .withColumn("bronze_database_{env}", lit(f"{uc_catalog_name}.{bronze_schema}"))
        .withColumn("silver_database_{env}", lit(f"{uc_catalog_name}.{silver_schema}"))
        .withColumn(
            # This column is read by name, so it has to be present in the file.
            "bronze_data_quality_expectations_json_{env}",
            regexp_replace(
                "bronze_data_quality_expectations_json_{env}",
                "\\{uc_volume_path\\}",
                volume_path_replacement,
            ),
        )
    )

    onboarding_df_dupes = (
        onboarding_df.groupBy("data_flow_id").count().filter("count > 1")
    )
    if len(onboarding_df_dupes.head(1)) > 0:
        onboarding_df_dupes.show()
        raise Exception("onboarding file have duplicated data_flow_ids! ")

    # Build the renamed column list from the final DataFrame. Computing it
    # before the withColumn calls made toDF fail with a column-count mismatch
    # whenever one of the database columns was absent and therefore appended.
    resolved_columns = [name.replace("{env}", env) for name in onboarding_df.columns]
    return onboarding_df.toDF(*resolved_columns)

In [0]:
# Settings for the demo onboarding run; every value is a plain string.
onboarding_params_map = {
    # "<catalog>.<schema>" -- the part before the dot is used as the catalog name.
    "database": "catalog_dlt_meta.spec_schema",
    "onboarding_file_path": "file:/Workspace/Users/riyazali.mohammad@celebaltech.com/dlt-meta/demo/conf/onboarding.template",
    # Dataflow spec table names.
    "bronze_dataflowspec_table": "bronze_dataflowspec_table",
    "silver_dataflowspec_table": "silver_dataflowspec_table",
    # Schemas and volume path substituted into the onboarding file.
    "bronze_schema": "bronze_schema",
    "silver_schema": "silver_schema",
    "uc_volume_path": "/volumes/dummy_path",
    # Run metadata.
    "overwrite": "True",
    "env": "prod",
    "version": "v1",
    "import_author": "Riyaz",
}

In [0]:
# Build the onboarding DataFrame from the configured parameters and render it.
# The catalog name is whatever precedes the first "." in the "database" entry.
display(
    __get_onboarding_file_dataframe(
        onboarding_file_path=onboarding_params_map["onboarding_file_path"],
        env=onboarding_params_map["env"],
        uc_catalog_name=onboarding_params_map["database"].partition(".")[0],
        bronze_schema=onboarding_params_map["bronze_schema"],
        silver_schema=onboarding_params_map["silver_schema"],
        uc_volume_path=onboarding_params_map["uc_volume_path"],
    )
)