**Run this notebook after running and validating the outputs of the bulk_column_description_generator notebook.**\
Provide the Catalog and the path that the column descriptions were written to (ending in /csv or /json).

**Important:**
Applying a comment to a table triggers an ALTER SQL command, which can disrupt Databricks pipelines and jobs.

In [0]:
# Notebook parameters, exposed as Databricks widgets.
dbutils.widgets.text("Catalog", defaultValue="", label="Enter Catalog Name (Mandatory):")
# The input path must end with /json or /csv; that suffix decides which file type is read.
dbutils.widgets.text("Input Path", defaultValue="", label="Enter Input Path (Mandatory):")
dbutils.widgets.dropdown(
    "Always Update Comments",
    defaultValue="true",
    choices=["true", "false"],
    label="Always Update Comments (Optional):",
)

In [0]:
# Read the widget values. Surrounding whitespace is stripped because values
# pasted into a widget often carry a stray space or newline.
catalog = dbutils.widgets.get("Catalog").strip()
# Trailing slashes are dropped as well, so a path typed as ".../csv/" is still
# recognised by the /csv-or-/json suffix check performed when the file is read.
output_path = dbutils.widgets.get("Input Path").strip().rstrip("/")
# The dropdown yields the strings "true"/"false"; turn that into a real boolean.
always_update = dbutils.widgets.get("Always Update Comments").strip().lower() == "true"

In [0]:
# Echo the resolved parameters as one comma-separated line for a quick sanity check.
print(catalog, output_path, always_update, sep=",")

In [0]:
from pyspark.sql.functions import col, concat, lit, regexp_replace

In [0]:
# The suffix of the input path selects the reader.
if output_path.endswith("/csv"):
    generated_comments = spark.read.csv(output_path, header=True, inferSchema=True)
elif output_path.endswith("/json"):
    generated_comments = spark.read.json(output_path)
else:
    raise ValueError("Invalid Input Path. Must end with /csv or /json")

# Unless every comment is to be (re)applied, keep only the rows whose
# replace_comment column evaluates to true.
if not always_update:
    generated_comments = generated_comments.filter("replace_comment")

# Fully qualified name used later in the COMMENT ON TABLE statement.
full_table_name_col = concat(
    col("table_catalog"), lit('.'), col("table_schema"), lit('.'), col("table_name")
).alias("full_table_name")

# Selecting the columns of interest doubles as a basic validation: the select
# fails if any of them is missing from the input. Single quotes are removed
# from new_comment so the text can be placed inside a quoted SQL string.
commented_tables = (
    generated_comments
    .select(full_table_name_col, "table_name", "new_comment")
    .withColumn("cleaned_comment", regexp_replace("new_comment", "'", ""))
)

display(commented_tables)

In [0]:
def apply_table_comment(full_table_name, cleaned_comment):
    """Set the comment of one table with a ``COMMENT ON TABLE`` statement.

    Args:
        full_table_name: Table identifier, interpolated into the statement
            unchanged (the notebook builds it as ``catalog.schema.table``).
        cleaned_comment: Comment text to apply. When it is ``None`` there is
            no comment for the table and no statement is executed.

    Returns:
        True whenever the call does not raise, so the calling loop moves on
        to the next table.

    Raises:
        Any exception raised by ``spark.sql`` propagates to the caller.
    """
    if cleaned_comment is None:
        # Formatting None into the statement would store the literal text
        # 'None' as the table comment, so skip the table instead.
        print(f"Skipping {full_table_name}: no comment to apply")
        return True

    # Inside a Spark SQL string literal the backslash is the escape character
    # (with default parser settings). Escape backslashes first, then single
    # quotes, so the comment text cannot terminate or alter the literal.
    escaped_comment = str(cleaned_comment).replace("\\", "\\\\").replace("'", "\\'")
    query = f"COMMENT ON TABLE {full_table_name} IS '{escaped_comment}'"

    spark.sql(query)
    return True

**Important:**
Applying a comment to a table triggers an ALTER SQL command, which can disrupt Databricks pipelines and jobs.

In [0]:
# collect() pulls every row back to the driver, so the comments are applied
# one at a time on a single thread; expect this to be slow for many tables.

for row in commented_tables.collect():
    full_table_name, cleaned_comment = row['full_table_name'], row['cleaned_comment']

    # Stop at the first table for which apply_table_comment returns a falsy value.
    if not apply_table_comment(full_table_name, cleaned_comment):
        break