In [0]:
from pyspark.sql import SparkSession as SparkSession, functions as F
import os
import pandas as pd
import json

In [None]:
# Line separator: two spaces followed by a newline
# (presumably a Markdown hard line break for notification text — TODO confirm where it is used).
LINE_SEP = "  \n"
# NOTE(review): named as a Slack URL but the value points at youtube.com —
# this looks like a placeholder; confirm the real endpoint before using it.
SLACK_URL = "https://www.youtube.com/"


In [None]:
def parse_tags_json_to_rows(json_str: str) -> list[tuple[str, str]]:
    """Convert a JSON object string into a list of (key, value) string pairs.

    Example: '{"k":"v","a":"b"}' -> [("k", "v"), ("a", "b")].

    Args:
        json_str: JSON text expected to encode a single object.

    Returns:
        One (key, value) tuple per member of the object, in document order.
        Every value is passed through ``str()`` (so numbers and booleans become
        their Python string form, e.g. ``true`` -> "True"); JSON ``null``
        becomes an empty string. A falsy input, text that cannot be parsed,
        or JSON whose top level is not an object all yield an empty list.
    """
    if not json_str:
        return []
    try:
        parsed = json.loads(json_str)
        if not isinstance(parsed, dict):
            return []
        # Stringify everything; None is mapped to "" rather than "None".
        rows = []
        for key, value in parsed.items():
            rows.append((str(key), str(value) if value is not None else ""))
        return rows
    except Exception:
        # Best effort by design: any parsing problem means "no tags".
        return []

In [None]:
# Load the rows that describe which schemas/tables/views should receive tags.
#
# The table name comes from the SOURCE_TABLE notebook widget. Unless the STAGE
# environment variable is exactly "prod" (including when it is unset), only rows
# whose `catalog` value ends in "_dev" are selected.
#
# Both names below are used by the error message in the except handler, so they
# are initialised up front: otherwise a failure before they are assigned (for
# example inside the widget calls) would raise NameError in the handler and hide
# the real cause.
source_table = ""
sql_text = ""
try:
    # Receive the notebook argument.
    dbutils.widgets.text("SOURCE_TABLE", "")
    source_table = dbutils.widgets.get("SOURCE_TABLE")
    if not source_table or not source_table.strip():
        # Fail with a clear reason instead of a SQL parse error on "FROM <nothing>".
        raise ValueError("SOURCE_TABLEが指定されていません。")

    stage = os.environ.get("STAGE")
    where_clause = "WHERE catalog RLIKE '_dev$'" if (stage != "prod") else ""

    # Build the SQL statement.
    sql_text = f"""
    SELECT * 
    FROM {source_table} 
    {where_clause}
    """
    print(sql_text)
    spark = SparkSession.builder.getOrCreate()
    df_target = spark.sql(sql_text)

    display(df_target)
except Exception as e:
    # Re-raise with the table name and SQL for context, keeping the original
    # exception attached as the explicit cause.
    raise Exception(f"`{source_table}`テーブルの取得に失敗しました。SQL: \"{sql_text}\", Error: {e}") from e

In [None]:
def _sql_string_literal(value: str) -> str:
    """Return `value` as a single-quoted SQL string literal.

    Backslashes and single quotes are backslash-escaped so that a tag key or
    value containing them cannot terminate the literal early or change the
    statement. Backslashes are escaped first so the escapes added for quotes
    are not doubled.
    """
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Apply the tags described by each row of `df_target` with one ALTER ... SET TAGS
# statement per row. Failures are collected per row so that one bad row does not
# stop the others; the cell raises at the end if any row failed.
#
# `limit(1)` lets Spark stop at the first row instead of counting every row just
# to find out whether there is anything to do.
if df_target.limit(1).count() == 0:
    print("Tagsの更新対象がなかったため、処理をスキップして終了しました。")
else:
    error_records = []
    df_target_sorted = df_target.orderBy(F.col("catalog"), F.col("schema"), F.col("table"))
    for row in df_target_sorted.toLocalIterator():
        # Read the columns of the current row.
        catalog         = row['catalog']
        schema          = row['schema']
        table           = row['table']
        table_category  = row['table_category']
        tag_json        = row['tag_json']

        # Render the tags as "'key' = 'value','key2' = 'value2'" with escaped literals.
        # NOTE(review): when tag_json is empty or not a JSON object this is "" and the
        # statement becomes "SET TAGS ()" — presumably rejected by the SQL parser and
        # recorded below; confirm whether such rows should be skipped instead.
        tags_text = ",".join(
            f"{_sql_string_literal(k)} = {_sql_string_literal(v)}"
            for k, v in parse_tags_json_to_rows(tag_json)
        )

        # Build and run the statement that matches the object type.
        # NOTE(review): catalog/schema/table are interpolated unquoted, as before;
        # names needing backticks must already be quoted in the source table — confirm.
        sql = ""
        try:
            if table_category == 0: # Schema
                sql = f"ALTER SCHEMA {catalog}.{schema} SET TAGS ({tags_text});"
            elif table_category == 1: # Table
                sql = f"ALTER TABLE {catalog}.{schema}.{table} SET TAGS ({tags_text});"
            elif table_category == 2: # Materialized view
                sql = f"ALTER MATERIALIZED VIEW {catalog}.{schema}.{table} SET TAGS ({tags_text});"
            elif table_category == 3: # View
                sql = f"ALTER VIEW {catalog}.{schema}.{table} SET TAGS ({tags_text});"
            else:
                raise ValueError(
                    "table_category must be one of 0 (schema), 1 (table), "
                    f"2 (materialized view), 3 (view): table_category={table_category}"
                )
            spark.sql(sql)

        except Exception as e:
            # Record the failure and continue with the next row.
            error_records.append({
                "catalog": catalog,
                "schema": schema,
                "table": table,
                "table_category": table_category,
                "error_category": "update_tag_error",
                "tag_json": tag_json,
                "sql": sql,
                "error_message": str(e)
            })

    # If any row failed, show the collected errors and fail the cell.
    if len(error_records) > 0:
        df_error = pd.DataFrame(error_records).astype("string")
        display(df_error)
        raise Exception("Tagsの更新に失敗したレコードがあります。")
    else:
        print("Tagsの更新がすべて成功しました。")
