In [None]:
# Read the target catalog name from a notebook widget. The value is
# interpolated into every CREATE FUNCTION statement of this notebook
# (<catalog>.datasteward.<function>).
dbutils.widgets.text("SOURCE_CATALOG", "")
source_catalog = dbutils.widgets.get("SOURCE_CATALOG").strip()
# An empty catalog would only surface later as an opaque SQL parse error
# (".datasteward.clear_str"), so fail early with an explicit message.
if not source_catalog:
    raise ValueError("SOURCE_CATALOG widget must be set to a catalog name")

In [None]:
# SQL UDF clear_str(str): removes leading and trailing runs of tab (U+0009),
# space (U+0020) and ideographic space (U+3000) from the input string.
_clear_str_ddl = f"""
CREATE OR REPLACE FUNCTION {source_catalog}.datasteward.clear_str(str STRING)
RETURNS STRING
RETURN 
  REGEXP_REPLACE(str, '^[\\u0009\\u0020\\u3000]+|[\\u0009\\u0020\\u3000]+$', '');
"""
spark.sql(_clear_str_ddl)

In [None]:
# SQL UDF normalize_text(prefix_text, text): trims text with clear_str, turns
# every line break (optional CR followed by LF) that is not already preceded by
# two spaces into "two spaces + LF", then returns prefix + text + "two spaces + LF".
# NOTE(review): two trailing spaces presumably act as a Markdown hard line break.
_normalize_text_ddl = f'''
CREATE OR REPLACE FUNCTION {source_catalog}.datasteward.normalize_text(prefix_text STRING, text STRING)
RETURNS STRING
RETURN CONCAT(prefix_text, REGEXP_REPLACE({source_catalog}.datasteward.clear_str(text), "(?<!  )\\r?\\n", CONCAT("  ", CHR(10))), "  \n");
'''
spark.sql(_normalize_text_ddl)

In [None]:
# SQL UDF normalize_url(prefix_text, link_text, url): returns
# prefix + "[link_text](trimmed url)" followed by two spaces and a line feed.
_normalize_url_ddl = f'''
CREATE OR REPLACE FUNCTION {source_catalog}.datasteward.normalize_url(prefix_text STRING, link_text STRING, url STRING)
RETURNS STRING
RETURN CONCAT(prefix_text, "[", link_text, "](", {source_catalog}.datasteward.clear_str(url), ")  \n");
'''
spark.sql(_normalize_url_ddl)

In [None]:
spark.sql(f"CREATE OR REPLACE FUNCTION {source_catalog}.datasteward.cron_to_japanese(cron_expr STRING)" +
'''
RETURNS STRING
LANGUAGE PYTHON
AS $$
import re

# ===== Constants =====
# Weekday names; the list index is the numeric weekday (0 = Sunday).
WEEK_EN = "SUN MON TUE WED THU FRI SAT".split()
WEEK_JA = list("日月火水木金土")
WEEK_TO_NUM = dict(zip(WEEK_EN, range(len(WEEK_EN))))
# Month names; numeric months are 1-based.
MONTH_EN = "JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC".split()
MONTH_TO_NUM = dict(zip(MONTH_EN, range(1, len(MONTH_EN) + 1)))

# Inclusive value bounds per field kind (keyed by the Japanese unit label).
LIMIT_RANGES = dict(
    (kind, {"min": low, "max": high})
    for kind, low, high in (
        ("分", 0, 59),
        ("時", 0, 23),
        ("日", 1, 31),
        ("週", 0, 7),
        ("月", 1, 12),
        ("年", 0, 2999),
    )
)
# Maximum number of HH:MM examples listed before the list is abbreviated.
EXAMPLE_MAX_LENGTH = 10


# ===== Global state =====
# Set to True by split_time when a comma-separated list is encountered.
is_not_simple_expr = False


# ===== クーロン式の分解 =====
def _parse_cron(cron_expr: str):
    """
    クーロン式を各フィールドに分解する
    - 5フィールド: minute, hour, dom, mon, dow
    - 6フィールド: minute, hour, dom, mon, dow, year
    - \'?\' は \'*\' として扱う
    返り値: (minute, hour, dom, mon, dow, year_or_none)
    """
    s = cron_expr.strip().replace("\u3000", " ")
    m = re.fullmatch(r"(?is)\s*cron\(\s*(.*?)\s*\)\s*", s)
    if m:
        s = m.group(1)
    s = re.sub(r"\s+", " ", s).strip()
    parts = s.split()
    # parts = [("*" if p == "?" else p).upper() for p in parts]

    if len(parts) == 5:
        year = None
        minute, hour, dom, mon, dow = parts
    elif len(parts) == 6:
        minute, hour, dom, mon, dow, year = parts
    else:
        raise ValueError(f"未対応のフィールド数: {len(parts)} -> {parts}")
    return (minute, hour, dom, mon, dow, year)


# ===== 記述方法のチェック =====
def _num_or_name_to_num(tok: str, kind: str) -> int:
    """
    Resolve a token to its integer value.

    For weekdays ("週") three-letter English names are accepted and "7" is
    folded onto 0 (Sunday); for months ("月") three-letter English names are
    accepted. Any other all-digit token is range-checked for ``kind`` and
    returned as an int.

    Raises:
        ValueError: when the token is neither a known name nor digits, or
            when the number is outside the allowed range.
    """
    token = tok.strip().upper()
    if kind == "週":
        if token == "7":
            return 0  # 0/7=Sun
        weekday = WEEK_TO_NUM.get(token)
        if weekday is not None:
            return weekday
    elif kind == "月":
        month = MONTH_TO_NUM.get(token)
        if month is not None:
            return month
    if not token.isdigit():
        raise ValueError(f"解決できない値が含まれています（{kind}）: {tok}")
    number = int(token)
    _validate_value_range(number, kind)
    return number


def _validate_value_range(value: int, kind: str) -> None:
    """Raise ValueError unless ``value`` lies within the inclusive LIMIT_RANGES bounds of ``kind``."""
    bounds = LIMIT_RANGES[kind]
    if not (bounds["min"] <= value <= bounds["max"]):
        raise ValueError(f"値が範囲外です（{kind}）: {value}")


def _validate_value(expr: str, kind: str) -> None:
    """Raise ValueError unless ``expr`` is an all-digit string within the range allowed for ``kind``."""
    if expr.isdigit():
        _validate_value_range(int(expr), kind)
        return
    raise ValueError(f"数値ではない値が含まれています（{kind}）: {expr}")


def _validate_hyphen(a: str, b: str, kind: str) -> None:
    """
    Validate both ends of a range "a-b" and reject ranges written backwards.

    For weekdays ("週") and months ("月") each end may be a name or a number;
    for every other kind both ends must be digits within the allowed range.

    Raises:
        ValueError: when an end cannot be resolved, is out of range, or the
            start is greater than the end.
    """
    if kind in ("週", "月"):
        # Each end is resolved from its own token.
        val_a = _num_or_name_to_num(a, kind)
        val_b = _num_or_name_to_num(b, kind)
        _validate_value_range(val_a, kind); _validate_value_range(val_b, kind)
        # _num_or_name_to_num folds Sunday written as 7 onto 0 (SUN is 0 too).
        # A range ending on Sunday such as 6-7 or SAT-SUN is therefore ordered
        # as if Sunday were 7, so it is not reported as reversed.
        if kind == "週" and val_b == 0 and val_a > 0:
            val_b = 7
    else:
        _validate_value(a, kind); _validate_value(b, kind)
        val_a = int(a); val_b = int(b)
    if val_a > val_b:
        raise ValueError(f"レンジが逆順です（{kind}）: {val_a}-{val_b}")


# ===== 実行時間(hh:mm)の例の生成 =====
def _generate_list(minv:int, maxv:int, step:int = 1) -> list[int]:
    return list(range(int(minv), int(maxv) + 1, step))


def _split_hyphen(expr:str, kind:str, step:int = 1) -> list[int]:
    """Expand a numeric range "a-b" into the list of values it covers, after validating it for ``kind``."""
    low_text, high_text = expr.split("-")
    _validate_hyphen(low_text, high_text, kind)
    low, high = int(low_text), int(high_text)
    return _generate_list(low, high, step)


def _split_slash(expr:str, kind:str) -> list[int]:
    """
    Expand a step expression "base/step" into the list of values it covers.

    ``base`` may be a single number (start value, runs up to the maximum of
    ``kind``), "*" (full range of ``kind``) or a range "a-b".

    Raises:
        ValueError: when the step is not a positive in-range number, or the
            base cannot be resolved.
    """
    base, step = expr.split("/")
    _validate_value(step, kind)
    step = int(step)
    # Kinds whose minimum is 0 let a step of 0 through the range check; range()
    # would then fail with an unrelated message, so reject it explicitly.
    if step < 1:
        raise ValueError(f"ステップ値は1以上で指定してください（{kind}）: {expr}")
    if base.isdigit():
        val = int(base)
        _validate_value_range(val, kind)
        return _generate_list(val, LIMIT_RANGES[kind]["max"], step)
    elif base == "*":
        return _generate_list(LIMIT_RANGES[kind]["min"], LIMIT_RANGES[kind]["max"], step)
    elif "-" in base:
        return _split_hyphen(base, kind, step)
    else:
        raise ValueError(f"解決できない値が含まれています（{kind}）: {expr}")


def split_time(expr:str, kind:str) -> list[int]:
    """
    Expand a minute / hour field into the sorted list of values it matches.

    Supports a single number, "*", comma lists (whose items may be numbers,
    step expressions or ranges), step expressions and ranges.

    Side effect: sets the module-level flag ``is_not_simple_expr`` to True
    when a comma list is encountered.

    Raises:
        ValueError: when the field cannot be resolved or a value is out of range.
    """
    global is_not_simple_expr

    if expr.isdigit():
        single = int(expr)
        _validate_value_range(single, kind)
        return [single]

    if expr == "*":
        bounds = LIMIT_RANGES[kind]
        return _generate_list(bounds["min"], bounds["max"])

    if "," in expr:
        is_not_simple_expr = True
        collected = []
        for piece in expr.split(","):
            if piece.isdigit():
                number = int(piece)
                _validate_value_range(number, kind)
                collected.append(number)
            elif "/" in piece:
                collected += _split_slash(piece, kind)
            elif "-" in piece:
                collected += _split_hyphen(piece, kind)
            else:
                raise ValueError(f"解決できない値が含まれています（{kind}）: {expr}")
        return sorted(set(collected))

    if "/" in expr:
        return _split_slash(expr, kind)
    if "-" in expr:
        return _split_hyphen(expr, kind)
    raise ValueError(f"解決できない値が含まれています（{kind}）: {expr}")


# ===== 分・時 を 日本語変換 =====
def _pad2(n:int) -> str:
    return f"{int(n):02d}"


def _parse_hyphen(expr:str, kind:str) -> str:
    """Render a numeric range "a-b" as zero-padded start, a wave dash, zero-padded end and the unit ``kind``."""
    start, end = expr.split("-")
    return "{}～{}{}".format(_pad2(start), _pad2(end), kind)


def parse_time(expr:str, kind:str) -> dict[str]:
    """
    Describe a simple minute / hour field in Japanese.

    Returns a dict with "value" (zero-padded number, or "XX" when the field
    is not a single number) and, for anything other than a single number,
    "frequency" (the Japanese phrase for "*", step and range expressions).

    Raises:
        ValueError: when the field has none of the supported shapes.
    """
    if expr.isdigit():
        return {"value" : _pad2(expr)}
    if expr == "*":
        return {"frequency" : f"毎{kind}", "value" : "XX"}

    phrase = None
    if "/" in expr:
        base, step = expr.split("/")
        if base.isdigit():
            phrase = f"{base}{kind}開始で{int(step)}{kind}間ごと"
        elif base == "*":
            phrase = f"{int(step)}{kind}間ごと"
        elif "-" in base:
            phrase = f"{_parse_hyphen(base, kind)}の間で{int(step)}{kind}間ごと"
    elif "-" in expr:
        phrase = f"{_parse_hyphen(expr, kind)}の間"

    if phrase is None:
        raise ValueError(f"解決できない値が含まれています（{kind}）: {expr}")
    return {"frequency" : phrase, "value" : "XX"}


def time_to_jp(minute:str, hour:str) -> dict[str]:
    """
    Build the time-of-day part of the description.

    Returns a dict with:
    - "examples": comma-separated HH:MM values matched by the two fields; at
      most EXAMPLE_MAX_LENGTH are listed, followed by ",...," and the last one.
    - "frequency": Japanese phrase for the schedule. Present only when neither
      field is a comma list and at least one field is not a single number.

    Raises:
        ValueError: when a field cannot be resolved or is out of range.
    """
    minutes = split_time(minute, "分")
    hours = split_time(hour, "時")

    example_all = [f"{_pad2(h)}:{_pad2(m)}" for h in hours for m in minutes]
    examples = ",".join(example_all[:EXAMPLE_MAX_LENGTH])
    if len(example_all) > EXAMPLE_MAX_LENGTH:
        examples += ",...," + example_all[-1]
    result = {"examples" : examples}

    # Comma lists are described by their examples only. This is decided from
    # the arguments of this call rather than from the module-level flag
    # is_not_simple_expr, which is never reset and would otherwise carry over
    # from an earlier expression evaluated in the same interpreter.
    if "," in minute or "," in hour:
        return result

    minute_parts = parse_time(minute, "分")
    hour_parts = parse_time(hour, "時")

    hour_freq = hour_parts.get("frequency")
    minute_freq = minute_parts.get("frequency")
    hour_value = hour_parts["value"]
    clock = f"{hour_value}:{minute_parts['value']}" if False else hour_value + ":" + minute_parts["value"]

    if hour_freq is not None and minute_freq is not None:
        result["frequency"] = f"{hour_freq}、{minute_freq} {clock}"
    elif hour_freq is not None:
        result["frequency"] = f"{hour_freq} {clock}"
    elif minute_freq is not None:
        result["frequency"] = f"{hour_value}時台に{minute_freq} {clock}"
    return result


# ===== DOM / DOW / MON 日本語変換（Quartzの主要拡張に対応） =====
def _dom_to_jp(dom: str) -> str:
    """
    Describe the day-of-month field in Japanese.

    Supports "*", "L" (last day), "LW" / "WL" (last weekday), "<n>W" (weekday
    nearest to day n), a single day, comma lists, step expressions and ranges.

    Raises:
        ValueError: when the field cannot be resolved or a value is out of range.
    """
    kind = "日"
    dom = dom.upper()
    if dom == "*":
        return "毎日"
    if dom == "L":
        return "月末日"
    if dom in ("LW", "WL"):
        return "月末平日"
    # The doubled backslash is intentional: this source is embedded in a
    # non-raw string literal, which collapses it to a single backslash.
    m = re.fullmatch(r"(\\d+)W", dom)
    if m:
        return f"{int(m.group(1))}日に最も近い平日"

    if dom.isdigit():
        val = int(dom)
        _validate_value_range(val, kind)
        return f"{val}日"
    elif "," in dom:
        vals = []
        for p in dom.split(","):
            if p.isdigit():
                val = int(p)
                _validate_value_range(val, kind)
                vals.append(val)
            elif "/" in p:
                vals.extend(_split_slash(p, kind))
            elif "-" in p:
                vals.extend(_split_hyphen(p, kind))
            else:
                raise ValueError(f"解決できない値が含まれています: {dom}")
        joined = ",".join(str(v) for v in sorted(set(vals)))
        return f"{joined}日"
    elif "/" in dom:
        base, step = dom.split("/")
        _validate_value(step, kind)
        step = int(step)
        if base.isdigit():
            val = int(base)
            _validate_value_range(val, kind)
            return f"{base}日開始で{int(step)}日おき"
        elif base == "*":
            return f"1日開始で{step}日おki" if False else f"1日開始で{step}日おき"
        elif "-" in base:
            # Split the range part only; splitting the whole field would
            # leave the step suffix attached to the end value.
            a, b = base.split("-")
            _validate_hyphen(a, b, kind)
            return f"{a}～{b}日の間で{step}日おき"
    elif "-" in dom:
        a, b = dom.split("-")
        _validate_hyphen(a, b, kind)
        return f"{a}～{b}日"
    raise ValueError(f"解決できない値が含まれています（日）: {dom}")


def _dow_to_jp(dow: str) -> str:
    """
    Describe the day-of-week field in Japanese.

    Supports "*", the weekday / weekend shortcuts, "<d>L" (last such weekday),
    "<d>#<k>" (k-th such weekday), "L", comma lists, step expressions, ranges
    and a single weekday. Weekdays may be numbers (0 or 7 = Sunday) or
    three-letter English names.

    Raises:
        ValueError: when the field cannot be resolved or a value is out of range.
    """
    kind = "週"
    dow = dow.upper()
    if dow == "*":
        return "毎日"
    if dow in ("1-5","MON-FRI"):
        return "毎週（平日）"
    if dow in ("0,6","6,0","SUN,SAT","SAT,SUN"):
        return "毎週（土日）"
    m = re.fullmatch(r"([0-7]|[A-Z]{3})L", dow)
    if m:
        n = _num_or_name_to_num(m.group(1), kind)
        return f"最後の{WEEK_JA[n]}曜日"
    m = re.fullmatch(r"([0-7]|[A-Z]{3})#([1-5])", dow)
    if m:
        n = _num_or_name_to_num(m.group(1), kind)
        k = int(m.group(2))
        return f"第{k}{WEEK_JA[n]}曜日"
    if dow == "L":
        return "最後の週"

    if "," in dow:
        vals = []
        for p in dow.split(","):
            if "-" in p:
                a, b = p.split("-")
                _validate_hyphen(a, b, kind)
                val_a = _num_or_name_to_num(a, kind); val_b = _num_or_name_to_num(b, kind)
                if val_b == 0 and val_a > 0:
                    # The range ends on Sunday, which resolves to 0: expand up
                    # to Saturday and add Sunday, instead of an empty range.
                    vals.extend(_generate_list(val_a, 6))
                    vals.append(0)
                else:
                    vals.extend(_generate_list(val_a, val_b))
            else:
                val = _num_or_name_to_num(p, kind)
                _validate_value_range(val, kind)
                vals.append(val)
        sorted_vals = sorted(set(vals))
        joined = ",".join(WEEK_JA[i] for i in sorted_vals)
        return f"毎週（{joined}）"
    if "/" in dow:
        base, step = dow.split("/")
        _validate_value(step, kind)
        step = int(step)
        if base == "*":
            base_jp = "全ての曜日"
        elif base in ("1-5", "MON-FRI"):
            base_jp = "平日"
        elif "-" in base:
            # Split the range part only; splitting the whole field would
            # leave the step suffix attached to the end value.
            a, b = base.split("-")
            _validate_hyphen(a, b, kind)
            val_a = _num_or_name_to_num(a, kind); val_b = _num_or_name_to_num(b, kind)
            base_jp = f"{WEEK_JA[val_a]}～{WEEK_JA[val_b]}"
        else:
            base_jp = WEEK_JA[_num_or_name_to_num(base, kind)]
        return f"{step}週間おき（{base_jp}）"
    if "-" in dow:
        a, b = dow.split("-")
        _validate_hyphen(a, b, kind)
        val_a = _num_or_name_to_num(a, kind); val_b = _num_or_name_to_num(b, kind)
        return f"毎週（{WEEK_JA[val_a]}～{WEEK_JA[val_b]}）"

    n = _num_or_name_to_num(dow, kind)
    return f"毎週{WEEK_JA[n]}曜日"


def _mon_to_jp(mon: str) -> str:
    """
    Describe the month field in Japanese.

    Supports "*", comma lists (names, numbers or ranges), step expressions,
    ranges and a single month. Months may be numbers or three-letter English
    names.

    Raises:
        ValueError: when the field cannot be resolved or a value is out of range.
    """
    kind = "月"
    month_field = mon.upper()

    def resolve_range(range_text):
        # Validate an "a-b" range and return both ends as month numbers.
        first, last = range_text.split("-")
        _validate_hyphen(first, last, kind)
        return _num_or_name_to_num(first, kind), _num_or_name_to_num(last, kind)

    if month_field == "*":
        return "毎月"

    if "," in month_field:
        months = []
        for piece in month_field.split(","):
            if "-" in piece:
                low, high = resolve_range(piece)
                months += _generate_list(low, high)
            else:
                months.append(_num_or_name_to_num(piece, kind))
        listing = ",".join(map(str, sorted(set(months))))
        return f"（{listing}月）"

    if "/" in month_field:
        base, step_text = month_field.split("/")
        _validate_value(step_text, kind)
        interval = int(step_text)
        if base == "*":
            return f"{interval}か月おき"
        if "-" in base:
            low, high = resolve_range(base)
            scope = f"{low}～{high}月の間"
        else:
            scope = f"{_num_or_name_to_num(base, kind)}月開始"
        return f"{interval}か月おき（{scope}）"

    if "-" in month_field:
        low, high = resolve_range(month_field)
        return f"{low}～{high}月"

    return f"{_num_or_name_to_num(month_field, kind)}月"


# ===== YEAR 日本語変換 =====
def year_to_jp(year: str|None) -> dict[str]:
    """
    Describe the optional year field in Japanese.

    Returns a dict that may contain:
    - "frequency": recurrence phrase (every year, or every N years)
    - "target_years": the years the schedule is limited to

    A missing field or "*" means every year.

    Raises:
        ValueError: when the field cannot be resolved or a value is out of range.
    """
    kind = "年"
    if not year or year == "*":
        return {"frequency" : "毎年"}
    elif year.isdigit():
        val = int(year)
        _validate_value_range(val, kind)
        return {"target_years" : f"{val}年"}
    elif "," in year:
        vals = []
        for p in year.split(","):
            if p.isdigit():
                val = int(p)
                _validate_value_range(val, kind)
                vals.append(val)
            elif "/" in p:
                vals.extend(_split_slash(p, kind))
            elif "-" in p:
                vals.extend(_split_hyphen(p, kind))
            else:
                raise ValueError(f"解決できない値が含まれています: {year}")
        joined = ",".join(str(v) for v in sorted(set(vals)))
        return {"target_years" : f"{joined}年"}
    elif "/" in year:
        base, step = year.split("/")
        _validate_value(step, kind)
        step = int(step)
        if base.isdigit():
            val = int(base)
            _validate_value_range(val, kind)
            return {"frequency"    : f"{step}年おき",
                    "target_years" : f"{val}年～"
                    }
        elif base == "*":
            return {"frequency" : f"{step}年おき"}
        elif "-" in base:
            a, b = base.split("-")
            _validate_hyphen(a, b, kind)
            return {"frequency"    : f"{step}年おき",
                    "target_years" : _parse_hyphen(base, kind)
                    }
    elif "-" in year:
        a, b = year.split("-")
        _validate_hyphen(a, b, kind)
        return {"target_years" : f"{_parse_hyphen(year, kind)}"}
    raise ValueError(f"解決できない値が含まれています（年）: {year}")


# ===== メイン処理 =====
def cron_to_jp(expr: str) -> str:
    """
    Convert a 5- or 6-field cron expression into a Japanese description.

    - Minute / hour are listed as HH:MM examples (abbreviated when there are
      many) and, for simple expressions, also described as a phrase.
    - Day-of-month, month and day-of-week are rendered in Japanese.
    - Times are assumed to be JST; no timezone conversion is performed.

    Raises:
        ValueError: when the expression has an unsupported shape, a value is
            out of range, or both day-of-month and day-of-week are specified.
    """
    minute, hour, dom, mon, dow, year = _parse_cron(expr)

    # "*" and "?" both mean that the field is not restricted.
    unspecified = ("*", "?")
    if dom not in unspecified and dow not in unspecified:
        raise ValueError(f"日(dom)と曜日(dow)はどちらかのみ指定してください: dom={dom}, dow={dow}")
    elif dom not in unspecified:
        day_expr = _dom_to_jp(dom)
    elif dow not in unspecified:
        day_expr = _dow_to_jp(dow)
    else:
        day_expr = "毎日"

    time_parts = time_to_jp(minute, hour)
    month_expr = _mon_to_jp(mon)
    year_parts = year_to_jp(year)

    # Build the leading recurrence phrase, dropping a broader unit when a
    # narrower one already implies it.
    frequency_prefix = ""
    if "frequency" in year_parts:
        frequency_prefix += year_parts["frequency"]
    # year vs month
    if (frequency_prefix == "毎年") and month_expr.startswith("毎月"):
        frequency_prefix = month_expr
    else:
        if (frequency_prefix != "") and (frequency_prefix != "毎年"):
            frequency_prefix += "の"
        frequency_prefix += month_expr
    # (year or month) vs (week or date)
    if frequency_prefix in ("毎年", "毎月") and day_expr in ("毎週", "毎日"):
        frequency_prefix = day_expr
    else:
        frequency_prefix += day_expr

    frequency = frequency_prefix + " "
    examples = ""
    # Time of day (hh:mm): with a phrase the examples go to the end in
    # parentheses, otherwise the examples themselves are the time part.
    example_text = time_parts["examples"]
    if "frequency" in time_parts:
        frequency += time_parts["frequency"]
        examples = f"（例：{example_text}）"
    else:
        frequency += example_text

    # Assemble the final text.
    result = frequency + " (JST)"
    if "target_years" in year_parts:
        target_years = year_parts["target_years"]
        result += f"（対象年: {target_years}）"
    result += examples
    return result

if __name__ == \'__main__\':
    return = cron_to_jp(cron_expr)
$$;
''')

In [None]:
# SQL UDF generate_description_schema(name_ja, explanation): concatenates the
# data source name line, a fixed overview line, the explanation (only when it
# is not empty after trimming) and a fixed contact line.
_generate_description_schema_ddl = f'''
CREATE OR REPLACE FUNCTION {source_catalog}.datasteward.generate_description_schema(name_ja STRING, explanation STRING)
RETURNS STRING
RETURN
  CONCAT(
    CONCAT("データソース名/システム名：", {source_catalog}.datasteward.clear_str(name_ja), "  \n"),
    CONCAT("Overview：このスキーマに含まれるデータは、主に上記のシステムから取り込まれています。", "  \n"),
    CASE
      WHEN {source_catalog}.datasteward.clear_str(explanation) != ""
      THEN {source_catalog}.datasteward.normalize_text("データソース概要：", explanation)
      ELSE ""
    END,
    "連絡先：[#sys-データ基盤の相談や情報共有](https://www.youtube.com/)"
  );
'''
spark.sql(_generate_description_schema_ddl)

In [None]:
# SQL UDF generate_description_table(...): concatenates, in this order, the
# table name line, the overview header, explanation, type_conversion, rule,
# the update frequency (cron_schedule rendered by cron_to_japanese), the source
# table specification link, the creation query link, the ticket link and a
# fixed contact line. Every section except the name, header and contact is
# emitted only when its argument is not empty after trimming.
_generate_description_table_ddl = f'''
CREATE OR REPLACE FUNCTION {source_catalog}.datasteward.generate_description_table(name_ja STRING, explanation STRING, type_conversion STRING, rule STRING, cron_schedule STRING, query STRING, reference STRING, link STRING)
RETURNS STRING
RETURN
  CONCAT(
    CONCAT("TableName：", {source_catalog}.datasteward.clear_str(name_ja), "  \n"),
    "Overview：  \n",
    CASE
      WHEN {source_catalog}.datasteward.clear_str(explanation) != ""
      THEN {source_catalog}.datasteward.normalize_text("", explanation)
      ELSE ""
    END,
    CASE
      WHEN {source_catalog}.datasteward.clear_str(type_conversion) != ""
      THEN {source_catalog}.datasteward.normalize_text("", type_conversion)
      ELSE ""
    END,
    CASE
      WHEN {source_catalog}.datasteward.clear_str(rule) != ""
      THEN {source_catalog}.datasteward.normalize_text("", rule)
      ELSE ""
    END,
    CASE
      WHEN {source_catalog}.datasteward.clear_str(cron_schedule) != ""
      THEN CONCAT("更新頻度：", {source_catalog}.datasteward.cron_to_japanese(cron_schedule), "  \n")
      ELSE ""
    END,
    CASE
      WHEN {source_catalog}.datasteward.clear_str(link) != ""
      THEN {source_catalog}.datasteward.normalize_url("元テーブル仕様書：", "link", link)
      ELSE ""
    END,
    CASE
      WHEN {source_catalog}.datasteward.clear_str(query) != ""
      THEN {source_catalog}.datasteward.normalize_url("作成クエリ：", "query", query)
      ELSE ""
    END,
    CASE
      WHEN {source_catalog}.datasteward.clear_str(reference) != ""
      THEN {source_catalog}.datasteward.normalize_url("対応チケット：", "reference", reference)
      ELSE ""
    END,
    "連絡先：[#sys-データ基盤の相談や情報共有](https://www.youtube.com/)"
  );
'''
spark.sql(_generate_description_table_ddl)

In [None]:
# SQL UDF generate_description_view(name_ja, cron_schedule, reference):
# concatenates the table name line, a fixed overview line for views, the update
# frequency and the ticket link (each only when its argument is not empty after
# trimming) and a fixed contact line.
_generate_description_view_ddl = f'''
CREATE OR REPLACE FUNCTION {source_catalog}.datasteward.generate_description_view(name_ja STRING, cron_schedule STRING, reference STRING)
RETURNS STRING
RETURN
  CONCAT(
    CONCAT("TableName：", {source_catalog}.datasteward.clear_str(name_ja), "  \n"),
    "Overview：元テーブル名から必要なカラムを取り出したビュー。詳細を確認したい場合は、元テーブルを参照してください。  \n",
    CASE
      WHEN {source_catalog}.datasteward.clear_str(cron_schedule) != ""
      THEN CONCAT("更新頻度：", {source_catalog}.datasteward.cron_to_japanese(cron_schedule), "  \n")
      ELSE ""
    END,
    CASE
      WHEN {source_catalog}.datasteward.clear_str(reference) != ""
      THEN {source_catalog}.datasteward.normalize_url("対応チケット：", "reference", reference)
      ELSE ""
    END,
    "連絡先：[#sys-データ基盤の相談や情報共有](https://www.youtube.com/)"
  );
'''
spark.sql(_generate_description_view_ddl)