# Consolidating Product and Table Metadata into a Single Table

This notebook can be scheduled to refresh the metadata table periodically.

In [0]:
# Maps each data product (key) to the list of table names that belong to it (value).
# A table may be listed under more than one product.
products_and_tables = {
    "acquisition_and_origination_analytics": [
        "loan_acquisition", "application_table", "borrower_demographics", "credit_history",
    ],
    "investor_and_security_allocation": [
        "loan_acquisition", "loan_performance", "mbs_table",
    ],
    "fraud_detection_and_process_optimization": [
        "application_table", "credit_history", "loan_servicing_table", "uniform_closing_table",
    ],
    "compliance_and_regulatory_reporting": [
        "borrower_demographics", "housing_goal_table", "uniform_closing_table",
    ],
    "loss_and_credit_risk_management": [
        "credit_history", "loan_performance", "loss_severity_table",
    ],
    "loan_performance_and_servicing": [
        "loan_performance", "repayment_table", "loan_servicing_table",
    ],
    "collateral_and_property_intelligence": [
        "property_characteristics", "property_appraisal_table", "unit_rentals",
    ],
}


In [0]:
# Catalog and schema that hold the product -> table mapping table.
CATALOG = "fnma_product_catalog_jcg"
SCHEMA = "default"
# Derived from the constants above so the write target cannot drift from them.
MAPPING_TABLE = f"{CATALOG}.{SCHEMA}.tables_and_products"

# Flatten the product -> [tables] dict into one (product, table, full name) row per pair.
# NOTE(review): full_table_name points at CATALOG.SCHEMA, while the SQL cell that builds
# products_tables_descriptions reads table comments from main_jcg with
# table_schema = 'fnma' -- confirm which location is the intended one.
rows = [
    (
        product,
        table,
        f"{CATALOG}.{SCHEMA}.{table}"
    )
    for product, tables in products_and_tables.items()
    for table in tables
]

# An explicit schema keeps this working when `rows` is empty; with only column
# names Spark has to infer the types from the data and fails on an empty list.
df = spark.createDataFrame(
    rows,
    schema="product STRING, table_name STRING, full_table_name STRING"
)

display(df)

# Full refresh of the mapping table on every run.
df.write.mode("overwrite").saveAsTable(MAPPING_TABLE)

In [0]:
%sql
-- Builds one row per product holding an array of {table_name, description} structs,
-- where description is the table comment read from main_jcg.information_schema.tables.
-- CREATE OR REPLACE (instead of IF NOT EXISTS) so each run rebuilds the table;
-- with IF NOT EXISTS the statement is a no-op once the table exists and the
-- descriptions would never be refreshed.
CREATE OR REPLACE TABLE main_jcg.FNMA.products_tables_descriptions AS
SELECT
  tp.product,
  collect_list(
    named_struct(
      'table_name', t.table_name,
      'description', t.comment
    )
  ) AS tables_and_descriptions
FROM main_jcg.information_schema.tables t
-- Only tables that are mapped to a product are kept, so this is an inner join.
INNER JOIN fnma_product_catalog_jcg.default.tables_and_products tp
  ON t.table_name = tp.table_name
WHERE t.table_schema = 'fnma'
  -- Drops mapping rows whose product value itself is NULL.
  AND tp.product IS NOT NULL
GROUP BY tp.product

In [0]:
%sql
-- Builds the consolidated product catalog: one row per (schema, tag) of
-- fnma_product_catalog_jcg, with the schema comment as the description and the
-- JSON-encoded list of that product's tables and their descriptions.
-- CREATE OR REPLACE (instead of IF NOT EXISTS) so each run rebuilds the table;
-- with IF NOT EXISTS the statement is a no-op once the table exists.
-- The change data feed property is set here so it is declared as part of every rebuild.
CREATE OR REPLACE TABLE fnma_product_catalog_jcg.default.product_catalog
TBLPROPERTIES (delta.enableChangeDataFeed = true)
AS
WITH schema_descriptions AS (
  -- Each schema is treated as a product; the LEFT JOIN keeps schemas without tags
  -- and yields one row per tag for schemas that have several.
  SELECT
    s.SCHEMA_NAME as Product_Name,
    s.comment as Description,
    t.TAG_NAME,
    t.TAG_VALUE
  FROM
    fnma_product_catalog_jcg.INFORMATION_SCHEMA.SCHEMATA s
      LEFT JOIN fnma_product_catalog_jcg.INFORMATION_SCHEMA.SCHEMA_TAGS t
        ON s.SCHEMA_NAME = t.SCHEMA_NAME
  WHERE
    -- Substring match: any schema whose name contains these words is excluded.
    s.schema_name NOT LIKE '%default%'
    AND s.schema_name NOT LIKE '%information_schema%'
),

schema_table_list AS (
  -- to_json already returns a STRING, so no cast is needed.
  SELECT product, to_json(tables_and_descriptions) AS table_names
  FROM main_jcg.FNMA.products_tables_descriptions
)
SELECT
  -- Tag columns act as tie-breakers so unique_id is assigned deterministically
  -- when a product has more than one tag row.
  ROW_NUMBER() OVER (ORDER BY A.Product_Name, A.TAG_NAME, A.TAG_VALUE) AS unique_id,
  A.*,
  -- Products with no mapped tables get a single empty placeholder entry.
  ifnull(
    B.table_names,
    '[{"table_name":"","description":""}]'
  ) AS table_names
FROM
  schema_descriptions A
    LEFT JOIN schema_table_list B
      ON A.Product_Name = B.product



In [0]:
%sql
-- Preview the consolidated product catalog.
SELECT *
FROM fnma_product_catalog_jcg.default.product_catalog;

In [0]:
%sql
-- Turn on the Delta change data feed for the catalog table so the
-- vector search index can stay up to date.
ALTER TABLE fnma_product_catalog_jcg.default.product_catalog
  SET TBLPROPERTIES (delta.enableChangeDataFeed = true);