In [0]:
%python
from pyspark.sql import SparkSession
import json
import os

# Export the Spark schema of each selected table as a JSON file in a volume.
#
# For every name in `selected_tables`, the table
# `<catalog_name>.<schema_name>.<table>` is loaded, its schema is serialized
# with `df.schema.json()`, and the result is written to
# `<target_volume_path>/<table>_schema.json`, overwriting any existing file.
# The first failure is reported and re-raised, which stops the run.

# Initialize Spark session
spark = SparkSession.builder.getOrCreate()

# Configuration
catalog_name = "cdl_bronze_dev"
schema_name = "poc_udp"
# List of selected tables to process (add or remove as needed).
# Table names currently left out of the export, kept here for reference:
#     "km_test_embedding_v2_vs",    "km_test_embeddings_vs",    "raw_trv_sample_doc_chunk_emb",    "patterns_withemb_index_test",    "raw_files_chunk_emb_index",
selected_tables = [
    "bronze_submissions",
    "gold_document_knowledge_base",
    "gold_json_docs_knowledge_base",
    "silver_document_payloads",
    "silver_xml_payloads",
    "silver_json_payloads",
]
# Target volume path (update as needed, e.g., '/Volumes/cdl_bronze_dev/poc_udp/schemas/')
target_volume_path = "/Volumes/cdl_bronze_dev/poc_udp/cora/schemas/"

# Ensure the target directory exists (for local or DBFS, adjust as needed)
os.makedirs(target_volume_path, exist_ok=True)

for table in selected_tables:
    full_table_name = f"{catalog_name}.{schema_name}.{table}"
    try:
        df = spark.table(full_table_name)
        schema_json = df.schema.json()
        # Join the path instead of concatenating strings so the file name is
        # still correct when target_volume_path has no trailing slash.
        schema_file_path = os.path.join(target_volume_path, f"{table}_schema.json")
        # Write schema to a JSON file in the volume using dbutils.fs
        dbutils.fs.put(schema_file_path, schema_json, overwrite=True)
        print(f"Schema for {full_table_name} written to {schema_file_path}")
    except Exception as e:
        print(f"Failed to process {full_table_name}: {e}")
        # Bare `raise` re-raises the active exception unchanged.
        raise

Wrote 1507 bytes.
Schema for cdl_bronze_dev.poc_udp.bronze_submissions written to /Volumes/cdl_bronze_dev/poc_udp/cora/schemas/bronze_submissions_schema.json
Wrote 2357 bytes.
Schema for cdl_bronze_dev.poc_udp.gold_document_knowledge_base written to /Volumes/cdl_bronze_dev/poc_udp/cora/schemas/gold_document_knowledge_base_schema.json
Wrote 2188 bytes.
Schema for cdl_bronze_dev.poc_udp.gold_json_docs_knowledge_base written to /Volumes/cdl_bronze_dev/poc_udp/cora/schemas/gold_json_docs_knowledge_base_schema.json
Wrote 897 bytes.
Schema for cdl_bronze_dev.poc_udp.silver_document_payloads written to /Volumes/cdl_bronze_dev/poc_udp/cora/schemas/silver_document_payloads_schema.json
Wrote 1421 bytes.
Schema for cdl_bronze_dev.poc_udp.silver_xml_payloads written to /Volumes/cdl_bronze_dev/poc_udp/cora/schemas/silver_xml_payloads_schema.json
Wrote 1310 bytes.
Schema for cdl_bronze_dev.poc_udp.silver_json_payloads written to /Volumes/cdl_bronze_dev/poc_udp/cora/schemas/silver_json_payloads_schem